diff --git a/.coveragerc b/.coveragerc
index 9936b513a6..05b5fdb6ca 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -5,6 +5,7 @@ branch = True
omit =
*/results/*
*/_version.py
+ */conftest.py
[report]
# Regexes for lines to exclude from consideration
diff --git a/doc/source/changes.rst b/doc/source/changes.rst
index e1fb6208d0..3c474c34d3 100644
--- a/doc/source/changes.rst
+++ b/doc/source/changes.rst
@@ -1,13 +1,15 @@
Change Log
----------
-Since 4.14
-==========
-* Removed support for Python 3.5 inline with NEP-29 (:issue:`222`)
+Version 4.15
+============
+* Blackened the code.
+* Added McElroy's and Berndt's measures of system fit (:issue:`215`).
+* Removed support for Python 3.5 in line with NEP-29 (:issue:`222`).
Version 4.14
============
-* Fixed issue where datasets were not installed with wheels (:issue:`217`)
+* Fixed issue where datasets were not installed with wheels (:issue:`217`).
* Switched to property-cached to inherit cached property from property (:issue:`211`).
* Removed all use of :class:`pandas.Panel` (:issue:`211`).
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 3e5f77a11f..7946bd5feb 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -21,6 +21,9 @@
# import sys
# sys.path.insert(0, os.path.abspath('.'))
+import glob
+import hashlib
+import os
from distutils.version import LooseVersion
import linearmodels
@@ -38,54 +41,78 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
-extensions = ['sphinx.ext.autodoc',
- 'sphinx.ext.autosummary',
- 'sphinx.ext.intersphinx',
- 'sphinx.ext.todo',
- 'sphinx.ext.coverage',
- 'sphinx.ext.mathjax',
- 'sphinx.ext.ifconfig',
- 'sphinx.ext.viewcode',
- 'sphinx.ext.githubpages',
- 'numpydoc',
- 'sphinx_autodoc_typehints',
- 'sphinx.ext.autosummary',
- 'sphinx.ext.extlinks',
- 'sphinx.ext.doctest',
- 'IPython.sphinxext.ipython_console_highlighting',
- 'IPython.sphinxext.ipython_directive',
- 'nbsphinx',
- 'sphinx_material'
- ]
+extensions = [
+ "sphinx.ext.autodoc",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.intersphinx",
+ "sphinx.ext.todo",
+ "sphinx.ext.coverage",
+ "sphinx.ext.mathjax",
+ "sphinx.ext.ifconfig",
+ "sphinx.ext.viewcode",
+ "sphinx.ext.githubpages",
+ "numpydoc",
+ "sphinx_autodoc_typehints",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.extlinks",
+ "sphinx.ext.doctest",
+ "IPython.sphinxext.ipython_console_highlighting",
+ "IPython.sphinxext.ipython_directive",
+ "nbsphinx",
+ "sphinx_material",
+]
try:
import sphinxcontrib.spelling # noqa: F401
except ImportError as err: # noqa: F841
pass
else:
- extensions.append('sphinxcontrib.spelling')
+ extensions.append("sphinxcontrib.spelling")
-spelling_word_list_filename = ['spelling_wordlist.txt', 'names_wordlist.txt']
+spelling_word_list_filename = ["spelling_wordlist.txt", "names_wordlist.txt"]
spelling_ignore_pypi_package_names = True
add_module_names = False
+# Copy over notebooks from examples to docs for build
+files = glob.glob("../../examples/*.ipynb") + glob.glob("../../examples/*.png")
+for file_to_copy in files:
+ full_name = os.path.split(file_to_copy)[-1]
+    # Example names are "<folder>_<name>"; split on the first underscore only
+    folder, file_name = full_name.split("_", 1)
+ if not file_name.endswith("ipynb"):
+ file_name = "_".join((folder, file_name))
+ out_dir = os.path.join(folder, "examples")
+    os.makedirs(out_dir, exist_ok=True)
+ out_file = os.path.join(out_dir, file_name)
+ existing_hash = ""
+ with open(file_to_copy, "rb") as example:
+ example_file = example.read()
+ example_hash = hashlib.md5(example_file).hexdigest()
+ if os.path.exists(out_file):
+ with open(out_file, "rb") as existing:
+ existing_hash = hashlib.md5(existing.read()).hexdigest()
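+    # Copy only when the content differs so that Sphinx/nbsphinx does not
+    # rebuild notebooks that have not changed.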
+ if existing_hash != example_hash:
+ print(f"Copying {file_to_copy} to {out_file}")
+ with open(out_file, "wb") as out:
+ out.write(example_file)
+
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# General information about the project.
-project = 'linearmodels'
-copyright = '2017, Kevin Sheppard'
-author = 'Kevin Sheppard'
+project = "linearmodels"
+copyright = "2017, Kevin Sheppard"
+author = "Kevin Sheppard"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -94,14 +121,14 @@
# The short X.Y version.
version = LooseVersion(linearmodels.__version__)
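+# A development version from versioneer looks like, e.g., "4.15+3.g0f1e2d3.dirty";
+# split out the tag, commit count, and hash for display in the docs.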
-if '+' in version.version:
+if "+" in version.version:
version = linearmodels.__version__
- version = version.replace('.dirty', '')
- version = version.split('+')
- commits, tag = version[1].split('.')
+ version = version.replace(".dirty", "")
+ version = version.split("+")
+ commits, tag = version[1].split(".")
version = version[0]
- short_tag = ' (+{0})'.format(commits)
- tag = ' (+' + commits + ', ' + tag + ')'
+ short_tag = " (+{0})".format(commits)
+ tag = " (+" + commits + ", " + tag + ")"
short_version = version + short_tag
version = version + tag
else:
@@ -137,40 +164,40 @@
html_theme_path = sphinx_material.html_theme_path()
html_context = sphinx_material.get_html_context()
-html_theme = 'sphinx_material'
+html_theme = "sphinx_material"
# Adds an HTML table visitor to apply Bootstrap table classes
# sphinx_material theme options (see theme.conf for more information)
html_theme_options = {
- 'base_url': 'http://bashtage.github.io/linearmodels/',
- 'repo_url': 'https://github.com/bashtage/linearmodels/',
- 'repo_name': 'linearmodels',
+ "base_url": "http://bashtage.github.io/linearmodels/",
+ "repo_url": "https://github.com/bashtage/linearmodels/",
+ "repo_name": "linearmodels",
# Set the name of the project to appear in the sidebar
"nav_title": project + " " + short_version,
- 'globaltoc_depth': 2,
- 'globaltoc_collapse': True,
- 'globaltoc_includehidden': True,
- 'theme_color': '#2196f3',
- 'color_primary': 'blue',
- 'color_accent': 'orange',
- 'html_minify': True,
- 'css_minify': True,
- 'master_doc': False,
- 'heroes': {
- 'index': 'Models for panel data, system regression, instrumental \
- variables and asset pricing.'
- }
+ "globaltoc_depth": 2,
+ "globaltoc_collapse": True,
+ "globaltoc_includehidden": True,
+ "theme_color": "#2196f3",
+ "color_primary": "blue",
+ "color_accent": "orange",
+ "html_minify": True,
+ "css_minify": True,
+ "master_doc": False,
+ "heroes": {
+ "index": "Models for panel data, system regression, instrumental \
+ variables and asset pricing."
+ },
}
-html_favicon = 'images/favicon.ico'
-html_logo = 'images/bw-logo.svg'
+html_favicon = "images/favicon.ico"
+html_logo = "images/bw-logo.svg"
# Register the theme as an extension to generate a sitemap.xml
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
html_sidebars = {
"**": ["logo-text.html", "globaltoc.html", "localtoc.html", "searchbox.html"]
@@ -183,15 +210,12 @@
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
-
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
-
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
-
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
@@ -201,18 +225,20 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
- (master_doc, 'linearmodels.tex', 'linearmodels Documentation',
- 'Kevin Sheppard', 'manual'),
+ (
+ master_doc,
+ "linearmodels.tex",
+ "linearmodels Documentation",
+ "Kevin Sheppard",
+ "manual",
+ ),
]
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
-man_pages = [
- (master_doc, 'linearmodels', 'linearmodels Documentation',
- [author], 1)
-]
+man_pages = [(master_doc, "linearmodels", "linearmodels Documentation", [author], 1)]
# -- Options for Texinfo output -------------------------------------------
@@ -220,23 +246,29 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- (master_doc, 'linearmodels', 'linearmodels Documentation',
- author, 'linearmodels', 'One line description of project.',
- 'Miscellaneous'),
+ (
+ master_doc,
+ "linearmodels",
+ "linearmodels Documentation",
+ author,
+ "linearmodels",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
]
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
- 'statsmodels': ('https://www.statsmodels.org/dev/', None),
- 'matplotlib': ('https://matplotlib.org/', None),
- 'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None),
- 'python': ('https://docs.python.org/3', None),
- 'numpy': ('https://docs.scipy.org/doc/numpy', None),
- 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
- 'xarray': ('https://xarray.pydata.org/en/stable/', None)
+ "statsmodels": ("https://www.statsmodels.org/dev/", None),
+ "matplotlib": ("https://matplotlib.org/", None),
+ "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None),
+ "python": ("https://docs.python.org/3", None),
+ "numpy": ("https://docs.scipy.org/doc/numpy", None),
+ "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
+ "xarray": ("https://xarray.pydata.org/en/stable/", None),
}
-extlinks = {'issue': ('https://github.com/bashtage/linearmodels/issues/%s', 'GH')}
+extlinks = {"issue": ("https://github.com/bashtage/linearmodels/issues/%s", "GH")}
doctest_global_setup = """
@@ -256,4 +288,54 @@
napoleon_use_admonition_for_references = True
autosummary_generate = True
-autoclass_content = 'class'
\ No newline at end of file
+autoclass_content = "class"
+
+# Create xrefs
+numpydoc_use_autodoc_signature = True
+numpydoc_xref_param_type = True
+numpydoc_class_members_toctree = False
+numpydoc_xref_aliases = {
+ "Figure": "matplotlib.figure.Figure",
+ "Axes": "matplotlib.axes.Axes",
+ "AxesSubplot": "matplotlib.axes.Axes",
+ "DataFrame": "pandas.DataFrame",
+ "Series": "pandas.Series",
+ "BetweenOLS": "linearmodels.panel.model.BetweenOLS",
+ "FamaMacBeth": "linearmodels.panel.model.FamaMacBeth",
+ "FirstDifferenceOLS": "linearmodels.panel.model.FirstDifferenceOLS",
+ "IV2SLS": "linearmodels.iv.model.IV2SLS",
+ "IV3SLS": "linearmodels.system.model.IV3SLS",
+ "IVGMM": "linearmodels.iv.model.IVGMM",
+ "IVGMMCUE": "linearmodels.iv.model.IVGMMCUE",
+ "IVLIML": "linearmodels.iv.model.IVLIML",
+ "IVSystemGMM": "linearmodels.system.model.IVSystemGMM",
+ "LinearFactorModel": "linearmodels.asset_pricing.model.LinearFactorModel",
+ "LinearFactorModelGMM": "linearmodels.asset_pricing.model.LinearFactorModelGMM",
+ "OLS": "linearmodels.iv.model.OLS",
+ "PanelOLS": "linearmodels.panel.model.PanelOLS",
+ "PooledOLS": "linearmodels.panel.model.PooledOLS",
+ "RandomEffects": "linearmodels.panel.model.RandomEffects",
+ "SUR": "linearmodels.system.model.SUR",
+ "TradedFactorModel": "linearmodels.asset_pricing.model.TradedFactorModel",
+ "AbsorbingLSResults": "linearmodels.iv.absorbing.AbsorbingLSResults",
+ "FirstStageResults": "linearmodels.iv.results.FirstStageResults",
+ "IVGMMResults": "linearmodels.iv.results.IVGMMResults",
+ "IVModelComparison": "linearmodels.iv.results.IVModelComparison",
+ "IVResults": "linearmodels.iv.results.IVResults",
+ "InvalidTestStatistic": "linearmodels.utility.InvalidTestStatistic",
+ "OLSResults": "linearmodels.iv.results.OLSResults",
+ "WaldTestStatistic": "linearmodels.utility.WaldTestStatistic",
+ "PanelEffectsResults": "linearmodels.panel.results.PanelEffectsResults",
+ "PanelModelComparison": "linearmodels.panel.results.PanelModelComparison",
+ "PanelResults": "linearmodels.panel.results.PanelResults",
+ "RandomEffectsResults": "linearmodels.panel.results.RandomEffectsResults",
+ "GMMSystemResults": "linearmodels.system.results.GMMSystemResults",
+ "Summary": "linearmodels.compat.statsmodels.Summary",
+ "SystemEquationResult": "linearmodels.system.results.SystemEquationResult",
+ "SystemResults": "linearmodels.system.results.SystemResults",
+ "GMMFactorModelResults": "linearmodels.asset_pricing.results.GMMFactorModelResults",
+ "LinearFactorModelResults": "linearmodels.asset_pricing.results.LinearFactorModelResults",
+ "PanelData": "linearmodels.panel.data.PanelData",
+ "IVData": "linearmodels.iv.data.IVData",
+ "AttrDict": "linearmodels.utility.AttrDict",
+}
diff --git a/doc/source/system/mathematical-detail.lyx b/doc/source/system/mathematical-detail.lyx
index 625f647608..0cd60630c4 100644
--- a/doc/source/system/mathematical-detail.lyx
+++ b/doc/source/system/mathematical-detail.lyx
@@ -1068,5 +1068,244 @@ ic weighting formula immediately above.
\end_layout
+\begin_layout Subsection*
+System Measures of Fit (
+\begin_inset Formula $R^{2}$
+\end_inset
+
+)
+\end_layout
+
+\begin_layout Standard
+Most measures of fit for systems of equations assume that all equations
+ contain a constant (or equivalent).
+ Caution is needed when interpreting these measures if equations exclude constant terms.
+\end_layout
+
+\begin_layout Subsubsection*
+\noindent
+Overall
+\begin_inset Formula $R^{2}$
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+The overall
+\begin_inset Formula $R^{2}$
+\end_inset
+
+ is defined as
+\end_layout
+
+\begin_layout Standard
+\noindent
+\begin_inset Formula
+\[
+R^{2}=1-\frac{\sum_{i=1}^{K}SSR_{i}}{\sum_{i=1}^{K}TSS_{i}}
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+where
+\begin_inset Formula $TSS_{i}$
+\end_inset
+
+ is centered if equation
+\begin_inset Formula $i$
+\end_inset
+
+ contains a constant and uncentered if it does not.
+ When all equations contain constants, it is identical to Judge's measure.
+\end_layout
+
+\begin_layout Subsubsection*
+\noindent
+McElroy
+\end_layout
+
+\begin_layout Standard
+\noindent
+McElroy's (1977) measure is defined as
+\begin_inset Formula
+\[
+R^{2}=1-\frac{\epsilon^{\prime}\Omega^{-1}\epsilon}{Y^{\prime}\left(\Sigma^{-1}\otimes\left(I_{N}-\frac{\iota\iota^{\prime}}{N}\right)\right)Y}
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+where
+\begin_inset Formula $\iota$
+\end_inset
+
+ is an
+\begin_inset Formula $N$
+\end_inset
+
+ by 1 vector of 1s.
+ This is implemented as
+\begin_inset Formula
+\[
+R^{2}=1-\frac{\sum_{i=1}^{N}\sum_{j=1}^{K}\hat{\xi}_{ij}^{2}}{\sum_{i=1}^{N}\sum_{j=1}^{K}\hat{\eta}_{ij}^{2}}
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+where
+\begin_inset Formula
+\begin{align*}
+\hat{\xi} & =\hat{E}\hat{\Sigma}^{-\frac{1}{2}}\\
+\hat{E} & =\left[\begin{array}{cccc}
+\hat{\epsilon}_{1} & \hat{\epsilon}_{2} & \ldots & \hat{\epsilon}_{N}\end{array}\right]
+\end{align*}
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+and
+\end_layout
+
+\begin_layout Standard
+\noindent
+\begin_inset Formula
+\begin{align*}
+\hat{\eta} & =\tilde{Y}\hat{\Sigma}^{-\frac{1}{2}}\\
+\tilde{Y} & =\left[\begin{array}{cccc}
+Y_{1}-\hat{\mu}_{1} & Y_{2}-\hat{\mu}_{2} & \ldots & Y_{K}-\hat{\mu}_{K}\end{array}\right].
+\end{align*}
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+where the vector of mean parameters is estimated by fitting a SURE to the
+ data (using user-specified weights, if provided) where
+\begin_inset Formula $X_{i}=\iota$
+\end_inset
+
+ contains only a constant.
+ Greene provides an alternative formulation of this measure as
+\begin_inset Formula
+\[
+R^{2}=1-\frac{K}{\mathrm{tr}\left(\hat{\Sigma}^{-1}\hat{\Psi}\right)}
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+where
+\begin_inset Formula $\hat{\Psi}=N^{-1}\tilde{Y}^{\prime}\tilde{Y}$
+\end_inset
+
+ is the covariance of the demeaned data.
+\end_layout
+
+\begin_layout Subsubsection*
+\noindent
+Berndt
+\end_layout
+
+\begin_layout Standard
+\noindent
+Berndt's measure is defined as
+\begin_inset Formula
+\[
+R^{2}=1-\frac{\left|\hat{\Sigma}\right|}{\left|\hat{\Psi}\right|}.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsubsection*
+\noindent
+Judge
+\end_layout
+
+\begin_layout Standard
+\noindent
+Judge's measure is the naive OLS
+\begin_inset Formula $R^{2}$
+\end_inset
+
+ for the system,
+\end_layout
+
+\begin_layout Standard
+\noindent
+\begin_inset Formula
+\[
+R^{2}=1-\frac{\sum_{i=1}^{N}\sum_{j=1}^{K}\hat{E}_{ij}^{2}}{\sum_{i=1}^{N}\sum_{j=1}^{K}\tilde{Y}_{ij}^{2}}.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsubsection*
+\noindent
+Dhrymes
+\end_layout
+
+\begin_layout Standard
+\noindent
+Dhrymes' measure of fit is a weighted average of the
+\begin_inset Formula $R^{2}$
+\end_inset
+
+ of each equation,
+\begin_inset Formula
+\[
+R^{2}=\sum_{i=1}^{K}R_{i}^{2}\frac{\hat{\Psi}_{ii}}{\mathrm{tr}\left(\hat{\Psi}\right)}
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\noindent
+where
+\begin_inset Formula $R_{i}^{2}$
+\end_inset
+
+ is the coefficient of determination from equation
+\begin_inset Formula $i$
+\end_inset
+
+.
+\end_layout
+
\end_body
\end_document
diff --git a/doc/source/system/mathematical-detail.txt b/doc/source/system/mathematical-detail.txt
index 04281b0ef4..113ef3a026 100644
--- a/doc/source/system/mathematical-detail.txt
+++ b/doc/source/system/mathematical-detail.txt
@@ -1,7 +1,3 @@
-
-Formulas and Mathematical Detail
-================================
-
Seemingly Unrelated Regression (SUR/SURE)
-----------------------------------------
@@ -217,6 +213,8 @@ hypothesis testing of parameters. It can also lead to more precise
parameter estimates if some residuals are conditionally homoskedastic
and regressors differ across equations.
+.. _basic-notation-1:
+
Basic Notation
~~~~~~~~~~~~~~
@@ -372,3 +370,86 @@ cases these should be the same, and so the covariance of the estimated
parameters will simplify to
.. math:: \widehat{Var\left(\hat{\beta}\right)}=N^{-1}\left(\frac{X^{\prime}Z}{N}\hat{W}^{-1}\frac{Z^{\prime}X}{N}\right)^{-1}.
+
+System Measures of Fit (:math:`R^{2}`)
+--------------------------------------
+
+Most measures of fit for systems of equations assume that all equations
+contain a constant (or equivalent). Caution is needed when interpreting
+these measures if equations exclude constant terms.
+
+Overall :math:`R^{2}`
+~~~~~~~~~~~~~~~~~~~~~
+
+The overall :math:`R^{2}` is defined as
+
+.. math:: R^{2}=1-\frac{\sum_{i=1}^{K}SSR_{i}}{\sum_{i=1}^{K}TSS_{i}}
+
+where :math:`TSS_{i}` is centered if equation :math:`i` contains a
+constant and uncentered if it does not. When all equations contain
+constants, it is identical to Judge’s measure.
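+
+For concreteness, a minimal sketch of this computation. The
+per-equation sums of squares below are hypothetical inputs, not values
+produced by the library:
+
+.. code-block:: python
+
+   import numpy as np
+
+   # hypothetical SSR and TSS for a three-equation system
+   ssr = np.array([2.3, 1.7, 4.1])
+   tss = np.array([9.0, 6.5, 12.2])
+   overall_r2 = 1 - ssr.sum() / tss.sum()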
+
+McElroy
+~~~~~~~
+
+McElroy’s (1977) measure is defined as
+
+.. math:: R^{2}=1-\frac{\epsilon^{\prime}\Omega^{-1}\epsilon}{Y^{\prime}\left(\Sigma^{-1}\otimes\left(I_{N}-\frac{\iota\iota^{\prime}}{N}\right)\right)Y}
+
+where :math:`\iota` is an :math:`N` by 1 vector of 1s. This is
+implemented as
+
+.. math:: R^{2}=1-\frac{\sum_{i=1}^{N}\sum_{j=1}^{K}\hat{\xi}_{ij}^{2}}{\sum_{i=1}^{N}\sum_{j=1}^{K}\hat{\eta}_{ij}^{2}}
+
+where
+
+.. math::
+
+ \begin{aligned}
+ \hat{\xi} & =\hat{E}\hat{\Sigma}^{-\frac{1}{2}}\\
+ \hat{E} & =\left[\begin{array}{cccc}
+ \hat{\epsilon}_{1} & \hat{\epsilon}_{2} & \ldots & \hat{\epsilon}_{N}\end{array}\right]\end{aligned}
+
+and
+
+.. math::
+
+ \begin{aligned}
+ \hat{\eta} & =\tilde{Y}\hat{\Sigma}^{-\frac{1}{2}}\\
+ \tilde{Y} & =\left[\begin{array}{cccc}
+    Y_{1}-\hat{\mu}_{1} & Y_{2}-\hat{\mu}_{2} & \ldots & Y_{K}-\hat{\mu}_{K}\end{array}\right].\end{aligned}
+
+where the vector of mean parameters is estimated by fitting a SURE to
+the data (using user-specified weights, if provided) where
+:math:`X_{i}=\iota` contains only a constant. Greene provides an
+alternative formulation of this measure as
+
+.. math:: R^{2}=1-\frac{K}{\mathrm{tr}\left(\hat{\Sigma}^{-1}\hat{\Psi}\right)}
+
+where :math:`\hat{\Psi}=N^{-1}\tilde{Y}^{\prime}\tilde{Y}` is the
+covariance of the demeaned data.
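+
+As a numerical check of the equivalence between the direct and trace
+formulations, consider the following minimal sketch. The arrays ``eps``
+and ``y_tilde`` are simulated stand-ins for the residuals
+:math:`\hat{E}` and demeaned data :math:`\tilde{Y}`; this illustrates
+the formulas rather than the library implementation.
+
+.. code-block:: python
+
+   import numpy as np
+
+   rng = np.random.default_rng(0)
+   nobs, neq = 500, 3
+   y_tilde = rng.standard_normal((nobs, neq))  # stand-in demeaned data
+   eps = 0.5 * rng.standard_normal((nobs, neq))  # stand-in residuals
+   sigma_hat = eps.T @ eps / nobs
+   psi_hat = y_tilde.T @ y_tilde / nobs
+
+   # Sigma^(-1/2) from the eigendecomposition of the residual covariance
+   vals, vecs = np.linalg.eigh(sigma_hat)
+   root_inv = vecs @ np.diag(1 / np.sqrt(vals)) @ vecs.T
+   xi = eps @ root_inv
+   eta = y_tilde @ root_inv
+   direct = 1 - (xi ** 2).sum() / (eta ** 2).sum()
+
+   # Greene's trace form agrees up to floating-point error
+   trace_form = 1 - neq / np.trace(np.linalg.solve(sigma_hat, psi_hat))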
+
+Berndt
+~~~~~~
+
+Berndt’s measure is defined as
+
+.. math:: R^{2}=1-\frac{\left|\hat{\Sigma}\right|}{\left|\hat{\Psi}\right|}.
+
+Judge
+~~~~~
+
+Judge’s measure is the naive OLS :math:`R^{2}` for the system,
+
+.. math:: R^{2}=1-\frac{\sum_{i=1}^{N}\sum_{j=1}^{K}\hat{E}_{ij}^{2}}{\sum_{i=1}^{N}\sum_{j=1}^{K}\tilde{Y}_{ij}^{2}}.
+
+Dhrymes
+~~~~~~~
+
+Dhrymes’ measure of fit is a weighted average of the :math:`R^{2}` of
+each equation,
+
+.. math:: R^{2}=\sum_{i=1}^{K}R_{i}^{2}\frac{\hat{\Psi}_{ii}}{\mathrm{tr}\left(\hat{\Psi}\right)}
+
+where :math:`R_{i}^{2}` is the coefficient of determination from
+equation :math:`i`.
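+
+Berndt's, Judge's, and Dhrymes' measures, like McElroy's, depend only on
+:math:`\hat{\Sigma}` and :math:`\hat{\Psi}`, so one short sketch covers
+all three. As above, ``eps`` and ``y_tilde`` are hypothetical stand-ins
+for the residuals and demeaned data; this illustrates the formulas, not
+the library implementation:
+
+.. code-block:: python
+
+   import numpy as np
+
+   rng = np.random.default_rng(0)
+   nobs, neq = 500, 3
+   y_tilde = rng.standard_normal((nobs, neq))
+   eps = 0.5 * rng.standard_normal((nobs, neq))
+   sigma_hat = eps.T @ eps / nobs
+   psi_hat = y_tilde.T @ y_tilde / nobs
+
+   berndt = 1 - np.linalg.det(sigma_hat) / np.linalg.det(psi_hat)
+   judge = 1 - np.trace(sigma_hat) / np.trace(psi_hat)
+   # Dhrymes: per-equation R2 weighted by each equation's share of tr(Psi)
+   r2_i = 1 - np.diag(sigma_hat) / np.diag(psi_hat)
+   dhrymes = r2_i @ (np.diag(psi_hat) / np.trace(psi_hat))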
diff --git a/linearmodels/__init__.py b/linearmodels/__init__.py
index 99195c1e49..2ddf443a6c 100644
--- a/linearmodels/__init__.py
+++ b/linearmodels/__init__.py
@@ -41,31 +41,47 @@
FirstDifferenceOLS, PanelOLS, PooledOLS,
RandomEffects)
from linearmodels.system import IV3SLS, SUR, IVSystemGMM
+
from ._version import get_versions
OLS = _OLS
-WARN_ON_MISSING = os.environ.get('LINEARMODELS_WARN_ON_MISSING', True)
-WARN_ON_MISSING = False if WARN_ON_MISSING in ('0', 'False') else True
-DROP_MISSING = os.environ.get('LINEARMODELS_DROP_MISSING', True)
-DROP_MISSING = False if DROP_MISSING in ('0', 'False') else True
-
-__all__ = ['PooledOLS', 'PanelOLS', 'FirstDifferenceOLS', 'BetweenOLS',
- 'RandomEffects',
- 'FamaMacBeth',
- 'IVLIML', 'IVGMM', 'IVGMMCUE', 'IV2SLS', 'OLS',
- 'SUR', 'IV3SLS', 'IVSystemGMM',
- 'LinearFactorModel', 'LinearFactorModelGMM', 'TradedFactorModel',
- 'WARN_ON_MISSING', 'DROP_MISSING']
+WARN_ON_MISSING = os.environ.get("LINEARMODELS_WARN_ON_MISSING", True)
+WARN_ON_MISSING = False if WARN_ON_MISSING in ("0", "False") else True
+DROP_MISSING = os.environ.get("LINEARMODELS_DROP_MISSING", True)
+DROP_MISSING = False if DROP_MISSING in ("0", "False") else True
+
+__all__ = [
+ "PooledOLS",
+ "PanelOLS",
+ "FirstDifferenceOLS",
+ "BetweenOLS",
+ "RandomEffects",
+ "FamaMacBeth",
+ "IVLIML",
+ "IVGMM",
+ "IVGMMCUE",
+ "IV2SLS",
+ "OLS",
+ "SUR",
+ "IV3SLS",
+ "IVSystemGMM",
+ "LinearFactorModel",
+ "LinearFactorModelGMM",
+ "TradedFactorModel",
+ "WARN_ON_MISSING",
+ "DROP_MISSING",
+]
def test(extra_args=None, exit=True, append=True):
import sys
+
try:
import pytest
except ImportError:
raise ImportError("Need pytest to run tests")
- cmd = ['--tb=short', '--disable-pytest-warnings']
+ cmd = ["--tb=short", "--disable-pytest-warnings"]
if extra_args:
if not isinstance(extra_args, list):
extra_args = [extra_args]
@@ -75,11 +91,11 @@ def test(extra_args=None, exit=True, append=True):
cmd = extra_args
pkg = os.path.dirname(__file__)
cmd = [pkg] + cmd
- print("running: pytest {}".format(' '.join(cmd)))
+ print("running: pytest {}".format(" ".join(cmd)))
status = pytest.main(cmd)
if exit:
sys.exit(status)
-__version__ = get_versions()['version']
+__version__ = get_versions()["version"]
del get_versions
diff --git a/linearmodels/_version.py b/linearmodels/_version.py
index 2865a507d9..6430286885 100644
--- a/linearmodels/_version.py
+++ b/linearmodels/_version.py
@@ -68,8 +68,7 @@ def decorate(f):
return decorate
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
- env=None):
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
"""Call the given command(s)."""
assert isinstance(commands, list)
p = None
@@ -77,10 +76,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
try:
dispcmd = str([c] + args)
# remember shell=False, so use git.cmd on windows, not just git
- p = subprocess.Popen([c] + args, cwd=cwd, env=env,
- stdout=subprocess.PIPE,
- stderr=(subprocess.PIPE if hide_stderr
- else None))
+ p = subprocess.Popen(
+ [c] + args,
+ cwd=cwd,
+ env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr else None),
+ )
break
except EnvironmentError:
e = sys.exc_info()[1]
@@ -117,16 +119,22 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
for i in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
- return {"version": dirname[len(parentdir_prefix):],
- "full-revisionid": None,
- "dirty": False, "error": None, "date": None}
+ return {
+ "version": dirname[len(parentdir_prefix) :],
+ "full-revisionid": None,
+ "dirty": False,
+ "error": None,
+ "date": None,
+ }
else:
rootdirs.append(root)
root = os.path.dirname(root) # up a level
if verbose:
- print("Tried directories %s but none started with prefix %s" %
- (str(rootdirs), parentdir_prefix))
+ print(
+ "Tried directories %s but none started with prefix %s"
+ % (str(rootdirs), parentdir_prefix)
+ )
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@@ -182,7 +190,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
- tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+ tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)])
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
@@ -191,7 +199,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
- tags = set([r for r in refs if re.search(r'\d', r)])
+ tags = set([r for r in refs if re.search(r"\d", r)])
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
@@ -199,19 +207,26 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
- r = ref[len(tag_prefix):]
+ r = ref[len(tag_prefix) :]
if verbose:
print("picking %s" % r)
- return {"version": r,
- "full-revisionid": keywords["full"].strip(),
- "dirty": False, "error": None,
- "date": date}
+ return {
+ "version": r,
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False,
+ "error": None,
+ "date": date,
+ }
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using unknown + full revision id")
- return {"version": "0+unknown",
- "full-revisionid": keywords["full"].strip(),
- "dirty": False, "error": "no suitable tags", "date": None}
+ return {
+ "version": "0+unknown",
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False,
+ "error": "no suitable tags",
+ "date": None,
+ }
@register_vcs_handler("git", "pieces_from_vcs")
@@ -226,8 +241,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
- out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
- hide_stderr=True)
+ out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True)
if rc != 0:
if verbose:
print("Directory %s not under git control" % root)
@@ -235,10 +249,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
- describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
- "--always", "--long",
- "--match", "%s*" % tag_prefix],
- cwd=root)
+ describe_out, rc = run_command(
+ GITS,
+ [
+ "describe",
+ "--tags",
+ "--dirty",
+ "--always",
+ "--long",
+ "--match",
+ "%s*" % tag_prefix,
+ ],
+ cwd=root,
+ )
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
@@ -261,17 +284,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
- git_describe = git_describe[:git_describe.rindex("-dirty")]
+ git_describe = git_describe[: git_describe.rindex("-dirty")]
# now we have TAG-NUM-gHEX or HEX
if "-" in git_describe:
# TAG-NUM-gHEX
- mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+ mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
- pieces["error"] = ("unable to parse git-describe output: '%s'"
- % describe_out)
+ pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
return pieces
# tag
@@ -280,10 +302,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if verbose:
fmt = "tag '%s' doesn't start with prefix '%s'"
print(fmt % (full_tag, tag_prefix))
- pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
- % (full_tag, tag_prefix))
+ pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (
+ full_tag,
+ tag_prefix,
+ )
return pieces
- pieces["closest-tag"] = full_tag[len(tag_prefix):]
+ pieces["closest-tag"] = full_tag[len(tag_prefix) :]
# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))
@@ -294,13 +318,13 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
else:
# HEX: no tags
pieces["closest-tag"] = None
- count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
- cwd=root)
+ count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
pieces["distance"] = int(count_out) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords()
- date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
- cwd=root)[0].strip()
+ date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[
+ 0
+ ].strip()
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
return pieces
@@ -331,8 +355,7 @@ def render_pep440(pieces):
rendered += ".dirty"
else:
# exception #1
- rendered = "0+untagged.%d.g%s" % (pieces["distance"],
- pieces["short"])
+ rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
@@ -446,11 +469,13 @@ def render_git_describe_long(pieces):
def render(pieces, style):
"""Render the given version pieces into the requested style."""
if pieces["error"]:
- return {"version": "unknown",
- "full-revisionid": pieces.get("long"),
- "dirty": None,
- "error": pieces["error"],
- "date": None}
+ return {
+ "version": "unknown",
+ "full-revisionid": pieces.get("long"),
+ "dirty": None,
+ "error": pieces["error"],
+ "date": None,
+ }
if not style or style == "default":
style = "pep440" # the default
@@ -470,9 +495,13 @@ def render(pieces, style):
else:
raise ValueError("unknown style '%s'" % style)
- return {"version": rendered, "full-revisionid": pieces["long"],
- "dirty": pieces["dirty"], "error": None,
- "date": pieces.get("date")}
+ return {
+ "version": rendered,
+ "full-revisionid": pieces["long"],
+ "dirty": pieces["dirty"],
+ "error": None,
+ "date": pieces.get("date"),
+ }
def get_versions():
@@ -486,8 +515,7 @@ def get_versions():
verbose = cfg.verbose
try:
- return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
- verbose)
+ return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose)
except NotThisMethod:
pass
@@ -496,13 +524,16 @@ def get_versions():
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
- for i in cfg.versionfile_source.split('/'):
+ for i in cfg.versionfile_source.split("/"):
root = os.path.dirname(root)
except NameError:
- return {"version": "0+unknown", "full-revisionid": None,
- "dirty": None,
- "error": "unable to find root of source tree",
- "date": None}
+ return {
+ "version": "0+unknown",
+ "full-revisionid": None,
+ "dirty": None,
+ "error": "unable to find root of source tree",
+ "date": None,
+ }
try:
pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
@@ -516,6 +547,10 @@ def get_versions():
except NotThisMethod:
pass
- return {"version": "0+unknown", "full-revisionid": None,
- "dirty": None,
- "error": "unable to compute version", "date": None}
+ return {
+ "version": "0+unknown",
+ "full-revisionid": None,
+ "dirty": None,
+ "error": "unable to compute version",
+ "date": None,
+ }
diff --git a/linearmodels/asset_pricing/__init__.py b/linearmodels/asset_pricing/__init__.py
index 0b61427ff9..9733e05e74 100644
--- a/linearmodels/asset_pricing/__init__.py
+++ b/linearmodels/asset_pricing/__init__.py
@@ -2,4 +2,4 @@
LinearFactorModelGMM,
TradedFactorModel)
-__all__ = ['TradedFactorModel', 'LinearFactorModelGMM', 'LinearFactorModel']
+__all__ = ["TradedFactorModel", "LinearFactorModelGMM", "LinearFactorModel"]
diff --git a/linearmodels/asset_pricing/covariance.py b/linearmodels/asset_pricing/covariance.py
index 258e0fadeb..48fd266699 100644
--- a/linearmodels/asset_pricing/covariance.py
+++ b/linearmodels/asset_pricing/covariance.py
@@ -8,7 +8,6 @@
class _HACMixin(object):
-
def __init__(self):
self._bandwidth = None # pragma: no cover
self._moments = None # pragma: no cover
@@ -32,10 +31,10 @@ def bandwidth(self):
def _check_kernel(self, kernel):
if not isinstance(kernel, str):
- raise TypeError('kernel must be the name of a kernel')
+ raise TypeError("kernel must be the name of a kernel")
self._kernel = kernel.lower()
if self._kernel not in KERNEL_LOOKUP:
- raise ValueError('Unknown kernel')
+ raise ValueError("Unknown kernel")
def _check_bandwidth(self, bandwidth):
self._bandwidth = bandwidth
@@ -43,9 +42,9 @@ def _check_bandwidth(self, bandwidth):
try:
bandwidth = float(bandwidth)
except (TypeError, ValueError):
- raise TypeError('bandwidth must be either None or a float')
+ raise TypeError("bandwidth must be either None or a float")
if bandwidth < 0:
- raise ValueError('bandwidth must be non-negative.')
+ raise ValueError("bandwidth must be non-negative.")
def _kernel_cov(self, z):
nobs = z.shape[0]
@@ -79,16 +78,20 @@ class HeteroskedasticCovariance(object):
        Degree of freedom value to use if debiasing
"""
- def __init__(self, xe, *, jacobian=None, inv_jacobian=None,
- center=True, debiased=False, df=0):
+ def __init__(
+ self, xe, *, jacobian=None, inv_jacobian=None, center=True, debiased=False, df=0
+ ):
self._moments = self._xe = xe
self._jac = jacobian
self._inv_jac = inv_jacobian
self._center = center
- if (jacobian is None and inv_jacobian is None) \
- or (jacobian is not None and inv_jacobian is not None):
- raise ValueError('One and only one of jacobian or inv_jacobian must be provided.')
+ if (jacobian is None and inv_jacobian is None) or (
+ jacobian is not None and inv_jacobian is not None
+ ):
+ raise ValueError(
+ "One and only one of jacobian or inv_jacobian must be provided."
+ )
self._debiased = debiased
self._df = df
if jacobian is not None:
@@ -100,11 +103,11 @@ def __str__(self):
return self.__class__.__name__
def __repr__(self):
- return self.__str__() + ', id: {0}'.format(hex(id(self)))
+ return self.__str__() + ", id: {0}".format(hex(id(self)))
@property
def config(self):
- return {'type': self.__class__.__name__}
+ return {"type": self.__class__.__name__}
@property
def s(self):
@@ -113,7 +116,7 @@ def s(self):
Returns
-------
- s : ndarray
+ ndarray
Covariance of the scores or moment conditions
"""
xe = self._xe
@@ -150,7 +153,7 @@ def cov(self):
Returns
-------
- c : ndarray
+ ndarray
Parameter covariance
"""
s = self.s
@@ -198,26 +201,38 @@ class KernelCovariance(HeteroskedasticCovariance, _HACMixin):
linearmodels.iv.covariance.kernel_weight_quadratic_spectral
"""
- def __init__(self, xe, *, jacobian=None, inv_jacobian=None,
- kernel='bartlett', bandwidth=None, center=True,
- debiased=False, df=0):
- super(KernelCovariance, self).__init__(xe, jacobian=jacobian,
- inv_jacobian=inv_jacobian,
- center=center,
- debiased=debiased, df=df)
+ def __init__(
+ self,
+ xe,
+ *,
+ jacobian=None,
+ inv_jacobian=None,
+ kernel="bartlett",
+ bandwidth=None,
+ center=True,
+ debiased=False,
+ df=0
+ ):
+ super(KernelCovariance, self).__init__(
+ xe,
+ jacobian=jacobian,
+ inv_jacobian=inv_jacobian,
+ center=center,
+ debiased=debiased,
+ df=df,
+ )
self._check_kernel(kernel)
self._check_bandwidth(bandwidth)
def __str__(self):
- descr = ', Kernel: {0}, Bandwidth: {1}'.format(self._kernel,
- self.bandwidth)
+ descr = ", Kernel: {0}, Bandwidth: {1}".format(self._kernel, self.bandwidth)
return self.__class__.__name__ + descr
@property
def config(self):
out = super(KernelCovariance, self).config
- out['kernel'] = self._kernel
- out['bandwidth'] = self.bandwidth
+ out["kernel"] = self._kernel
+ out["bandwidth"] = self.bandwidth
return out
@property
@@ -227,7 +242,7 @@ def s(self):
Returns
-------
- s : ndarray
+ ndarray
Covariance of the scores or moment conditions
"""
xe = self._xe
@@ -263,7 +278,7 @@ def w(self, moments):
Returns
-------
- w : ndarray
+ ndarray
Weighting matrix computed from moment conditions
"""
if self._center:
@@ -290,7 +305,7 @@ class KernelWeight(HeteroskedasticWeight, _HACMixin):
Non-negative integer bandwidth
"""
- def __init__(self, moments, center=True, kernel='bartlett', bandwidth=None):
+ def __init__(self, moments, center=True, kernel="bartlett", bandwidth=None):
super(KernelWeight, self).__init__(moments, center=center)
self._check_kernel(kernel)
self._check_bandwidth(bandwidth)
@@ -306,7 +321,7 @@ def w(self, moments):
Returns
-------
- w : ndarray
+ ndarray
Weighting matrix computed from moment conditions
"""
if self._center:
diff --git a/linearmodels/asset_pricing/model.py b/linearmodels/asset_pricing/model.py
index 95724f893e..69016d8f3f 100644
--- a/linearmodels/asset_pricing/model.py
+++ b/linearmodels/asset_pricing/model.py
@@ -20,14 +20,14 @@
def callback_factory(obj, args, disp=1):
- d = {'iter': 0}
+ d = {"iter": 0}
disp = int(disp)
def callback(params):
fval = obj(params, *args)
- if disp > 0 and (d['iter'] % disp == 0):
- print('Iteration: {0}, Objective: {1}'.format(d['iter'], fval))
- d['iter'] += 1
+ if disp > 0 and (d["iter"] % disp == 0):
+ print("Iteration: {0}, Objective: {1}".format(d["iter"], fval))
+ d["iter"] += 1
return callback
@@ -37,9 +37,9 @@ class TradedFactorModel(object):
Parameters
----------
- portfolios : array-like
+ portfolios : array_like
Test portfolio returns (nobs by nportfolio)
- factors : array-like
+ factors : array_like
Priced factor returns (nobs by nfactor)
Notes
@@ -61,8 +61,8 @@ class TradedFactorModel(object):
"""
def __init__(self, portfolios, factors):
- self.portfolios = IVData(portfolios, var_name='portfolio')
- self.factors = IVData(factors, var_name='factor')
+ self.portfolios = IVData(portfolios, var_name="portfolio")
+ self.factors = IVData(factors, var_name="factor")
self._name = self.__class__.__name__
self._formula = None
self._validate_data()
@@ -70,19 +70,21 @@ def __init__(self, portfolios, factors):
def __str__(self):
out = self.__class__.__name__
f, p = self.factors.shape[1], self.portfolios.shape[1]
- out += ' with {0} factors, {1} test portfolios'.format(f, p)
+ out += " with {0} factors, {1} test portfolios".format(f, p)
return out
def __repr__(self):
- return self.__str__() + '\nid: {0}'.format(hex(id(self)))
+ return self.__str__() + "\nid: {0}".format(hex(id(self)))
def _drop_missing(self):
data = (self.portfolios, self.factors)
missing = np.any(np.c_[[dh.isnull for dh in data]], 0)
if any(missing):
if all(missing):
- raise ValueError('All observations contain missing data. '
- 'Model cannot be estimated.')
+ raise ValueError(
+ "All observations contain missing data. "
+ "Model cannot be estimated."
+ )
self.portfolios.drop(missing)
self.factors.drop(missing)
missing_warning(missing)
@@ -93,23 +95,31 @@ def _validate_data(self):
p = self.portfolios.ndarray
f = self.factors.ndarray
if p.shape[0] != f.shape[0]:
- raise ValueError('The number of observations in portfolios and '
- 'factors is not the same.')
+ raise ValueError(
+ "The number of observations in portfolios and "
+ "factors is not the same."
+ )
self._drop_missing()
p = self.portfolios.ndarray
f = self.factors.ndarray
if has_constant(p)[0]:
- raise ValueError('portfolios must not contains a constant or '
- 'equivalent and must not have rank\n'
- 'less than the dimension of the smaller shape.')
+ raise ValueError(
+ "portfolios must not contains a constant or "
+ "equivalent and must not have rank\n"
+ "less than the dimension of the smaller shape."
+ )
if has_constant(f)[0]:
- raise ValueError('factors must not contain a constant or equivalent.')
+ raise ValueError("factors must not contain a constant or equivalent.")
if np.linalg.matrix_rank(f) < f.shape[1]:
- raise ValueError('Model cannot be estimated. factors do not have full column rank.')
+ raise ValueError(
+ "Model cannot be estimated. factors do not have full column rank."
+ )
if p.shape[0] < (f.shape[1] + 1):
- raise ValueError('Model cannot be estimated. portfolios must have factors + 1 or '
- 'more returns to\nestimate the model parameters.')
+ raise ValueError(
+ "Model cannot be estimated. portfolios must have factors + 1 or "
+ "more returns to\nestimate the model parameters."
+ )
@property
def formula(self):
@@ -121,16 +131,26 @@ def formula(self, value):
@staticmethod
def _prepare_data_from_formula(formula, data, portfolios):
- na_action = NAAction(on_NA='raise', NA_types=[])
+ na_action = NAAction(on_NA="raise", NA_types=[])
orig_formula = formula
if portfolios is not None:
- factors = dmatrix(formula + ' + 0', data, return_type='dataframe', NA_action=na_action)
+ factors = dmatrix(
+ formula + " + 0", data, return_type="dataframe", NA_action=na_action
+ )
else:
- formula = formula.split('~')
- portfolios = dmatrix(formula[0].strip() + ' + 0', data,
- return_type='dataframe', NA_action=na_action)
- factors = dmatrix(formula[1].strip() + ' + 0', data,
- return_type='dataframe', NA_action=na_action)
+ formula = formula.split("~")
+ portfolios = dmatrix(
+ formula[0].strip() + " + 0",
+ data,
+ return_type="dataframe",
+ NA_action=na_action,
+ )
+ factors = dmatrix(
+ formula[1].strip() + " + 0",
+ data,
+ return_type="dataframe",
+ NA_action=na_action,
+ )
return factors, portfolios, orig_formula
@@ -143,12 +163,12 @@ def from_formula(cls, formula, data, *, portfolios=None):
Patsy formula modified for the syntax described in the notes
data : DataFrame
DataFrame containing the variables used in the formula
- portfolios : array-like, optional
+ portfolios : array_like, optional
Portfolios to be used in the model
Returns
-------
- model : TradedFactorModel
+ TradedFactorModel
Model instance
Notes
@@ -172,12 +192,14 @@ def from_formula(cls, formula, data, *, portfolios=None):
>>> formula = 'MktRF + SMB + HML'
>>> mod = TradedFactorModel.from_formula(formula, data, portfolios=portfolios)
"""
- factors, portfolios, formula = cls._prepare_data_from_formula(formula, data, portfolios)
+ factors, portfolios, formula = cls._prepare_data_from_formula(
+ formula, data, portfolios
+ )
mod = cls(portfolios, factors)
mod.formula = formula
return mod
- def fit(self, cov_type='robust', debiased=True, **cov_config):
+ def fit(self, cov_type="robust", debiased=True, **cov_config):
"""
Estimate model parameters
@@ -193,7 +215,7 @@ def fit(self, cov_type='robust', debiased=True, **cov_config):
Returns
-------
- results : LinearFactorModelResults
+ LinearFactorModelResults
Results class with parameter estimates, covariance and test statistics
Notes
@@ -221,41 +243,61 @@ def fit(self, cov_type='robust', debiased=True, **cov_config):
nloading = (nfactor + 1) * nportfolio
xpxi = np.eye(nloading + nfactor)
- xpxi[:nloading, :nloading] = np.kron(np.eye(nportfolio), np.linalg.pinv(fc.T @ fc / nobs))
+ xpxi[:nloading, :nloading] = np.kron(
+ np.eye(nportfolio), np.linalg.pinv(fc.T @ fc / nobs)
+ )
f_rep = np.tile(fc, (1, nportfolio))
eps_rep = np.tile(eps, (nfactor + 1, 1)) # 1 2 3 ... 25 1 2 3 ...
- eps_rep = eps_rep.ravel(order='F')
- eps_rep = np.reshape(eps_rep, (nobs, (nfactor + 1) * nportfolio), order='F')
+ eps_rep = eps_rep.ravel(order="F")
+ eps_rep = np.reshape(eps_rep, (nobs, (nfactor + 1) * nportfolio), order="F")
xe = f_rep * eps_rep
xe = np.c_[xe, fe]
- if cov_type in ('robust', 'heteroskedastic'):
- cov_est = HeteroskedasticCovariance(xe, inv_jacobian=xpxi, center=False,
- debiased=debiased, df=fc.shape[1])
- rp_cov_est = HeteroskedasticCovariance(fe, jacobian=np.eye(f.shape[1]), center=False,
- debiased=debiased, df=1)
- elif cov_type == 'kernel':
- cov_est = KernelCovariance(xe, inv_jacobian=xpxi, center=False, debiased=debiased,
- df=fc.shape[1], **cov_config)
+ if cov_type in ("robust", "heteroskedastic"):
+ cov_est = HeteroskedasticCovariance(
+ xe, inv_jacobian=xpxi, center=False, debiased=debiased, df=fc.shape[1]
+ )
+ rp_cov_est = HeteroskedasticCovariance(
+ fe, jacobian=np.eye(f.shape[1]), center=False, debiased=debiased, df=1
+ )
+ elif cov_type == "kernel":
+ cov_est = KernelCovariance(
+ xe,
+ inv_jacobian=xpxi,
+ center=False,
+ debiased=debiased,
+ df=fc.shape[1],
+ **cov_config
+ )
bw = cov_est.bandwidth
_cov_config = {k: v for k, v in cov_config.items()}
- _cov_config['bandwidth'] = bw
- rp_cov_est = KernelCovariance(fe, jacobian=np.eye(f.shape[1]), center=False,
- debiased=debiased, df=1, **_cov_config)
+ _cov_config["bandwidth"] = bw
+ rp_cov_est = KernelCovariance(
+ fe,
+ jacobian=np.eye(f.shape[1]),
+ center=False,
+ debiased=debiased,
+ df=1,
+ **_cov_config
+ )
else:
- raise ValueError('Unknown cov_type: {0}'.format(cov_type))
+ raise ValueError("Unknown cov_type: {0}".format(cov_type))
full_vcv = cov_est.cov
rp_cov = rp_cov_est.cov
vcv = full_vcv[:nloading, :nloading]
# Rearrange VCV
- order = np.reshape(np.arange((nfactor + 1) * nportfolio), (nportfolio, nfactor + 1))
+ order = np.reshape(
+ np.arange((nfactor + 1) * nportfolio), (nportfolio, nfactor + 1)
+ )
order = order.T.ravel()
vcv = vcv[order][:, order]
# Return values
alpha_vcv = vcv[:nportfolio, :nportfolio]
stat = float(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas)
- jstat = WaldTestStatistic(stat, 'All alphas are 0', nportfolio, name='J-statistic')
+ jstat = WaldTestStatistic(
+ stat, "All alphas are 0", nportfolio, name="J-statistic"
+ )
params = b.T
betas = b[1:].T
residual_ss = (eps ** 2).sum()
@@ -264,19 +306,34 @@ def fit(self, cov_type='robust', debiased=True, **cov_config):
r2 = 1 - residual_ss / total_ss
param_names = []
for portfolio in self.portfolios.cols:
- param_names.append('alpha-{0}'.format(portfolio))
+ param_names.append("alpha-{0}".format(portfolio))
for factor in self.factors.cols:
- param_names.append('beta-{0}-{1}'.format(portfolio, factor))
+ param_names.append("beta-{0}-{1}".format(portfolio, factor))
for factor in self.factors.cols:
- param_names.append('lambda-{0}'.format(factor))
-
- res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp, rp_cov=rp_cov,
- alphas=alphas, alpha_vcv=alpha_vcv, jstat=jstat,
- rsquared=r2, total_ss=total_ss, residual_ss=residual_ss,
- param_names=param_names, portfolio_names=self.portfolios.cols,
- factor_names=self.factors.cols, name=self._name,
- cov_type=cov_type, model=self, nobs=nobs, rp_names=self.factors.cols,
- cov_est=cov_est)
+ param_names.append("lambda-{0}".format(factor))
+
+ res = AttrDict(
+ params=params,
+ cov=full_vcv,
+ betas=betas,
+ rp=rp,
+ rp_cov=rp_cov,
+ alphas=alphas,
+ alpha_vcv=alpha_vcv,
+ jstat=jstat,
+ rsquared=r2,
+ total_ss=total_ss,
+ residual_ss=residual_ss,
+ param_names=param_names,
+ portfolio_names=self.portfolios.cols,
+ factor_names=self.factors.cols,
+ name=self._name,
+ cov_type=cov_type,
+ model=self,
+ nobs=nobs,
+ rp_names=self.factors.cols,
+ cov_est=cov_est,
+ )
return LinearFactorModelResults(res)
@@ -286,15 +343,15 @@ class LinearFactorModel(TradedFactorModel):
Parameters
----------
- portfolios : array-like
+ portfolios : array_like
Test portfolio returns (nobs by nportfolio)
- factors : array-like
+ factors : array_like
Priced factor returns (nobs by nfactor)
risk_free : bool, optional
Flag indicating whether the risk-free rate should be estimated
from returns along other risk premia. If False, the returns are
assumed to be excess returns using the correct risk-free rate.
- sigma : array-like, optional
+ sigma : array_like, optional
Positive definite residual covariance (nportfolio by nportfolio)
Notes
@@ -335,7 +392,9 @@ def __init__(self, portfolios, factors, *, risk_free=False, sigma=None):
super(LinearFactorModel, self).__init__(portfolios, factors)
self._validate_additional_data()
if sigma is None:
- self._sigma_m12 = self._sigma_inv = self._sigma = np.eye(self.portfolios.shape[1])
+ self._sigma_m12 = self._sigma_inv = self._sigma = np.eye(
+ self.portfolios.shape[1]
+ )
else:
self._sigma = np.asarray(sigma)
vals, vecs = np.linalg.eigh(sigma)
@@ -345,23 +404,26 @@ def __init__(self, portfolios, factors, *, risk_free=False, sigma=None):
def __str__(self):
out = super(LinearFactorModel, self).__str__()
if np.any(self._sigma != np.eye(self.portfolios.shape[1])):
- out += ' using GLS'
- out += '\nEstimated risk-free rate: {0}'.format(self._risk_free)
+ out += " using GLS"
+ out += "\nEstimated risk-free rate: {0}".format(self._risk_free)
return out
def _validate_additional_data(self):
f = self.factors.ndarray
p = self.portfolios.ndarray
- nrp = (f.shape[1] + int(self._risk_free))
+ nrp = f.shape[1] + int(self._risk_free)
if p.shape[1] < nrp:
- raise ValueError('The number of test portfolio must be at least as '
- 'large as the number of risk premia, including the '
- 'risk free rate if estimated.')
+ raise ValueError(
+ "The number of test portfolio must be at least as "
+ "large as the number of risk premia, including the "
+ "risk free rate if estimated."
+ )
@classmethod
- def from_formula(cls, formula, data, *, portfolios=None, risk_free=False,
- sigma=None):
+ def from_formula(
+ cls, formula, data, *, portfolios=None, risk_free=False, sigma=None
+ ):
"""
Parameters
----------
@@ -369,19 +431,19 @@ def from_formula(cls, formula, data, *, portfolios=None, risk_free=False,
Patsy formula modified for the syntax described in the notes
data : DataFrame
DataFrame containing the variables used in the formula
- portfolios : array-like, optional
+ portfolios : array_like, optional
Portfolios to be used in the model. If provided, must use formula
syntax containing only factors.
risk_free : bool, optional
Flag indicating whether the risk-free rate should be estimated
from returns along other risk premia. If False, the returns are
assumed to be excess returns using the correct risk-free rate.
- sigma : array-like, optional
+ sigma : array_like, optional
Positive definite residual covariance (nportfolio by nportfolio)
Returns
-------
- model : LinearFactorModel
+ LinearFactorModel
Model instance
Notes
@@ -405,12 +467,14 @@ def from_formula(cls, formula, data, *, portfolios=None, risk_free=False,
>>> formula = 'MktRF + SMB + HML'
>>> mod = LinearFactorModel.from_formula(formula, data, portfolios=portfolios)
"""
- factors, portfolios, formula = cls._prepare_data_from_formula(formula, data, portfolios)
+ factors, portfolios, formula = cls._prepare_data_from_formula(
+ formula, data, portfolios
+ )
mod = cls(portfolios, factors, risk_free=risk_free, sigma=sigma)
mod.formula = formula
return mod
- def fit(self, cov_type='robust', debiased=True, **cov_config):
+ def fit(self, cov_type="robust", debiased=True, **cov_config):
"""
Estimate model parameters
@@ -426,7 +490,7 @@ def fit(self, cov_type='robust', debiased=True, **cov_config):
Returns
-------
- results : LinearFactorModelResults
+ LinearFactorModelResults
Results class with parameter estimates, covariance and test statistics
Notes
@@ -461,21 +525,28 @@ def fit(self, cov_type='robust', debiased=True, **cov_config):
# Jacobian
jacobian = self._jacobian(betas, lam, alphas)
- if cov_type not in ('robust', 'heteroskedastic', 'kernel'):
- raise ValueError('Unknown weight: {0}'.format(cov_type))
- if cov_type in ('robust', 'heteroskedastic'):
+ if cov_type not in ("robust", "heteroskedastic", "kernel"):
+ raise ValueError("Unknown weight: {0}".format(cov_type))
+ if cov_type in ("robust", "heteroskedastic"):
cov_est = HeteroskedasticCovariance
else: # 'kernel':
cov_est = KernelCovariance
- cov_est = cov_est(moments, jacobian=jacobian, center=False,
- debiased=debiased, df=fc.shape[1], **cov_config)
+ cov_est = cov_est(
+ moments,
+ jacobian=jacobian,
+ center=False,
+ debiased=debiased,
+ df=fc.shape[1],
+ **cov_config
+ )
# VCV
full_vcv = cov_est.cov
alpha_vcv = full_vcv[s2:, s2:]
stat = float(alphas.T @ np.linalg.pinv(alpha_vcv) @ alphas)
- jstat = WaldTestStatistic(stat, 'All alphas are 0', nport - nf - nrf,
- name='J-statistic')
+ jstat = WaldTestStatistic(
+ stat, "All alphas are 0", nport - nf - nrf, name="J-statistic"
+ )
total_ss = ((p - p.mean(0)[None, :]) ** 2).sum()
residual_ss = (eps ** 2).sum()
@@ -486,13 +557,13 @@ def fit(self, cov_type='robust', debiased=True, **cov_config):
params = np.c_[alphas, betas]
param_names = []
for portfolio in self.portfolios.cols:
- param_names.append('alpha-{0}'.format(portfolio))
+ param_names.append("alpha-{0}".format(portfolio))
for factor in self.factors.cols:
- param_names.append('beta-{0}-{1}'.format(portfolio, factor))
+ param_names.append("beta-{0}-{1}".format(portfolio, factor))
if not excess_returns:
- param_names.append('lambda-risk_free')
+ param_names.append("lambda-risk_free")
for factor in self.factors.cols:
- param_names.append('lambda-{0}'.format(factor))
+ param_names.append("lambda-{0}".format(factor))
# Pivot vcv to remove unnecessary and have correct order
order = np.reshape(np.arange(s1), (nport, nf + 1))
@@ -503,14 +574,29 @@ def fit(self, cov_type='robust', debiased=True, **cov_config):
factor_names = list(self.factors.cols)
rp_names = factor_names[:]
if not excess_returns:
- rp_names.insert(0, 'risk_free')
- res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp, rp_cov=rp_cov,
- alphas=alphas, alpha_vcv=alpha_vcv, jstat=jstat,
- rsquared=r2, total_ss=total_ss, residual_ss=residual_ss,
- param_names=param_names, portfolio_names=self.portfolios.cols,
- factor_names=factor_names, name=self._name,
- cov_type=cov_type, model=self, nobs=nobs, rp_names=rp_names,
- cov_est=cov_est)
+ rp_names.insert(0, "risk_free")
+ res = AttrDict(
+ params=params,
+ cov=full_vcv,
+ betas=betas,
+ rp=rp,
+ rp_cov=rp_cov,
+ alphas=alphas,
+ alpha_vcv=alpha_vcv,
+ jstat=jstat,
+ rsquared=r2,
+ total_ss=total_ss,
+ residual_ss=residual_ss,
+ param_names=param_names,
+ portfolio_names=self.portfolios.cols,
+ factor_names=factor_names,
+ name=self._name,
+ cov_type=cov_type,
+ model=self,
+ nobs=nobs,
+ rp_names=rp_names,
+ cov_est=cov_est,
+ )
return LinearFactorModelResults(res)
@@ -544,7 +630,7 @@ def _jacobian(self, betas, lam, alphas):
block = np.zeros((nf + nrf, nf + 1))
block[:, 1:] = b_tilde[[i]].T @ _lam.T
block[nrf:, 1:] -= alpha_tilde[i] * np.eye(nf)
- jac[s1:s2, (i * (nf + 1)):((i + 1) * (nf + 1))] = block
+ jac[s1:s2, (i * (nf + 1)) : ((i + 1) * (nf + 1))] = block
jac[s1:s2, s1:s2] = bc.T @ sigma_inv @ bc
zero_lam = np.r_[[[0]], _lam]
jac[s2:s3, :s1] = np.kron(np.eye(nport), zero_lam.T)
@@ -575,9 +661,9 @@ class LinearFactorModelGMM(LinearFactorModel):
Parameters
----------
- portfolios : array-like
+ portfolios : array_like
Test portfolio returns (nobs by nportfolio)
- factors : array-like
+ factors : array_like
Priced factors values (nobs by nfactor)
risk_free : bool, optional
Flag indicating whether the risk-free rate should be estimated
@@ -614,7 +700,9 @@ class LinearFactorModelGMM(LinearFactorModel):
"""
def __init__(self, factors, portfolios, *, risk_free=False):
- super(LinearFactorModelGMM, self).__init__(factors, portfolios, risk_free=risk_free)
+ super(LinearFactorModelGMM, self).__init__(
+ factors, portfolios, risk_free=risk_free
+ )
@classmethod
def from_formula(cls, formula, data, *, portfolios=None, risk_free=False):
@@ -625,7 +713,7 @@ def from_formula(cls, formula, data, *, portfolios=None, risk_free=False):
Patsy formula modified for the syntax described in the notes
data : DataFrame
DataFrame containing the variables used in the formula
- portfolios : array-like, optional
+ portfolios : array_like, optional
Portfolios to be used in the model. If provided, must use formula
syntax containing only factors.
risk_free : bool, optional
@@ -635,7 +723,7 @@ def from_formula(cls, formula, data, *, portfolios=None, risk_free=False):
Returns
-------
- model : LinearFactorModelGMM
+ LinearFactorModelGMM
Model instance
Notes
@@ -659,13 +747,24 @@ def from_formula(cls, formula, data, *, portfolios=None, risk_free=False):
>>> formula = 'MktRF + SMB + HML'
>>> mod = LinearFactorModel.from_formula(formula, data, portfolios=portfolios)
"""
- factors, portfolios, formula = cls._prepare_data_from_formula(formula, data, portfolios)
+ factors, portfolios, formula = cls._prepare_data_from_formula(
+ formula, data, portfolios
+ )
mod = cls(portfolios, factors, risk_free=risk_free)
mod.formula = formula
return mod
- def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
- cov_type='robust', debiased=True, **cov_config):
+ def fit(
+ self,
+ center=True,
+ use_cue=False,
+ steps=2,
+ disp=10,
+ max_iter=1000,
+ cov_type="robust",
+ debiased=True,
+ **cov_config
+ ):
"""
Estimate model parameters
@@ -695,7 +794,7 @@ def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
Returns
-------
- results : GMMFactorModelResults
+ GMMFactorModelResults
Results class with parameter estimates, covariance and test statistics
Notes
@@ -710,7 +809,9 @@ def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
excess_returns = not self._risk_free
nrf = int(not bool(excess_returns))
 # 1. Starting values - use the 2-pass estimator
- mod = LinearFactorModel(self.portfolios, self.factors, risk_free=self._risk_free)
+ mod = LinearFactorModel(
+ self.portfolios, self.factors, risk_free=self._risk_free
+ )
res = mod.fit()
betas = np.asarray(res.betas).ravel()
lam = np.asarray(res.risk_premia)
@@ -718,9 +819,9 @@ def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
sv = np.r_[betas, lam, mu][:, None]
g = self._moments(sv, excess_returns)
g -= g.mean(0)[None, :] if center else 0
- if cov_type not in ('robust', 'heteroskedastic', 'kernel'):
- raise ValueError('Unknown weight: {0}'.format(cov_type))
- if cov_type in ('robust', 'heteroskedastic'):
+ if cov_type not in ("robust", "heteroskedastic", "kernel"):
+ raise ValueError("Unknown weight: {0}".format(cov_type))
+ if cov_type in ("robust", "heteroskedastic"):
weight_est = HeteroskedasticWeight
cov_est = HeteroskedasticCovariance
else: # 'kernel':
@@ -733,8 +834,13 @@ def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
# 2. Step 1 using w = inv(s) from SV
callback = callback_factory(self._j, args, disp=disp)
- res = minimize(self._j, sv, args=args, callback=callback,
- options={'disp': bool(disp), 'maxiter': max_iter})
+ res = minimize(
+ self._j,
+ sv,
+ args=args,
+ callback=callback,
+ options={"disp": bool(disp), "maxiter": max_iter},
+ )
params = res.x
last_obj = res.fun
iters = 1
@@ -748,8 +854,13 @@ def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
# 2. Step 1 using w = inv(s) from SV
callback = callback_factory(self._j, args, disp=disp)
- res = minimize(self._j, params, args=args, callback=callback,
- options={'disp': bool(disp), 'maxiter': max_iter})
+ res = minimize(
+ self._j,
+ params,
+ args=args,
+ callback=callback,
+ options={"disp": bool(disp), "maxiter": max_iter},
+ )
params = res.x
obj = res.fun
if np.abs(obj - last_obj) < 1e-6:
@@ -760,8 +871,13 @@ def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
args = (excess_returns, weight_est)
obj = self._j_cue
callback = callback_factory(obj, args, disp=disp)
- res = minimize(obj, params, args=args, callback=callback,
- options={'disp': bool(disp), 'maxiter': max_iter})
+ res = minimize(
+ obj,
+ params,
+ args=args,
+ callback=callback,
+ options={"disp": bool(disp), "maxiter": max_iter},
+ )
params = res.x
# 4. Compute final S and G for inference
@@ -769,8 +885,14 @@ def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
s = g.T @ g / nobs
jac = self._jacobian(params, excess_returns)
- cov_est = cov_est(g, jacobian=jac, center=center, debiased=debiased,
- df=self.factors.shape[1], **cov_config)
+ cov_est = cov_est(
+ g,
+ jacobian=jac,
+ center=center,
+ debiased=debiased,
+ df=self.factors.shape[1],
+ **cov_config
+ )
full_vcv = cov_est.cov
sel = slice((n * k), (n * k + k + nrf))
@@ -780,10 +902,12 @@ def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
alphas = g.mean(0)[sel, None]
alpha_vcv = s[sel, sel] / nobs
stat = self._j(params, excess_returns, w)
- jstat = WaldTestStatistic(stat, 'All alphas are 0', n - k - nrf, name='J-statistic')
+ jstat = WaldTestStatistic(
+ stat, "All alphas are 0", n - k - nrf, name="J-statistic"
+ )
# R2 calculation
- betas = np.reshape(params[:(n * k)], (n, k))
+ betas = np.reshape(params[: (n * k)], (n, k))
resids = self.portfolios.ndarray - self.factors.ndarray @ betas.T
resids -= resids.mean(0)[None, :]
residual_ss = (resids ** 2).sum()
@@ -794,23 +918,39 @@ def fit(self, center=True, use_cue=False, steps=2, disp=10, max_iter=1000,
param_names = []
for portfolio in self.portfolios.cols:
for factor in self.factors.cols:
- param_names.append('beta-{0}-{1}'.format(portfolio, factor))
+ param_names.append("beta-{0}-{1}".format(portfolio, factor))
if not excess_returns:
- param_names.append('lambda-risk_free')
- param_names.extend(['lambda-{0}'.format(f) for f in self.factors.cols])
- param_names.extend(['mu-{0}'.format(f) for f in self.factors.cols])
+ param_names.append("lambda-risk_free")
+ param_names.extend(["lambda-{0}".format(f) for f in self.factors.cols])
+ param_names.extend(["mu-{0}".format(f) for f in self.factors.cols])
rp_names = list(self.factors.cols)[:]
if not excess_returns:
- rp_names.insert(0, 'risk_free')
+ rp_names.insert(0, "risk_free")
params = np.c_[alphas, betas]
# 5. Return values
- res = AttrDict(params=params, cov=full_vcv, betas=betas, rp=rp, rp_cov=rp_cov,
- alphas=alphas, alpha_vcv=alpha_vcv, jstat=jstat,
- rsquared=r2, total_ss=total_ss, residual_ss=residual_ss,
- param_names=param_names, portfolio_names=self.portfolios.cols,
- factor_names=self.factors.cols, name=self._name,
- cov_type=cov_type, model=self, nobs=nobs, rp_names=rp_names,
- iter=iters, cov_est=cov_est)
+ res = AttrDict(
+ params=params,
+ cov=full_vcv,
+ betas=betas,
+ rp=rp,
+ rp_cov=rp_cov,
+ alphas=alphas,
+ alpha_vcv=alpha_vcv,
+ jstat=jstat,
+ rsquared=r2,
+ total_ss=total_ss,
+ residual_ss=residual_ss,
+ param_names=param_names,
+ portfolio_names=self.portfolios.cols,
+ factor_names=self.factors.cols,
+ name=self._name,
+ cov_type=cov_type,
+ model=self,
+ nobs=nobs,
+ rp_names=rp_names,
+ iter=iters,
+ cov_est=cov_est,
+ )
return GMMFactorModelResults(res)
@@ -877,10 +1017,10 @@ def _jacobian(self, params, excess_returns):
b = betas[[i]]
else:
b = np.c_[[1], betas[[i]]]
- jac12[(i * (k + 1)):(i + 1) * (k + 1)] = f_aug.T @ (iota @ b) / nobs
+ jac12[(i * (k + 1)) : (i + 1) * (k + 1)] = f_aug.T @ (iota @ b) / nobs
b = betas[[i]]
- jac13[(i * (k + 1)):(i + 1) * (k + 1)] = -f_aug.T @ (iota @ b) / nobs
+ jac13[(i * (k + 1)) : (i + 1) * (k + 1)] = -f_aug.T @ (iota @ b) / nobs
jac[:r1, s1:s2] = jac12
jac[:r1, s2:] = jac13
jac[-k:, -k:] = np.eye(k)
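Note for reviewers: the hunks above restructure `LinearFactorModelGMM.fit` (two-pass starting values, then iterated GMM steps that stop once the J objective changes by less than 1e-6). A minimal driving sketch, using simulated factors and portfolios rather than anything from the test suite:

```python
# Hedged usage sketch; data is simulated for illustration only.
import numpy as np
import pandas as pd
from linearmodels.asset_pricing import LinearFactorModelGMM

rs = np.random.RandomState(0)
factors = pd.DataFrame(
    rs.standard_normal((500, 3)), columns=["MktRF", "SMB", "HML"]
)
portfolios = pd.DataFrame(
    factors.values @ rs.standard_normal((3, 10)) + rs.standard_normal((500, 10)),
    columns=["p{0}".format(i) for i in range(10)],
)
mod = LinearFactorModelGMM.from_formula(
    "MktRF + SMB + HML", factors, portfolios=portfolios
)
res = mod.fit(steps=2, disp=0)  # disp=0 suppresses the iteration callback
print(res.j_statistic)  # test that all pricing errors (alphas) are zero
```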
diff --git a/linearmodels/asset_pricing/results.py b/linearmodels/asset_pricing/results.py
index 5aac36354c..64b2dc8ae3 100644
--- a/linearmodels/asset_pricing/results.py
+++ b/linearmodels/asset_pricing/results.py
@@ -5,9 +5,9 @@
import datetime as dt
-from property_cached import cached_property
import numpy as np
import pandas as pd
+from property_cached import cached_property
from scipy import stats
from statsmodels.iolib.summary import SimpleTable, fmt_2cols, fmt_params
@@ -23,6 +23,7 @@ class LinearFactorModelResults(_SummaryStr):
results : dict[str, any]
A dictionary of results from the model estimation.
"""
+
def __init__(self, results):
self._jstat = results.jstat
self._params = results.params
@@ -40,7 +41,7 @@ def __init__(self, results):
self.model = results.model
self._nobs = results.nobs
self._datetime = dt.datetime.now()
- self._cols = ['alpha'] + ['{0}'.format(f) for f in self._factor_names]
+ self._cols = ["alpha"] + ["{0}".format(f) for f in self._factor_names]
self._rp_names = results.rp_names
self._alpha_vcv = results.alpha_vcv
self._cov_est = results.cov_est
@@ -53,27 +54,31 @@ def summary(self):
``summary.as_html()`` and ``summary.as_latex()``.
"""
- title = self.name + ' Estimation Summary'
+ title = self.name + " Estimation Summary"
- top_left = [('No. Test Portfolios:', len(self._portfolio_names)),
- ('No. Factors:', len(self._factor_names)),
- ('No. Observations:', self.nobs),
- ('Date:', self._datetime.strftime('%a, %b %d %Y')),
- ('Time:', self._datetime.strftime('%H:%M:%S')),
- ('Cov. Estimator:', self._cov_type),
- ('', '')]
+ top_left = [
+ ("No. Test Portfolios:", len(self._portfolio_names)),
+ ("No. Factors:", len(self._factor_names)),
+ ("No. Observations:", self.nobs),
+ ("Date:", self._datetime.strftime("%a, %b %d %Y")),
+ ("Time:", self._datetime.strftime("%H:%M:%S")),
+ ("Cov. Estimator:", self._cov_type),
+ ("", ""),
+ ]
j_stat = _str(self.j_statistic.stat)
j_pval = pval_format(self.j_statistic.pval)
j_dist = self.j_statistic.dist_name
- top_right = [('R-squared:', _str(self.rsquared)),
- ('J-statistic:', j_stat),
- ('P-value', j_pval),
- ('Distribution:', j_dist),
- ('', ''),
- ('', ''),
- ('', '')]
+ top_right = [
+ ("R-squared:", _str(self.rsquared)),
+ ("J-statistic:", j_stat),
+ ("P-value", j_pval),
+ ("Distribution:", j_dist),
+ ("", ""),
+ ("", ""),
+ ("", ""),
+ ]
stubs = []
vals = []
@@ -87,9 +92,9 @@ def summary(self):
# Top Table
# Parameter table
fmt = fmt_2cols
- fmt['data_fmts'][1] = '%18s'
+ fmt["data_fmts"][1] = "%18s"
- top_right = [('%-21s' % (' ' + k), v) for k, v in top_right]
+ top_right = [("%-21s" % (" " + k), v) for k, v in top_right]
stubs = []
vals = []
for stub, val in top_right:
@@ -103,11 +108,7 @@ def summary(self):
tstats = np.asarray(self.risk_premia / self.risk_premia_se)
pvalues = 2 - 2 * stats.norm.cdf(np.abs(tstats))
ci = rp + se * stats.norm.ppf([[0.025, 0.975]])
- param_data = np.c_[rp,
- se,
- tstats[:, None],
- pvalues[:, None],
- ci]
+ param_data = np.c_[rp, se, tstats[:, None], pvalues[:, None], ci]
data = []
for row in param_data:
txt_row = []
@@ -117,24 +118,26 @@ def summary(self):
f = pval_format
txt_row.append(f(v))
data.append(txt_row)
- title = 'Risk Premia Estimates'
+ title = "Risk Premia Estimates"
table_stubs = list(self.risk_premia.index)
- header = ['Parameter', 'Std. Err.', 'T-stat', 'P-value', 'Lower CI', 'Upper CI']
- table = SimpleTable(data,
- stubs=table_stubs,
- txt_fmt=fmt_params,
- headers=header,
- title=title)
+ header = ["Parameter", "Std. Err.", "T-stat", "P-value", "Lower CI", "Upper CI"]
+ table = SimpleTable(
+ data, stubs=table_stubs, txt_fmt=fmt_params, headers=header, title=title
+ )
smry.tables.append(table)
- smry.add_extra_txt(['Covariance estimator:',
- str(self._cov_est),
- 'See full_summary for complete results'])
+ smry.add_extra_txt(
+ [
+ "Covariance estimator:",
+ str(self._cov_est),
+ "See full_summary for complete results",
+ ]
+ )
return smry
@staticmethod
def _single_table(params, se, name, param_names, first=False):
- tstats = (params / se)
+ tstats = params / se
pvalues = 2 - 2 * stats.norm.cdf(tstats)
ci = params + se * stats.norm.ppf([[0.025, 0.975]])
param_data = np.c_[params, se, tstats, pvalues, ci]
@@ -148,14 +151,22 @@ def _single_table(params, se, name, param_names, first=False):
f = pval_format
txt_row.append(f(v))
data.append(txt_row)
- title = '{0} Coefficients'.format(name)
+ title = "{0} Coefficients".format(name)
table_stubs = param_names
if first:
- header = ['Parameter', 'Std. Err.', 'T-stat', 'P-value', 'Lower CI', 'Upper CI']
+ header = [
+ "Parameter",
+ "Std. Err.",
+ "T-stat",
+ "P-value",
+ "Lower CI",
+ "Upper CI",
+ ]
else:
header = None
- table = SimpleTable(data, stubs=table_stubs, txt_fmt=fmt_params, headers=header,
- title=title)
+ table = SimpleTable(
+ data, stubs=table_stubs, txt_fmt=fmt_params, headers=header, title=title
+ )
return table
@@ -168,10 +179,16 @@ def full_summary(self):
param_names = list(params.columns)
first = True
for row in params.index:
- smry.tables.append(SimpleTable(['']))
- smry.tables.append(self._single_table(np.asarray(params.loc[row])[:, None],
- np.asarray(se.loc[row])[:, None],
- row, param_names, first))
+ smry.tables.append(SimpleTable([""]))
+ smry.tables.append(
+ self._single_table(
+ np.asarray(params.loc[row])[:, None],
+ np.asarray(se.loc[row])[:, None],
+ row,
+ param_names,
+ first,
+ )
+ )
first = False
return smry
@@ -199,7 +216,9 @@ def betas(self):
@property
def params(self):
"""Estimated parameters"""
- return pd.DataFrame(self._params, columns=self._cols, index=self._portfolio_names)
+ return pd.DataFrame(
+ self._params, columns=self._cols, index=self._portfolio_names
+ )
@property
def std_errors(self):
@@ -224,7 +243,9 @@ def cov_estimator(self):
@property
def cov(self):
"""Estimated covariance of parameters"""
- return pd.DataFrame(self._cov, columns=self._param_names, index=self._param_names)
+ return pd.DataFrame(
+ self._cov, columns=self._param_names, index=self._param_names
+ )
@property
def j_statistic(self):
@@ -233,7 +254,7 @@ def j_statistic(self):
Returns
-------
- j : WaldTestStatistic
+ WaldTestStatistic
Test statistic for null that model prices test portfolios
Notes
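For reference, the normal-approximation arithmetic behind the risk-premia table that the reformatted `summary` assembles, as a standalone sketch (the premia and standard errors below are invented):

```python
import numpy as np
from scipy import stats

rp = np.array([[0.52], [0.31]])  # risk-premia estimates (column vector)
se = np.array([[0.20], [0.15]])  # standard errors
tstats = rp / se
pvalues = 2 - 2 * stats.norm.cdf(np.abs(tstats))
ci = rp + se * stats.norm.ppf([[0.025, 0.975]])  # broadcasts to (k, 2)
print(np.c_[rp, se, tstats, pvalues, ci])  # the rows the summary formats
```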
diff --git a/linearmodels/compat/numpy.py b/linearmodels/compat/numpy.py
index 02c68a4eba..5792d29f64 100644
--- a/linearmodels/compat/numpy.py
+++ b/linearmodels/compat/numpy.py
@@ -2,7 +2,7 @@
import numpy as np
-NP_LT_114 = LooseVersion(np.__version__) < LooseVersion('1.14')
+NP_LT_114 = LooseVersion(np.__version__) < LooseVersion("1.14")
def lstsq(a, b, rcond=None):
@@ -14,4 +14,4 @@ def lstsq(a, b, rcond=None):
return np.linalg.lstsq(a, b, rcond=rcond)
-__all__ = ['lstsq']
+__all__ = ["lstsq"]
diff --git a/linearmodels/compat/pandas.py b/linearmodels/compat/pandas.py
index 8deabb8750..1d943bccc3 100644
--- a/linearmodels/compat/pandas.py
+++ b/linearmodels/compat/pandas.py
@@ -5,11 +5,20 @@
from linearmodels.typing import AnyPandas
-PD_LT_023 = LooseVersion(pd.__version__) < LooseVersion('0.23')
-
-__all__ = ['is_string_dtype', 'is_numeric_dtype', 'is_categorical',
- 'is_string_like', 'is_categorical_dtype', 'is_datetime64_any_dtype',
- 'concat', 'get_codes', 'to_numpy', 'assert_series_equal']
+PD_LT_023 = LooseVersion(pd.__version__) < LooseVersion("0.23")
+
+__all__ = [
+ "is_string_dtype",
+ "is_numeric_dtype",
+ "is_categorical",
+ "is_string_like",
+ "is_categorical_dtype",
+ "is_datetime64_any_dtype",
+ "concat",
+ "get_codes",
+ "to_numpy",
+ "assert_series_equal",
+]
try:
from pandas.testing import assert_series_equal
@@ -23,21 +32,25 @@ def concat(*args, **kwargs):
See pandas.compat
"""
- if PD_LT_023 and 'sort' in kwargs:
+ if PD_LT_023 and "sort" in kwargs:
kwargs = kwargs.copy()
- del kwargs['sort']
+ del kwargs["sort"]
elif not PD_LT_023:
- if 'sort' not in kwargs:
+ if "sort" not in kwargs:
kwargs = kwargs.copy()
- kwargs['sort'] = False
+ kwargs["sort"] = False
return pd.concat(*args, **kwargs)
try:
- from pandas.api.types import (is_numeric_dtype, is_categorical,
- is_string_dtype, is_categorical_dtype,
- is_datetime64_any_dtype)
+ from pandas.api.types import (
+ is_numeric_dtype,
+ is_categorical,
+ is_string_dtype,
+ is_categorical_dtype,
+ is_datetime64_any_dtype,
+ )
# From pandas 0.20.1
def is_string_like(obj):
@@ -50,15 +63,21 @@ def is_string_like(obj):
Returns
-------
- is_str_like : bool
+ bool
Whether `obj` is a string or not.
"""
return isinstance(obj, str)
+
except ImportError: # pragma: no cover
- from pandas.core.common import (is_string_dtype, is_numeric_dtype,
- is_categorical, is_categorical_dtype,
- is_datetime64_any_dtype, is_string_like)
+ from pandas.core.common import (
+ is_string_dtype,
+ is_numeric_dtype,
+ is_categorical,
+ is_categorical_dtype,
+ is_datetime64_any_dtype,
+ is_string_like,
+ )
def get_codes(index):
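The behavior the `concat` wrapper above normalizes, shown directly against pandas with two illustrative frames:

```python
# sort=False is what the wrapper injects on pandas >= 0.23; on older pandas
# it strips the keyword instead, since concat there does not accept it.
import pandas as pd

a = pd.DataFrame({"x": [1.0]}, index=[0])
b = pd.DataFrame({"y": [2.0]}, index=[1])
print(pd.concat([a, b], sort=False))  # column order preserved, no sort warning
```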
diff --git a/linearmodels/compat/statsmodels.py b/linearmodels/compat/statsmodels.py
index 6c91ab84eb..f939bf3117 100644
--- a/linearmodels/compat/statsmodels.py
+++ b/linearmodels/compat/statsmodels.py
@@ -8,10 +8,10 @@ def as_html(self):
Returns
-------
- html : string
+ str
concatenated summary tables in HTML format
"""
- html = summary.summary_return(self.tables, return_fmt='html')
+ html = summary.summary_return(self.tables, return_fmt="html")
if self.extra_txt is not None:
- html = html + '<br/>' + self.extra_txt.replace('\n', '<br/>')
+ html = html + "<br/>" + self.extra_txt.replace("\n", "<br/>")
return html
diff --git a/linearmodels/conftest.py b/linearmodels/conftest.py
index c6b08602c6..421b392797 100644
--- a/linearmodels/conftest.py
+++ b/linearmodels/conftest.py
@@ -2,15 +2,13 @@
def pytest_addoption(parser):
- parser.addoption("--skip-slow", action="store_true",
- help="skip slow tests")
- parser.addoption("--only-slow", action="store_true",
- help="run only slow tests")
+ parser.addoption("--skip-slow", action="store_true", help="skip slow tests")
+ parser.addoption("--only-slow", action="store_true", help="run only slow tests")
def pytest_runtest_setup(item):
- if 'slow' in item.keywords and item.config.getoption("--skip-slow"): # pragma: no cover
- pytest.skip("skipping due to --skip-slow") # pragma: no cover
+ if "slow" in item.keywords and item.config.getoption("--skip-slow"):
+ pytest.skip("skipping due to --skip-slow")
- if 'slow' not in item.keywords and item.config.getoption("--only-slow"): # pragma: no cover
- pytest.skip("skipping due to --only-slow") # pragma: no cover
+ if "slow" not in item.keywords and item.config.getoption("--only-slow"):
+ pytest.skip("skipping due to --only-slow")
diff --git a/linearmodels/datasets/__init__.py b/linearmodels/datasets/__init__.py
index 76f82fb947..06a1c19514 100644
--- a/linearmodels/datasets/__init__.py
+++ b/linearmodels/datasets/__init__.py
@@ -8,4 +8,4 @@ def get_path(f):
def load(module, file_name):
- return pd.read_csv(join(get_path(module), file_name), compression='bz2')
+ return pd.read_csv(join(get_path(module), file_name), compression="bz2")
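All of the per-dataset `load()` helpers below delegate to this module-level `load`, so access is uniform across datasets:

```python
# mroz is one of the bundled datasets touched later in this diff.
from linearmodels.datasets import mroz

data = mroz.load()  # decompresses mroz.csv.bz2 shipped with the package
print(data.shape)
```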
diff --git a/linearmodels/datasets/birthweight/__init__.py b/linearmodels/datasets/birthweight/__init__.py
index db57de8278..e148fa72eb 100644
--- a/linearmodels/datasets/birthweight/__init__.py
+++ b/linearmodels/datasets/birthweight/__init__.py
@@ -22,4 +22,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'birthweight.csv.bz2')
+
+ return datasets.load(__file__, "birthweight.csv.bz2")
diff --git a/linearmodels/datasets/card/__init__.py b/linearmodels/datasets/card/__init__.py
index bcdbe5ea05..04cb809e66 100644
--- a/linearmodels/datasets/card/__init__.py
+++ b/linearmodels/datasets/card/__init__.py
@@ -43,4 +43,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'card.csv.bz2')
+
+ return datasets.load(__file__, "card.csv.bz2")
diff --git a/linearmodels/datasets/fertility/__init__.py b/linearmodels/datasets/fertility/__init__.py
index 4af9ff36f0..dc0d0d58f3 100644
--- a/linearmodels/datasets/fertility/__init__.py
+++ b/linearmodels/datasets/fertility/__init__.py
@@ -34,4 +34,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'fertility.csv.bz2')
+
+ return datasets.load(__file__, "fertility.csv.bz2")
diff --git a/linearmodels/datasets/french/__init__.py b/linearmodels/datasets/french/__init__.py
index af2489b7f6..d5f27e48be 100644
--- a/linearmodels/datasets/french/__init__.py
+++ b/linearmodels/datasets/french/__init__.py
@@ -45,6 +45,7 @@
def load():
from linearmodels import datasets
- data = datasets.load(__file__, 'french.csv.bz2')
- data['dates'] = pd.to_datetime(data.dates)
+
+ data = datasets.load(__file__, "french.csv.bz2")
+ data["dates"] = pd.to_datetime(data.dates)
return data
diff --git a/linearmodels/datasets/fringe/__init__.py b/linearmodels/datasets/fringe/__init__.py
index 2658da371a..0366e9387d 100644
--- a/linearmodels/datasets/fringe/__init__.py
+++ b/linearmodels/datasets/fringe/__init__.py
@@ -47,4 +47,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'fringe.csv.bz2')
+
+ return datasets.load(__file__, "fringe.csv.bz2")
diff --git a/linearmodels/datasets/jobtraining/__init__.py b/linearmodels/datasets/jobtraining/__init__.py
index 895d595a5b..1fb5ec8617 100644
--- a/linearmodels/datasets/jobtraining/__init__.py
+++ b/linearmodels/datasets/jobtraining/__init__.py
@@ -38,4 +38,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'jobtraining.csv.bz2')
+
+ return datasets.load(__file__, "jobtraining.csv.bz2")
diff --git a/linearmodels/datasets/meps/__init__.py b/linearmodels/datasets/meps/__init__.py
index 28892f05c5..9a620b5bde 100644
--- a/linearmodels/datasets/meps/__init__.py
+++ b/linearmodels/datasets/meps/__init__.py
@@ -33,4 +33,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'meps.csv.bz2')
+
+ return datasets.load(__file__, "meps.csv.bz2")
diff --git a/linearmodels/datasets/mroz/__init__.py b/linearmodels/datasets/mroz/__init__.py
index 77a84006d3..b926d465f2 100644
--- a/linearmodels/datasets/mroz/__init__.py
+++ b/linearmodels/datasets/mroz/__init__.py
@@ -30,4 +30,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'mroz.csv.bz2')
+
+ return datasets.load(__file__, "mroz.csv.bz2")
diff --git a/linearmodels/datasets/munnell/__init__.py b/linearmodels/datasets/munnell/__init__.py
index 42e97c1139..0973e167de 100644
--- a/linearmodels/datasets/munnell/__init__.py
+++ b/linearmodels/datasets/munnell/__init__.py
@@ -18,4 +18,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'munnell.csv.bz2')
+
+ return datasets.load(__file__, "munnell.csv.bz2")
diff --git a/linearmodels/datasets/wage/__init__.py b/linearmodels/datasets/wage/__init__.py
index fdfb153e74..35ae7c835b 100644
--- a/linearmodels/datasets/wage/__init__.py
+++ b/linearmodels/datasets/wage/__init__.py
@@ -24,4 +24,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'wage.csv.bz2')
+
+ return datasets.load(__file__, "wage.csv.bz2")
diff --git a/linearmodels/datasets/wage_panel/__init__.py b/linearmodels/datasets/wage_panel/__init__.py
index ac04937c7b..5057bd204a 100644
--- a/linearmodels/datasets/wage_panel/__init__.py
+++ b/linearmodels/datasets/wage_panel/__init__.py
@@ -20,4 +20,5 @@
def load():
from linearmodels import datasets
- return datasets.load(__file__, 'wage_panel.csv.bz2')
+
+ return datasets.load(__file__, "wage_panel.csv.bz2")
diff --git a/linearmodels/formula.py b/linearmodels/formula.py
index 984a02a8c1..26a0e7dd4d 100644
--- a/linearmodels/formula.py
+++ b/linearmodels/formula.py
@@ -3,9 +3,20 @@
PanelOLS, PooledOLS, RandomEffects)
from linearmodels.system import IV3SLS, SUR, IVSystemGMM
-__all__ = ['between_ols', 'random_effects', 'first_difference_ols',
- 'pooled_ols', 'panel_ols', 'iv_2sls', 'iv_gmm', 'iv_gmm_cue',
- 'iv_liml', 'sur', 'iv_3sls', 'iv_system_gmm']
+__all__ = [
+ "between_ols",
+ "random_effects",
+ "first_difference_ols",
+ "pooled_ols",
+ "panel_ols",
+ "iv_2sls",
+ "iv_gmm",
+ "iv_gmm_cue",
+ "iv_liml",
+ "sur",
+ "iv_3sls",
+ "iv_system_gmm",
+]
iv_2sls = IV2SLS.from_formula
iv_liml = IVLIML.from_formula
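The aliases exported above give a statsmodels.formula-style entry point. A sketch with simulated data, using the bracketed endogenous/instrument syntax that the IV formula parser below handles:

```python
import numpy as np
import pandas as pd
from linearmodels.formula import iv_2sls

rs = np.random.RandomState(0)
z = rs.standard_normal(200)
x = z + rs.standard_normal(200)              # regressor instrumented by z
y = 1.0 + 2.0 * x + rs.standard_normal(200)
df = pd.DataFrame({"y": y, "x": x, "z": z})
res = iv_2sls("y ~ 1 + [x ~ z]", df).fit()   # iv_2sls is IV2SLS.from_formula
print(res.params)
```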
diff --git a/linearmodels/iv/__init__.py b/linearmodels/iv/__init__.py
index 70e7d4c750..3a89bfc8eb 100644
--- a/linearmodels/iv/__init__.py
+++ b/linearmodels/iv/__init__.py
@@ -2,5 +2,12 @@
from .model import IV2SLS, IVGMM, IVGMMCUE, IVLIML # flake8: noqa
from .results import compare # flake8: noqa
-__all__ = ['IV2SLS', 'IVGMM', 'IVGMMCUE', 'IVLIML', 'compare',
- 'AbsorbingLS', 'Interaction']
+__all__ = [
+ "IV2SLS",
+ "IVGMM",
+ "IVGMMCUE",
+ "IVLIML",
+ "compare",
+ "AbsorbingLS",
+ "Interaction",
+]
diff --git a/linearmodels/iv/_utility.py b/linearmodels/iv/_utility.py
index 0da5867ba6..bdf27a6008 100644
--- a/linearmodels/iv/_utility.py
+++ b/linearmodels/iv/_utility.py
@@ -34,7 +34,7 @@ def proj(y: ndarray, x: ndarray) -> ndarray:
Returns
-------
- yhat : ndarray
+ ndarray
Projected values of y (nobs by nseries)
"""
return x @ (np.linalg.pinv(x) @ y)
@@ -53,7 +53,7 @@ def annihilate(y: ndarray, x: ndarray) -> ndarray:
Returns
-------
- eps : ndarray
+ ndarray
Residuals values of y minus y projected on x (nobs by nseries)
"""
return y - proj(y, x)
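A quick property check for the two helpers above (random data; `proj` and `annihilate` live in a private module but can be imported for the check):

```python
import numpy as np
from linearmodels.iv._utility import annihilate, proj

rs = np.random.RandomState(0)
x = rs.standard_normal((50, 3))
y = rs.standard_normal((50, 2))
e = annihilate(y, x)
assert np.allclose(x.T @ e, 0.0)        # residuals are orthogonal to x
assert np.allclose(proj(y, x) + e, y)   # projection + residual recovers y
```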
@@ -80,44 +80,52 @@ class IVFormulaParser(object):
def __init__(self, formula: str, data: DataFrame, eval_env: int = 2):
self._formula = formula
self._data = data
- self._na_action = NAAction(on_NA='raise', NA_types=[])
+ self._na_action = NAAction(on_NA="raise", NA_types=[])
self._eval_env = eval_env
self._components = {} # type: Dict[str, str]
self._parse()
def _parse(self):
- blocks = self._formula.strip().split('~')
+ blocks = self._formula.strip().split("~")
if len(blocks) == 2:
dep = blocks[0].strip()
exog = blocks[1].strip()
- endog = '0'
- instr = '0'
+ endog = "0"
+ instr = "0"
elif len(blocks) == 3:
blocks = [bl.strip() for bl in blocks]
- if '[' not in blocks[1] or ']' not in blocks[2]:
- raise ValueError('formula not understood. Endogenous variables and '
- 'instruments must be segregated in a block that '
- 'starts with [ and ends with ].')
+ if "[" not in blocks[1] or "]" not in blocks[2]:
+ raise ValueError(
+ "formula not understood. Endogenous variables and "
+ "instruments must be segregated in a block that "
+ "starts with [ and ends with ]."
+ )
dep = blocks[0].strip()
- exog, endog = [bl.strip() for bl in blocks[1].split('[')]
- instr, exog2 = [bl.strip() for bl in blocks[2].split(']')]
- if endog[0] == '+' or endog[-1] == '+':
- raise ValueError('endogenous block must not start or end with +. This block '
- 'was: {0}'.format(endog))
- if instr[0] == '+' or instr[-1] == '+':
- raise ValueError('instrument block must not start or end with +. This '
- 'block was: {0}'.format(instr))
+ exog, endog = [bl.strip() for bl in blocks[1].split("[")]
+ instr, exog2 = [bl.strip() for bl in blocks[2].split("]")]
+ if endog[0] == "+" or endog[-1] == "+":
+ raise ValueError(
+ "endogenous block must not start or end with +. This block "
+ "was: {0}".format(endog)
+ )
+ if instr[0] == "+" or instr[-1] == "+":
+ raise ValueError(
+ "instrument block must not start or end with +. This "
+ "block was: {0}".format(instr)
+ )
if exog2:
exog += exog2
if exog:
- exog = exog[:-1].strip() if exog[-1] == '+' else exog
- exog = '0' if not exog else '0 + ' + exog
+ exog = exog[:-1].strip() if exog[-1] == "+" else exog
+ exog = "0" if not exog else "0 + " + exog
else:
- raise ValueError('formula contains more then 2 separators (~)')
- comp = {'dependent': '0 + ' + dep,
- 'exog': exog,
- 'endog': endog,
- 'instruments': instr}
+ raise ValueError("formula contains more then 2 separators (~)")
+ comp = {
+ "dependent": "0 + " + dep,
+ "exog": exog,
+ "endog": endog,
+ "instruments": instr,
+ }
self._components = comp
@property
@@ -140,33 +148,53 @@ def data(self) -> Tuple[OptionalDataFrame, ...]:
@property
def dependent(self) -> DataFrame:
"""Dependent variable"""
- dep = self.components['dependent']
- dep = dmatrix('0 + ' + dep, self._data, eval_env=self._eval_env,
- return_type='dataframe', NA_action=self._na_action)
+ dep = self.components["dependent"]
+ dep = dmatrix(
+ "0 + " + dep,
+ self._data,
+ eval_env=self._eval_env,
+ return_type="dataframe",
+ NA_action=self._na_action,
+ )
return dep
@property
def exog(self) -> OptionalDataFrame:
"""Exogenous variables"""
- exog = self.components['exog']
- exog = dmatrix(exog, self._data, eval_env=self._eval_env,
- return_type='dataframe', NA_action=self._na_action)
+ exog = self.components["exog"]
+ exog = dmatrix(
+ exog,
+ self._data,
+ eval_env=self._eval_env,
+ return_type="dataframe",
+ NA_action=self._na_action,
+ )
return self._empty_check(exog)
@property
def endog(self) -> OptionalDataFrame:
"""Endogenous variables"""
- endog = self.components['endog']
- endog = dmatrix('0 + ' + endog, self._data, eval_env=self._eval_env,
- return_type='dataframe', NA_action=self._na_action)
+ endog = self.components["endog"]
+ endog = dmatrix(
+ "0 + " + endog,
+ self._data,
+ eval_env=self._eval_env,
+ return_type="dataframe",
+ NA_action=self._na_action,
+ )
return self._empty_check(endog)
@property
def instruments(self) -> OptionalDataFrame:
"""Instruments"""
- instr = self.components['instruments']
- instr = dmatrix('0 + ' + instr, self._data, eval_env=self._eval_env,
- return_type='dataframe', NA_action=self._na_action)
+ instr = self.components["instruments"]
+ instr = dmatrix(
+ "0 + " + instr,
+ self._data,
+ eval_env=self._eval_env,
+ return_type="dataframe",
+ NA_action=self._na_action,
+ )
return self._empty_check(instr)
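What the parser extracts from the three-block syntax handled above; the expected dict is read off `_parse`, so treat it as a sketch:

```python
import pandas as pd
from linearmodels.iv._utility import IVFormulaParser

df = pd.DataFrame(
    {"y": [1.0, 2.0], "x1": [0.1, 0.2], "x2": [1.0, 0.0], "z": [3.0, 4.0]}
)
parser = IVFormulaParser("y ~ 1 + x1 + [x2 ~ z]", df)
print(parser.components)
# expected: {'dependent': '0 + y', 'exog': '0 + 1 + x1',
#            'endog': 'x2', 'instruments': 'z'}
```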
diff --git a/linearmodels/iv/absorbing.py b/linearmodels/iv/absorbing.py
index 2f95f13ca0..4c236a911c 100644
--- a/linearmodels/iv/absorbing.py
+++ b/linearmodels/iv/absorbing.py
@@ -27,7 +27,7 @@
except ImportError:
from hashlib import sha1 as hash_func
-SCALAR_DTYPES = {'int8': int8, 'int16': int16, 'int32': int32, 'int64': int64}
+SCALAR_DTYPES = {"int8": int8, "int16": int16, "int32": int32, "int64": int64}
_VARIABLE_CACHE = defaultdict(dict) # type: DefaultDict[Hashable, Dict[str, ndarray]]
@@ -45,8 +45,9 @@ def clear_cache():
_VARIABLE_CACHE.clear()
-def lsmr_annihilate(x: csc_matrix, y: ndarray, use_cache: bool = True, x_hash=None,
- **lsmr_options) -> ndarray:
+def lsmr_annihilate(
+ x: csc_matrix, y: ndarray, use_cache: bool = True, x_hash=None, **lsmr_options
+) -> ndarray:
r"""
 Removes the projection of y on x from y
@@ -66,7 +67,7 @@ def lsmr_annihilate(x: csc_matrix, y: ndarray, use_cache: bool = True, x_hash=No
Returns
-------
- resids : ndarray
+ ndarray
Returns the residuals from regressing y on x, (nobs, nvar)
Notes
@@ -81,14 +82,14 @@ def lsmr_annihilate(x: csc_matrix, y: ndarray, use_cache: bool = True, x_hash=No
"""
use_cache = use_cache and x_hash is not None
- regressor_hash = x_hash if x_hash is not None else ''
+ regressor_hash = x_hash if x_hash is not None else ""
default_opts = dict(atol=1e-8, btol=1e-8, show=False)
default_opts.update(lsmr_options)
resids = []
for i in range(y.shape[1]):
- _y = y[:, i:i + 1]
+ _y = y[:, i : i + 1]
- variable_digest = ''
+ variable_digest = ""
if use_cache:
hasher = hash_func()
hasher.update(ascontiguousarray(_y.data))
@@ -98,7 +99,7 @@ def lsmr_annihilate(x: csc_matrix, y: ndarray, use_cache: bool = True, x_hash=No
resid = _VARIABLE_CACHE[regressor_hash][variable_digest]
else:
beta = lsmr(x, _y, **default_opts)[0]
- resid = y[:, i:i + 1] - (x.dot(csc_matrix(beta[:, None]))).A
+ resid = y[:, i : i + 1] - (x.dot(csc_matrix(beta[:, None]))).A
_VARIABLE_CACHE[regressor_hash][variable_digest] = resid
resids.append(resid)
if resids:
@@ -119,7 +120,7 @@ def category_product(cats: AnyPandas) -> Series:
Returns
-------
- cp : Series
+ Series
Categorical series containing the cartesian product of the categories
in cats
"""
@@ -129,7 +130,7 @@ def category_product(cats: AnyPandas) -> Series:
sizes = []
for c in cats:
if not is_categorical(cats[c]):
- raise TypeError('cats must contain only categorical variables')
+ raise TypeError("cats must contain only categorical variables")
col = cats[c]
max_code = get_codes(col.cat).max()
size = 1
@@ -139,14 +140,18 @@ def category_product(cats: AnyPandas) -> Series:
nobs = cats.shape[0]
total_size = sum(sizes)
if total_size >= 63:
- raise ValueError('There are too many cats with too many states to use this method.')
+ raise ValueError(
+ "There are too many cats with too many states to use this method."
+ )
dtype_size = min(filter(lambda v: total_size < (v - 1), (8, 16, 32, 64)))
- dtype_str = 'int{0:d}'.format(dtype_size)
+ dtype_str = "int{0:d}".format(dtype_size)
dtype_val = dtype(dtype_str)
codes = zeros(nobs, dtype=dtype_val)
cum_size = 0
for i, col in enumerate(cats):
- codes += (get_codes(cats[col].cat).astype(dtype_val) << SCALAR_DTYPES[dtype_str](cum_size))
+ codes += get_codes(cats[col].cat).astype(dtype_val) << SCALAR_DTYPES[dtype_str](
+ cum_size
+ )
cum_size += sizes[i]
return Series(Categorical(codes), index=cats.index)
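The packing trick in the loop above, isolated: each categorical claims a bit field wide enough for its codes, and the shifted fields sum into a single integer code per row.

```python
# Two categoricals packed into one int64 code, mirroring the shifts above.
import numpy as np

codes_a = np.array([0, 1, 2, 1])   # 3 states -> needs 2 bits
codes_b = np.array([1, 0, 1, 1])   # 2 states -> needs 1 bit
packed = codes_a.astype(np.int64) + (codes_b.astype(np.int64) << 2)
print(packed)  # distinct (a, b) pairs map to distinct packed codes
```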
@@ -162,15 +167,16 @@ def category_interaction(cat: Series, precondition: bool = True) -> csc_matrix:
Returns
-------
- dummies : csc_matrix
+ csc_matrix
Sparse matrix of dummies with unit column norm
"""
codes = get_codes(category_product(cat).cat)
return dummy_matrix(codes[:, None], precondition=precondition)[0]
-def category_continuous_interaction(cat: AnyPandas, cont: AnyPandas,
- precondition: bool = True) -> csc_matrix:
+def category_continuous_interaction(
+ cat: AnyPandas, cont: AnyPandas, precondition: bool = True
+) -> csc_matrix:
"""
Parameters
----------
@@ -183,7 +189,7 @@ def category_continuous_interaction(cat: AnyPandas, cont: AnyPandas,
Returns
-------
- interact : csc_matrix
+ csc_matrix
Sparse matrix of dummy interactions with unit column norm
"""
codes = get_codes(category_product(cat).cat)
@@ -240,10 +246,15 @@ class Interaction(object):
>>> interact.sparse.shape # Cart product of all cats, 5**4, times ncont, 3
(100000, 1875)
"""
- _iv_data = IVData(None, 'none', 1)
- def __init__(self, cat: OptionalArrayLike = None, cont: OptionalArrayLike = None,
- nobs: int = None):
+ _iv_data = IVData(None, "none", 1)
+
+ def __init__(
+ self,
+ cat: OptionalArrayLike = None,
+ cont: OptionalArrayLike = None,
+ nobs: int = None,
+ ):
self._cat = cat
self._cont = cont
self._cat_data = self._iv_data
@@ -257,26 +268,28 @@ def nobs(self):
def _check_data(self):
cat, cont = self._cat, self._cont
- cat_nobs = getattr(cat, 'shape', (0,))[0]
- cont_nobs = getattr(cont, 'shape', (0,))[0]
+ cat_nobs = getattr(cat, "shape", (0,))[0]
+ cont_nobs = getattr(cont, "shape", (0,))[0]
nobs = max(cat_nobs, cont_nobs)
if cat is None and cont is None:
if self._nobs is not None:
- self._cont_data = self._cat_data = IVData(None, 'none', nobs=self._nobs)
+ self._cont_data = self._cat_data = IVData(None, "none", nobs=self._nobs)
else:
- raise ValueError('nobs must be provided when cat and cont are None')
+ raise ValueError("nobs must be provided when cat and cont are None")
return
self._nobs = nobs
- self._cat_data = IVData(cat, 'cat', nobs=nobs, convert_dummies=False)
- self._cont_data = IVData(cont, 'cont', nobs=nobs, convert_dummies=False)
+ self._cat_data = IVData(cat, "cat", nobs=nobs, convert_dummies=False)
+ self._cont_data = IVData(cont, "cont", nobs=nobs, convert_dummies=False)
if self._cat_data.shape[1] == self._cont_data.shape[1] == 0:
- raise ValueError('Both cat and cont are empty arrays')
+ raise ValueError("Both cat and cont are empty arrays")
cat_data = self._cat_data.pandas
convert = [col for col in cat_data if not (is_categorical(cat_data[col]))]
if convert:
- cat_data = DataFrame({col: cat_data[col].astype('category') for col in cat_data})
- self._cat_data = IVData(cat_data, 'cat', convert_dummies=False)
+ cat_data = DataFrame(
+ {col: cat_data[col].astype("category") for col in cat_data}
+ )
+ self._cat_data = IVData(cat_data, "cat", convert_dummies=False)
@property
def cat(self) -> DataFrame:
@@ -303,7 +316,7 @@ def sparse(self) -> csc_matrix:
Returns
-------
- dummy_interact : csc_matrix
+ csc_matrix
Dummy interaction constructed from the cartesian product of
the categories and each of the continuous variables.
@@ -320,9 +333,12 @@ def sparse(self) -> csc_matrix:
if self.cat.shape[1] and self.cont.shape[1]:
out = []
for col in self.cont:
- out.append(category_continuous_interaction(self.cat, self.cont[col],
- precondition=False))
- return sp.hstack(out, format='csc')
+ out.append(
+ category_continuous_interaction(
+ self.cat, self.cont[col], precondition=False
+ )
+ )
+ return sp.hstack(out, format="csc")
elif self.cat.shape[1]:
return category_interaction(category_product(self.cat), precondition=False)
elif self.cont.shape[1]:
@@ -340,7 +356,9 @@ def hash(self):
cat_hashes = []
cat = self.cat
for col in cat:
- hasher.update(ascontiguousarray(to_numpy(get_codes(self.cat[col].cat)).data))
+ hasher.update(
+ ascontiguousarray(to_numpy(get_codes(self.cat[col].cat)).data)
+ )
cat_hashes.append(hasher.hexdigest())
hasher = _reset(hasher)
cat_hashes = tuple(sorted(cat_hashes))
@@ -355,7 +373,7 @@ def hash(self):
return sorted(hashes)
@staticmethod
- def from_frame(frame: DataFrame) -> 'Interaction':
+ def from_frame(frame: DataFrame) -> "Interaction":
"""
 Convenience function that simplifies using a DataFrame
@@ -368,7 +386,7 @@ def from_frame(frame: DataFrame) -> 'Interaction':
Returns
-------
- interaction : Interaction
+ Interaction
Instance using the columns of frame
Examples
@@ -412,9 +430,14 @@ class AbsorbingRegressor(object):
Weights, if any
"""
- def __init__(self, *, cat: DataFrame = None, cont: DataFrame = None,
- interactions: List[Interaction] = None,
- weights: ndarray = None):
+ def __init__(
+ self,
+ *,
+ cat: DataFrame = None,
+ cont: DataFrame = None,
+ interactions: List[Interaction] = None,
+ weights: ndarray = None
+ ):
self._cat = cat
self._cont = cont
self._interactions = interactions
@@ -438,7 +461,9 @@ def hash(self):
hasher = hash_func()
if self._cat is not None:
for col in self._cat:
- hasher.update(ascontiguousarray(to_numpy(get_codes(self._cat[col].cat)).data))
+ hasher.update(
+ ascontiguousarray(to_numpy(get_codes(self._cat[col].cat)).data)
+ )
hashes.append((hasher.hexdigest(),))
hasher = _reset(hasher)
if self._cont is not None:
@@ -471,11 +496,13 @@ def _regressors(self) -> csc_matrix:
regressors.extend([interact.sparse for interact in self._interactions])
if regressors:
- regressor_mat = sp.hstack(regressors, format='csc')
+ regressor_mat = sp.hstack(regressors, format="csc")
approx_rank = regressor_mat.shape[1]
self._approx_rank = approx_rank
if self._weights is not None:
- return (sp.diags(sqrt(self._weights.squeeze())).dot(regressor_mat)).asformat('csc')
+ return (
+ sp.diags(sqrt(self._weights.squeeze())).dot(regressor_mat)
+ ).asformat("csc")
return regressor_mat
else:
self._approx_rank = 0
@@ -565,14 +592,19 @@ class AbsorbingLS(object):
>>> mod = AbsorbingLS(dep, exog, absorb=absorb, interactions=iaction)
"""
- def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike = None, *,
- absorb: InteractionVar = None,
- interactions: Union[InteractionVar, Iterable[InteractionVar]] = None,
- weights: OptionalArrayLike = None):
-
- self._dependent = IVData(dependent, 'dependent')
+ def __init__(
+ self,
+ dependent: ArrayLike,
+ exog: OptionalArrayLike = None,
+ *,
+ absorb: InteractionVar = None,
+ interactions: Union[InteractionVar, Iterable[InteractionVar]] = None,
+ weights: OptionalArrayLike = None
+ ):
+
+ self._dependent = IVData(dependent, "dependent")
self._nobs = nobs = self._dependent.shape[0]
- self._exog = IVData(exog, 'exog', nobs=self._nobs)
+ self._exog = IVData(exog, "exog", nobs=self._nobs)
self._absorb = absorb
if isinstance(absorb, DataFrame):
self._absorb_inter = Interaction.from_frame(absorb)
@@ -581,7 +613,7 @@ def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike = None, *,
elif isinstance(absorb, Interaction):
self._absorb_inter = absorb
else:
- raise TypeError('absorb must ba a DataFrame or an Interaction')
+ raise TypeError("absorb must ba a DataFrame or an Interaction")
self._weights = weights
self._is_weighted = False
self._check_weights()
@@ -598,7 +630,7 @@ def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike = None, *,
self._drop_locs = self._drop_missing()
self._columns = self._exog.cols
self._index = self._dependent.rows
- self._method = 'Absorbing LS'
+ self._method = "Absorbing LS"
self._const_col = 0
self._has_constant = False
@@ -634,22 +666,26 @@ def _check_weights(self):
if self._weights is None:
nobs = self._dependent.shape[0]
self._is_weighted = False
- self._weight_data = IVData(ones(nobs), 'weights')
+ self._weight_data = IVData(ones(nobs), "weights")
else:
self._is_weighted = True
weights = IVData(self._weights).ndarray
weights = weights / nanmean(weights)
- self._weight_data = IVData(weights, var_name='weights', nobs=self._nobs)
+ self._weight_data = IVData(weights, var_name="weights", nobs=self._nobs)
def _check_shape(self):
nobs = self._nobs
if self._absorb is not None:
if self._absorb_inter.nobs != nobs:
- raise ValueError('absorb and dependent have different number of observations')
+ raise ValueError(
+ "absorb and dependent have different number of observations"
+ )
for interact in self._interaction_list:
if interact.nobs != nobs:
- raise ValueError('interactions ({0}) and dependent have different number of '
- 'observations'.format(str(interact)))
+ raise ValueError(
+ "interactions ({0}) and dependent have different number of "
+ "observations".format(str(interact))
+ )
@property
def absorbed_dependent(self) -> DataFrame:
@@ -658,7 +694,7 @@ def absorbed_dependent(self) -> DataFrame:
Returns
-------
- dependent : DataFrame
+ DataFrame
Dependent after effects have been absorbed
Raises
@@ -668,7 +704,9 @@ def absorbed_dependent(self) -> DataFrame:
"""
if self._absorbed_dependent is not None:
return self._absorbed_dependent
- raise RuntimeError('fit must be called once before absorbed_dependent is available')
+ raise RuntimeError(
+ "fit must be called once before absorbed_dependent is available"
+ )
@property
def absorbed_exog(self) -> DataFrame:
@@ -677,7 +715,7 @@ def absorbed_exog(self) -> DataFrame:
Returns
-------
- exogenous : DataFrame
+ DataFrame
Exogenous after effects have been absorbed
Raises
@@ -687,7 +725,7 @@ def absorbed_exog(self) -> DataFrame:
"""
if self._absorbed_exog is not None:
return self._absorbed_exog
- raise RuntimeError('fit must be called once before absorbed_exog is available')
+ raise RuntimeError("fit must be called once before absorbed_exog is available")
@property
def weights(self):
@@ -707,7 +745,7 @@ def has_constant(self):
@property
def instruments(self):
- return IVData(None, 'instrument', nobs=self._dependent.shape[0])
+ return IVData(None, "instrument", nobs=self._dependent.shape[0])
def _prepare_interactions(self):
if self._interactions is None:
@@ -723,13 +761,19 @@ def _prepare_interactions(self):
elif isinstance(interact, Interaction):
self._interaction_list.append(interact)
else:
- raise TypeError('interactions must contain DataFrames or Interactions')
+ raise TypeError(
+ "interactions must contain DataFrames or Interactions"
+ )
def _first_time_fit(self, use_cache, lsmr_options):
weights = self.weights.ndarray if self._is_weighted else None
- areg = AbsorbingRegressor(cat=self._absorb_inter.cat, cont=self._absorb_inter.cont,
- interactions=self._interaction_list, weights=weights)
+ areg = AbsorbingRegressor(
+ cat=self._absorb_inter.cat,
+ cont=self._absorb_inter.cont,
+ interactions=self._interaction_list,
+ weights=weights,
+ )
areg_constant = areg.has_constant
self._regressors = preconditioner(areg.regressors)[0]
self._num_params += areg.approx_rank
@@ -751,10 +795,12 @@ def _first_time_fit(self, use_cache, lsmr_options):
lsmr_options = {} if lsmr_options is None else lsmr_options
if self._regressors.shape[1] > 0:
- dep_resid = lsmr_annihilate(self._regressors, dep, use_cache, self._regressors_hash,
- **lsmr_options)
- exog_resid = lsmr_annihilate(self._regressors, exog, use_cache,
- self._regressors_hash, **lsmr_options)
+ dep_resid = lsmr_annihilate(
+ self._regressors, dep, use_cache, self._regressors_hash, **lsmr_options
+ )
+ exog_resid = lsmr_annihilate(
+ self._regressors, exog, use_cache, self._regressors_hash, **lsmr_options
+ )
else:
dep_resid = dep
exog_resid = exog
@@ -763,13 +809,24 @@ def _first_time_fit(self, use_cache, lsmr_options):
dep_resid += root_w * mu_dep
exog_resid += root_w * mu_exog
- self._absorbed_dependent = DataFrame(dep_resid, index=self._dependent.pandas.index,
- columns=self._dependent.pandas.columns)
- self._absorbed_exog = DataFrame(exog_resid, index=self._exog.pandas.index,
- columns=self._exog.pandas.columns)
-
- def fit(self, *, cov_type: str = 'robust', debiased: bool = False, lsmr_options: dict = None,
- use_cache: bool = True, **cov_config: Any):
+ self._absorbed_dependent = DataFrame(
+ dep_resid,
+ index=self._dependent.pandas.index,
+ columns=self._dependent.pandas.columns,
+ )
+ self._absorbed_exog = DataFrame(
+ exog_resid, index=self._exog.pandas.index, columns=self._exog.pandas.columns
+ )
+
+ def fit(
+ self,
+ *,
+ cov_type: str = "robust",
+ debiased: bool = False,
+ lsmr_options: dict = None,
+ use_cache: bool = True,
+ **cov_config: Any
+ ):
"""
Estimate model parameters
@@ -805,7 +862,7 @@ def fit(self, *, cov_type: str = 'robust', debiased: bool = False, lsmr_options:
Returns
-------
- results : AbsorbingLSResults
+ AbsorbingLSResults
Results container
Notes
@@ -843,18 +900,19 @@ def fit(self, *, cov_type: str = 'robust', debiased: bool = False, lsmr_options:
self._num_params += exog_resid.shape[1]
cov_estimator = COVARIANCE_ESTIMATORS[cov_type]
- cov_config['debiased'] = debiased
- cov_config['kappa'] = 0.0
+ cov_config["debiased"] = debiased
+ cov_config["kappa"] = 0.0
cov_config_copy = {k: v for k, v in cov_config.items()}
- if 'center' in cov_config_copy:
- del cov_config_copy['center']
- cov_estimator = cov_estimator(exog_resid, dep_resid, exog_resid, params, **cov_config_copy)
+ if "center" in cov_config_copy:
+ del cov_config_copy["center"]
+ cov_estimator = cov_estimator(
+ exog_resid, dep_resid, exog_resid, params, **cov_config_copy
+ )
- results = {'kappa': 0.0,
- 'liml_kappa': 0.0}
+ results = {"kappa": 0.0, "liml_kappa": 0.0}
pe = self._post_estimation(params, cov_estimator, cov_type)
results.update(pe)
- results['df_model'] = self._num_params
+ results["df_model"] = self._num_params
return AbsorbingLSResults(results, self)
@@ -869,7 +927,7 @@ def resids(self, params: ndarray):
Returns
-------
- resids : ndarray
+ ndarray
Model residuals
"""
resids = self.wresids(params)
@@ -886,7 +944,7 @@ def wresids(self, params: ndarray):
Returns
-------
- wresids : ndarray
+ ndarray
Weighted model residuals
Notes
@@ -894,7 +952,9 @@ def wresids(self, params: ndarray):
Uses weighted versions of data instead of raw data. Identical to
resids if all weights are unity.
"""
- return to_numpy(self._absorbed_dependent) - to_numpy(self._absorbed_exog) @ params
+ return (
+ to_numpy(self._absorbed_dependent) - to_numpy(self._absorbed_exog) @ params
+ )
def _f_statistic(self, params: ndarray, cov: ndarray, debiased: bool):
const_loc = find_constant(self._exog.ndarray)
@@ -906,10 +966,16 @@ def _post_estimation(self, params: ndarray, cov_estimator, cov_type: str):
columns = self._columns
index = self._index
eps = self.resids(params)
- fitted = DataFrame(self._dependent.ndarray - eps, index=self._dependent.rows,
- columns=['fitted_values'])
- absorbed_effects = DataFrame(to_numpy(self._absorbed_dependent) - to_numpy(fitted),
- columns=['absorbed_effects'], index=self._dependent.rows)
+ fitted = DataFrame(
+ self._dependent.ndarray - eps,
+ index=self._dependent.rows,
+ columns=["fitted_values"],
+ )
+ absorbed_effects = DataFrame(
+ to_numpy(self._absorbed_dependent) - to_numpy(fitted),
+ columns=["absorbed_effects"],
+ index=self._dependent.rows,
+ )
weps = self.wresids(params)
cov = cov_estimator.cov
@@ -930,7 +996,7 @@ def _post_estimation(self, params: ndarray, cov_estimator, cov_type: str):
# If absorbing contains a constant, but exog does not, no need to demean
if self._const_col is not None:
col = self._const_col
- x = to_numpy(self._absorbed_exog)[:, col:col + 1]
+ x = to_numpy(self._absorbed_exog)[:, col : col + 1]
mu = (lstsq(x, to_numpy(e))[0]).squeeze()
e = e - x * mu
@@ -938,25 +1004,27 @@ def _post_estimation(self, params: ndarray, cov_estimator, cov_type: str):
r2_absorbed = max(1 - residual_ss / aborbed_total_ss, 0.0)
fstat = self._f_statistic(params, cov, debiased)
- out = {'params': Series(params.squeeze(), columns, name='parameter'),
- 'eps': Series(eps.squeeze(), index=index, name='residual'),
- 'weps': Series(weps.squeeze(), index=index, name='weighted residual'),
- 'cov': DataFrame(cov, columns=columns, index=columns),
- 's2': float(cov_estimator.s2),
- 'debiased': debiased,
- 'residual_ss': float(residual_ss),
- 'total_ss': float(total_ss),
- 'r2': float(r2),
- 'fstat': fstat,
- 'vars': columns,
- 'instruments': [],
- 'cov_config': cov_estimator.config,
- 'cov_type': cov_type,
- 'method': self._method,
- 'cov_estimator': cov_estimator,
- 'fitted': fitted,
- 'original_index': self._original_index,
- 'absorbed_effects': absorbed_effects,
- 'absorbed_r2': r2_absorbed}
+ out = {
+ "params": Series(params.squeeze(), columns, name="parameter"),
+ "eps": Series(eps.squeeze(), index=index, name="residual"),
+ "weps": Series(weps.squeeze(), index=index, name="weighted residual"),
+ "cov": DataFrame(cov, columns=columns, index=columns),
+ "s2": float(cov_estimator.s2),
+ "debiased": debiased,
+ "residual_ss": float(residual_ss),
+ "total_ss": float(total_ss),
+ "r2": float(r2),
+ "fstat": fstat,
+ "vars": columns,
+ "instruments": [],
+ "cov_config": cov_estimator.config,
+ "cov_type": cov_type,
+ "method": self._method,
+ "cov_estimator": cov_estimator,
+ "fitted": fitted,
+ "original_index": self._original_index,
+ "absorbed_effects": absorbed_effects,
+ "absorbed_r2": r2_absorbed,
+ }
return out
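End-to-end sketch of the estimator reworked above, absorbing a single categorical fixed effect (data simulated for illustration):

```python
import numpy as np
import pandas as pd
from linearmodels.iv import AbsorbingLS

rs = np.random.RandomState(0)
n = 1000
cat = pd.Series(pd.Categorical(rs.randint(0, 10, n)))
effects = np.asarray(cat.cat.codes, dtype=float) / 10.0
x = rs.standard_normal(n)
y = 2.0 * x + effects + rs.standard_normal(n)
mod = AbsorbingLS(
    pd.Series(y, name="y"),
    pd.DataFrame({"x": x}),
    absorb=pd.DataFrame({"fe": cat}),
)
res = mod.fit()          # cov_type="robust" by default
print(res.params)        # coefficient on x should be near 2.0
```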
diff --git a/linearmodels/iv/common.py b/linearmodels/iv/common.py
index 3624a040f9..51b3ac5acf 100644
--- a/linearmodels/iv/common.py
+++ b/linearmodels/iv/common.py
@@ -23,8 +23,9 @@ def find_constant(x):
return None
-def f_statistic(params: ndarray, cov: ndarray, debiased: bool, resid_df: int,
- const_loc: int = None):
+def f_statistic(
+ params: ndarray, cov: ndarray, debiased: bool, resid_df: int, const_loc: int = None
+):
"""
Parameters
----------
@@ -42,18 +43,20 @@ def f_statistic(params: ndarray, cov: ndarray, debiased: bool, resid_df: int,
Returns
-------
- f_stat : WaldTestStatistic
+ WaldTestStatistic
WaldTestStatistic instance
"""
- null = 'All parameters ex. constant are zero'
- name = 'Model F-statistic'
+ null = "All parameters ex. constant are zero"
+ name = "Model F-statistic"
nvar = params.shape[0]
non_const = list(range(nvar))
if const_loc is not None:
non_const.pop(const_loc)
if not non_const:
- return InvalidTestStatistic('Model contains no non-constant exogenous terms', name=name)
+ return InvalidTestStatistic(
+ "Model contains no non-constant exogenous terms", name=name
+ )
test_params = params[non_const]
test_cov = cov[ix_(non_const, non_const)]
test_stat = test_params.T @ inv(test_cov) @ test_params
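The Wald construction inside `f_statistic`, spelled out with made-up numbers:

```python
import numpy as np
from numpy.linalg import inv

params = np.array([0.5, -0.2, 1.0])   # suppose index 2 is the constant
cov = np.diag([0.04, 0.01, 0.09])
non_const = [0, 1]                    # constant location popped out
b = params[non_const]
V = cov[np.ix_(non_const, non_const)]
wald = float(b.T @ inv(V) @ b)        # chi2(2) form; the debiased variant is F
print(wald)                           # 0.25/0.04 + 0.04/0.01 = 10.25
```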
diff --git a/linearmodels/iv/covariance.py b/linearmodels/iv/covariance.py
index fa049610ab..f9c3b0f113 100644
--- a/linearmodels/iv/covariance.py
+++ b/linearmodels/iv/covariance.py
@@ -10,10 +10,12 @@
from linearmodels.typing import Numeric, OptionalNumeric
-KernelWeight = Union[Callable[[int, float], ndarray],
- Callable[[float, float], ndarray],
- Callable[[int, VarArg(Any)], ndarray],
- Callable[[Numeric, int], Any]]
+KernelWeight = Union[
+ Callable[[int, float], ndarray],
+ Callable[[float, float], ndarray],
+ Callable[[int, VarArg(Any)], ndarray],
+ Callable[[Numeric, int], Any],
+]
CLUSTER_ERR = """
clusters has the wrong nobs. Expected {0}, got {1}. Any missing observation
@@ -36,7 +38,7 @@ def _cov_cluster(z: ndarray, clusters: ndarray) -> ndarray:
Returns
-------
- c : ndarray
+ ndarray
k by k cluster asymptotic covariance
"""
@@ -71,14 +73,16 @@ def _cov_kernel(z: ndarray, w: ndarray) -> ndarray:
Returns
-------
- c : ndarray
+ ndarray
k by k kernel asymptotic covariance
"""
k = len(w)
n = z.shape[0]
if k > n:
- raise ValueError('Length of w ({0}) is larger than the number '
- 'of elements in z ({1})'.format(k, n))
+ raise ValueError(
+ "Length of w ({0}) is larger than the number "
+ "of elements in z ({1})".format(k, n)
+ )
s = z.T @ z
for i in range(1, len(w)):
op = z[i:].T @ z[:-i]
@@ -99,7 +103,7 @@ def kernel_weight_bartlett(bw: int, *args) -> ndarray:
Returns
-------
- weights : ndarray
+ ndarray
Weight array ordered by lag position (maxlag + 1)
Notes
@@ -124,7 +128,7 @@ def kernel_weight_quadratic_spectral(bw: Numeric, n: int) -> ndarray:
Returns
-------
- weights : ndarray
+ ndarray
Weight array ordered by lag position (maxlag + 1)
Notes
@@ -165,7 +169,7 @@ def kernel_weight_parzen(bw: int, *args) -> ndarray:
Returns
-------
- weights : ndarray
+ ndarray
Weight array ordered by lag position (maxlag + 1)
Notes
@@ -182,7 +186,7 @@ def kernel_weight_parzen(bw: int, *args) -> ndarray:
return w
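For reference, the sibling `kernel_weight_bartlett` produces the familiar Newey-West decay; the same weights, computed standalone:

```python
import numpy as np

def bartlett_weights(bw):
    # w_i = 1 - i / (bw + 1) for lags i = 0..bw
    return 1.0 - np.arange(bw + 1) / (bw + 1)

print(bartlett_weights(4))  # [1.  0.8 0.6 0.4 0.2]
```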
-def kernel_optimal_bandwidth(x: ndarray, kernel: str = 'bartlett') -> int:
+def kernel_optimal_bandwidth(x: ndarray, kernel: str = "bartlett") -> int:
"""
Parameters
x : ndarray
@@ -196,7 +200,7 @@ def kernel_optimal_bandwidth(x: ndarray, kernel: str = 'bartlett') -> int:
Returns
-------
- m : int
+ int
Optimal bandwidth. Set to nobs - 1 if computed bandwidth is larger.
Notes
@@ -215,17 +219,17 @@ def kernel_optimal_bandwidth(x: ndarray, kernel: str = 'bartlett') -> int:
"""
t = x.shape[0]
x = x.squeeze()
- if kernel in ('bartlett', 'newey-west'):
+ if kernel in ("bartlett", "newey-west"):
q, c = 1, 1.1447
m_star = int(ceil(4 * (t / 100) ** (2 / 9)))
- elif kernel in ('qs', 'andrews', 'quadratic-spectral'):
+ elif kernel in ("qs", "andrews", "quadratic-spectral"):
q, c = 2, 1.3221
m_star = int(ceil(4 * (t / 100) ** (4 / 25)))
- elif kernel in ('gallant', 'parzen'):
+ elif kernel in ("gallant", "parzen"):
q, c = 2, 2.6614
m_star = int(ceil(4 * (t / 100) ** (2 / 25)))
else:
- raise ValueError('Unknown kernel: {0}'.format(kernel))
+ raise ValueError("Unknown kernel: {0}".format(kernel))
sigma = empty(m_star + 1)
sigma[0] = x.T @ x / t
for i in range(1, m_star + 1):
@@ -238,13 +242,15 @@ def kernel_optimal_bandwidth(x: ndarray, kernel: str = 'bartlett') -> int:
return min(int(ceil(m)), t - 1)
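Usage sketch for the automatic bandwidth selector above, on a persistent series (simulated AR(1), so a bandwidth above zero is selected):

```python
import numpy as np
from linearmodels.iv.covariance import kernel_optimal_bandwidth

rs = np.random.RandomState(0)
e = rs.standard_normal(500)
for t in range(1, 500):
    e[t] += 0.5 * e[t - 1]   # AR(1) persistence
print(kernel_optimal_bandwidth(e, kernel="bartlett"))
```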
-KERNEL_LOOKUP = {'bartlett': kernel_weight_bartlett,
- 'newey-west': kernel_weight_bartlett,
- 'quadratic-spectral': kernel_weight_quadratic_spectral,
- 'qs': kernel_weight_quadratic_spectral,
- 'andrews': kernel_weight_quadratic_spectral,
- 'gallant': kernel_weight_parzen,
- 'parzen': kernel_weight_parzen} # type: Dict[str, KernelWeight]
+KERNEL_LOOKUP = {
+ "bartlett": kernel_weight_bartlett,
+ "newey-west": kernel_weight_bartlett,
+ "quadratic-spectral": kernel_weight_quadratic_spectral,
+ "qs": kernel_weight_quadratic_spectral,
+ "andrews": kernel_weight_quadratic_spectral,
+ "gallant": kernel_weight_parzen,
+ "parzen": kernel_weight_parzen,
+} # type: Dict[str, KernelWeight]
class HomoskedasticCovariance(object):
@@ -290,12 +296,19 @@ class HomoskedasticCovariance(object):
:math:`Z` is the matrix of instruments, including exogenous regressors.
"""
- def __init__(self, x: ndarray, y: ndarray, z: ndarray, params: ndarray, debiased: bool = False,
- kappa: Numeric = 1):
+ def __init__(
+ self,
+ x: ndarray,
+ y: ndarray,
+ z: ndarray,
+ params: ndarray,
+ debiased: bool = False,
+ kappa: Numeric = 1,
+ ):
if not (x.shape[0] == y.shape[0] == z.shape[0]):
- raise ValueError('x, y and z must have the same number of rows')
+ raise ValueError("x, y and z must have the same number of rows")
if not x.shape[1] == len(params):
- raise ValueError('x and params must have compatible dimensions')
+ raise ValueError("x and params must have compatible dimensions")
self.x = x
self.y = y
@@ -307,19 +320,22 @@ def __init__(self, x: ndarray, y: ndarray, z: ndarray, params: ndarray, debiased
self._pinvz = pinv(z)
nobs, nvar = x.shape
self._scale = nobs / (nobs - nvar) if self._debiased else 1
- self._name = 'Unadjusted Covariance (Homoskedastic)'
+ self._name = "Unadjusted Covariance (Homoskedastic)"
def __str__(self) -> str:
out = self._name
- out += '\nDebiased: {0}'.format(self._debiased)
+ out += "\nDebiased: {0}".format(self._debiased)
if self._kappa != 1:
- out += '\nKappa: {0:0.3f}'.format(self._kappa)
+ out += "\nKappa: {0:0.3f}".format(self._kappa)
return out
def __repr__(self) -> str:
- return self.__str__() + '\n' + \
- self.__class__.__name__ + \
- ', id: {0}'.format(hex(id(self)))
+ return (
+ self.__str__()
+ + "\n"
+ + self.__class__.__name__
+ + ", id: {0}".format(hex(id(self)))
+ )
@property
def s(self) -> ndarray:
@@ -371,8 +387,7 @@ def debiased(self) -> bool:
@property
def config(self) -> Dict[str, Any]:
- return {'debiased': self.debiased,
- 'kappa': self._kappa}
+ return {"debiased": self.debiased, "kappa": self._kappa}
class HeteroskedasticCovariance(HomoskedasticCovariance):
@@ -420,10 +435,19 @@ class HeteroskedasticCovariance(HomoskedasticCovariance):
:math:`Z` is the matrix of instruments, including exogenous regressors.
"""
- def __init__(self, x: ndarray, y: ndarray, z: ndarray, params: ndarray, debiased: bool = False,
- kappa: Numeric = 1):
- super(HeteroskedasticCovariance, self).__init__(x, y, z, params, debiased, kappa)
- self._name = 'Robust Covariance (Heteroskedastic)'
+ def __init__(
+ self,
+ x: ndarray,
+ y: ndarray,
+ z: ndarray,
+ params: ndarray,
+ debiased: bool = False,
+ kappa: Numeric = 1,
+ ):
+ super(HeteroskedasticCovariance, self).__init__(
+ x, y, z, params, debiased, kappa
+ )
+ self._name = "Robust Covariance (Heteroskedastic)"
@property
def s(self) -> ndarray:
@@ -504,25 +528,33 @@ class KernelCovariance(HomoskedasticCovariance):
linearmodels.iv.covariance.kernel_weight_quadratic_spectral
"""
- def __init__(self, x: ndarray, y: ndarray, z: ndarray, params: ndarray,
- kernel: str = 'bartlett',
- bandwidth: OptionalNumeric = None, debiased: bool = False, kappa: Numeric = 1):
+ def __init__(
+ self,
+ x: ndarray,
+ y: ndarray,
+ z: ndarray,
+ params: ndarray,
+ kernel: str = "bartlett",
+ bandwidth: OptionalNumeric = None,
+ debiased: bool = False,
+ kappa: Numeric = 1,
+ ):
super(KernelCovariance, self).__init__(x, y, z, params, debiased, kappa)
self._kernels = KERNEL_LOOKUP
self._kernel = kernel
self._bandwidth = bandwidth
self._auto_bandwidth = False
- self._name = 'Kernel Covariance (HAC)'
+ self._name = "Kernel Covariance (HAC)"
if kernel not in KERNEL_LOOKUP:
- raise ValueError('Unknown kernel: {0}'.format(kernel))
+ raise ValueError("Unknown kernel: {0}".format(kernel))
def __str__(self) -> str:
out = super(KernelCovariance, self).__str__()
- out += '\nKernel: {0}'.format(self._kernel)
- out += '\nAutomatic Bandwidth: {0}'.format(self._auto_bandwidth)
+ out += "\nKernel: {0}".format(self._kernel)
+ out += "\nAutomatic Bandwidth: {0}".format(self._auto_bandwidth)
if self._bandwidth:
- out += '\nBandwidth: {0}'.format(self._bandwidth)
+ out += "\nBandwidth: {0}".format(self._bandwidth)
return out
@property
@@ -535,11 +567,12 @@ def s(self) -> ndarray:
xhat = z @ (pinvz @ x)
xhat_e = xhat * eps
- kernel = self.config['kernel']
- bw = self.config['bandwidth']
+ kernel = self.config["kernel"]
+ bw = self.config["bandwidth"]
if bw is None:
self._auto_bandwidth = True
from linearmodels.utility import has_constant
+
const, loc = has_constant(xhat)
sel = ones((xhat.shape[1], 1))
if const:
@@ -556,10 +589,12 @@ def s(self) -> ndarray:
@property
def config(self) -> Dict[str, Any]:
- return {'debiased': self.debiased,
- 'bandwidth': self._bandwidth,
- 'kernel': self._kernel,
- 'kappa': self._kappa}
+ return {
+ "debiased": self.debiased,
+ "bandwidth": self._bandwidth,
+ "kernel": self._kernel,
+ "kappa": self._kappa,
+ }
class ClusteredCovariance(HomoskedasticCovariance):
@@ -614,8 +649,16 @@ class ClusteredCovariance(HomoskedasticCovariance):
:math:`Z` is the matrix of instruments, including exogenous regressors.
"""
- def __init__(self, x: ndarray, y: ndarray, z: ndarray, params: ndarray,
- clusters: ndarray = None, debiased: bool = False, kappa: Numeric = 1):
+ def __init__(
+ self,
+ x: ndarray,
+ y: ndarray,
+ z: ndarray,
+ params: ndarray,
+ clusters: ndarray = None,
+ debiased: bool = False,
+ kappa: Numeric = 1,
+ ):
super(ClusteredCovariance, self).__init__(x, y, z, params, debiased, kappa)
nobs = x.shape[0]
@@ -626,15 +669,18 @@ def __init__(self, x: ndarray, y: ndarray, z: ndarray, params: ndarray,
self._num_clusters = [len(unique(clusters))]
self._num_clusters_str = str(self._num_clusters[0])
else:
- self._num_clusters = [len(unique(clusters[:, 0])), len(unique(clusters[:, 1]))]
- self._num_clusters_str = ', '.join(map(str, self._num_clusters))
+ self._num_clusters = [
+ len(unique(clusters[:, 0])),
+ len(unique(clusters[:, 1])),
+ ]
+ self._num_clusters_str = ", ".join(map(str, self._num_clusters))
if clusters is not None and clusters.shape[0] != nobs:
raise ValueError(CLUSTER_ERR.format(nobs, clusters.shape[0]))
- self._name = 'Clustered Covariance (One-Way)'
+ self._name = "Clustered Covariance (One-Way)"
def __str__(self) -> str:
out = super(ClusteredCovariance, self).__str__()
- out += '\nNum Clusters: {0}'.format(self._num_clusters_str)
+ out += "\nNum Clusters: {0}".format(self._num_clusters_str)
return out
@property
@@ -674,6 +720,8 @@ def rescale(s: ndarray, nc: int, nobs: int) -> ndarray:
@property
def config(self) -> Dict[str, Any]:
- return {'debiased': self.debiased,
- 'clusters': self._clusters,
- 'kappa': self._kappa}
+ return {
+ "debiased": self.debiased,
+ "clusters": self._clusters,
+ "kappa": self._kappa,
+ }
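A sketch of the one-way computation that ClusteredCovariance performs on the scores, assuming labels in a single column: scores are summed within each cluster before the outer products are averaged, so only between-cluster variation contributes to the covariance.

import numpy as np

def one_way_cluster_cov(ze: np.ndarray, clusters: np.ndarray) -> np.ndarray:
    # ze: (nobs, ninstr) scores z_i * eps_i; clusters: (nobs,) labels
    nobs, ninstr = ze.shape
    s = np.zeros((ninstr, ninstr))
    for g in np.unique(clusters):
        total = ze[clusters == g].sum(0)[:, None]  # within-cluster score sum
        s += total @ total.T
    return s / nobs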
diff --git a/linearmodels/iv/data.py b/linearmodels/iv/data.py
index d3d68f4a37..71ca0136a0 100644
--- a/linearmodels/iv/data.py
+++ b/linearmodels/iv/data.py
@@ -22,14 +22,14 @@
except ImportError:
HAS_XARRAY = False
-dim_err = '{0} has too many dims. Maximum is 2, actual is {1}'
-type_err = 'Only ndarrays, DataArrays and Series and DataFrames are supported'
+dim_err = "{0} has too many dims. Maximum is 2, actual is {1}"
+type_err = "Only ndarrays, DataArrays and Series and DataFrames are supported"
def convert_columns(s, drop_first):
if is_categorical(s):
out = pd.get_dummies(s, drop_first=drop_first)
- out.columns = [str(s.name) + '.' + str(c) for c in out]
+ out.columns = [str(s.name) + "." + str(c) for c in out]
return out
return s
@@ -52,7 +52,7 @@ class IVData(object):
Variable name to use when naming variables in NumPy arrays or
xarray DataArrays
nobs : int, optional
- Number of observation, used when `x` is None. If `x` is array-like,
+ Number of observations, used when `x` is None. If `x` is array_like,
then nobs is used to check the number of observations in `x`.
convert_dummies : bool, optional
Flag indicating whether pandas categoricals or string input data
@@ -61,8 +61,14 @@ class IVData(object):
Flag indicating whether to drop the first dummy category
"""
- def __init__(self, x: OptionalArrayLike, var_name: str = 'x', nobs: int = None,
- convert_dummies: bool = True, drop_first: bool = True):
+ def __init__(
+ self,
+ x: OptionalArrayLike,
+ var_name: str = "x",
+ nobs: int = None,
+ convert_dummies: bool = True,
+ drop_first: bool = True,
+ ):
if isinstance(x, IVData):
self.__dict__.update(copy.deepcopy(x.__dict__))
@@ -70,7 +76,7 @@ def __init__(self, x: OptionalArrayLike, var_name: str = 'x', nobs: int = None,
if x is None and nobs is not None:
x = np.empty((nobs, 0))
elif x is None:
- raise ValueError('nobs required when x is None')
+ raise ValueError("nobs required when x is None")
self.original = x
xndim = x.ndim
@@ -87,7 +93,7 @@ def __init__(self, x: OptionalArrayLike, var_name: str = 'x', nobs: int = None,
if x.shape[1] == 1:
cols = [var_name]
else:
- cols = [var_name + '.{0}'.format(i) for i in range(x.shape[1])]
+ cols = [var_name + ".{0}".format(i) for i in range(x.shape[1])]
self._pandas = pd.DataFrame(x, index=index, columns=cols)
self._row_labels = index
self._col_labels = cols
@@ -99,13 +105,15 @@ def __init__(self, x: OptionalArrayLike, var_name: str = 'x', nobs: int = None,
copied = False
columns = list(x.columns)
if len(set(columns)) != len(columns):
- raise ValueError('DataFrame contains duplicate column names. '
- 'All column names must be distinct')
+ raise ValueError(
+ "DataFrame contains duplicate column names. "
+ "All column names must be distinct"
+ )
all_numeric = True
for col in x:
c = x[col]
if is_string_dtype(c.dtype) and c.map(is_string_like).all():
- c = c.astype('category')
+ c = c.astype("category")
if not copied:
x = x.copy()
copied = True
@@ -113,8 +121,9 @@ def __init__(self, x: OptionalArrayLike, var_name: str = 'x', nobs: int = None,
dt = c.dtype
all_numeric = all_numeric and is_numeric_dtype(dt)
if not (is_numeric_dtype(dt) or is_categorical_dtype(dt)):
- raise ValueError('Only numeric, string or categorical '
- 'data permitted')
+ raise ValueError(
+ "Only numeric, string or categorical " "data permitted"
+ )
if convert_dummies:
x = expand_categoricals(x, drop_first)
@@ -138,7 +147,7 @@ def __init__(self, x: OptionalArrayLike, var_name: str = 'x', nobs: int = None,
index = list(x.coords[x.dims[0]].values)
xr_cols = x.coords[x.dims[1]].values
if is_numeric_dtype(xr_cols.dtype):
- xr_cols = [var_name + '.{0}'.format(i) for i in range(x.shape[1])]
+ xr_cols = [var_name + ".{0}".format(i) for i in range(x.shape[1])]
xr_cols = list(xr_cols)
self._ndarray = x.values.astype(np.float64)
self._pandas = pd.DataFrame(self._ndarray, columns=xr_cols, index=index)
@@ -149,8 +158,9 @@ def __init__(self, x: OptionalArrayLike, var_name: str = 'x', nobs: int = None,
if nobs is not None:
if self._ndarray.shape[0] != nobs:
- msg = 'Array required to have {nobs} obs, has ' \
- '{act}'.format(nobs=nobs, act=self._ndarray.shape[0])
+ msg = "Array required to have {nobs} obs, has " "{act}".format(
+ nobs=nobs, act=self._ndarray.shape[0]
+ )
raise ValueError(msg)
@property
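A short usage sketch of the conversion logic above, with hypothetical data: string columns are cast to categoricals and, when convert_dummies=True, expanded into dummies named column.category.

import pandas as pd
from linearmodels.iv.data import IVData

df = pd.DataFrame({"wage": [1.0, 2.0, 3.0],
                   "sector": ["farm", "city", "city"]})
iv = IVData(df, var_name="x", convert_dummies=True, drop_first=True)
# "sector" becomes dummies named "sector.<category>"; drop_first=True
# omits the first category to avoid perfect collinearity.
print(iv.pandas.columns.tolist())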
diff --git a/linearmodels/iv/gmm.py b/linearmodels/iv/gmm.py
index 6d859ca8b2..70a5d9f850 100644
--- a/linearmodels/iv/gmm.py
+++ b/linearmodels/iv/gmm.py
@@ -53,7 +53,7 @@ def weight_matrix(self, x, z, eps):
Returns
-------
- weight : ndarray
+ ndarray
Covariance of GMM moment conditions.
"""
nobs, nvar = x.shape
@@ -70,11 +70,10 @@ def config(self):
Returns
-------
- config : dict
+ dict
Dictionary containing weight estimator configuration information
"""
- return {'center': self._center,
- 'debiased': self._debiased}
+ return {"center": self._center, "debiased": self._debiased}
class HeteroskedasticWeightMatrix(HomoskedasticWeightMatrix):
@@ -117,7 +116,7 @@ def weight_matrix(self, x, z, eps):
Returns
-------
- weight : ndarray
+ ndarray
Covariance of GMM moment conditions.
"""
nobs, nvar = x.shape
@@ -173,8 +172,14 @@ class KernelWeightMatrix(HomoskedasticWeightMatrix):
linearmodels.iv.covariance.kernel_weight_quadratic_spectral
"""
- def __init__(self, kernel='bartlett', bandwidth=None, center=False,
- debiased=False, optimal_bw=False):
+ def __init__(
+ self,
+ kernel="bartlett",
+ bandwidth=None,
+ center=False,
+ debiased=False,
+ optimal_bw=False,
+ ):
super(KernelWeightMatrix, self).__init__(center, debiased)
self._bandwidth = bandwidth
self._orig_bandwidth = bandwidth
@@ -195,7 +200,7 @@ def weight_matrix(self, x, z, eps):
Returns
-------
- weight : ndarray
+ ndarray
Covariance of GMM moment conditions.
"""
nobs, nvar = x.shape
@@ -224,13 +229,15 @@ def config(self):
Returns
-------
- config : dict
+ dict
Dictionary containing weight estimator configuration information
"""
- return {'center': self._center,
- 'bandwidth': self._bandwidth,
- 'kernel': self._kernel,
- 'debiased': self._debiased}
+ return {
+ "center": self._center,
+ "bandwidth": self._bandwidth,
+ "kernel": self._kernel,
+ "debiased": self._debiased,
+ }
@property
def bandwidth(self):
@@ -270,7 +277,7 @@ def weight_matrix(self, x, z, eps):
Returns
-------
- weight : ndarray
+ ndarray
Covariance of GMM moment conditions.
"""
nobs, nvar = x.shape
@@ -281,8 +288,10 @@ def weight_matrix(self, x, z, eps):
clusters = self._clusters
if clusters.shape[0] != nobs:
- raise ValueError('clusters has the wrong nobs. Expected {0}, '
- 'got {1}'.format(nobs, clusters.shape[0]))
+ raise ValueError(
+ "clusters has the wrong nobs. Expected {0}, "
+ "got {1}".format(nobs, clusters.shape[0])
+ )
clusters = asarray(clusters).copy().squeeze()
s = _cov_cluster(ze, clusters)
@@ -301,12 +310,14 @@ def config(self):
Returns
-------
- config : dict
+ dict
Dictionary containing weight estimator configuration information
"""
- return {'center': self._center,
- 'clusters': self._clusters,
- 'debiased': self._debiased}
+ return {
+ "center": self._center,
+ "clusters": self._clusters,
+ "debiased": self._debiased,
+ }
class IVGMMCovariance(HomoskedasticCovariance):
@@ -366,46 +377,47 @@ class IVGMMCovariance(HomoskedasticCovariance):
"""
# TODO: 2-way clustering
- def __init__(self, x, y, z, params, w, cov_type='robust', debiased=False,
- **cov_config):
+ def __init__(
+ self, x, y, z, params, w, cov_type="robust", debiased=False, **cov_config
+ ):
super(IVGMMCovariance, self).__init__(x, y, z, params, debiased)
self._cov_type = cov_type
self._cov_config = cov_config
self.w = w
- self._bandwidth = cov_config.get('bandwidth', None)
- self._kernel = cov_config.get('kernel', '')
- self._name = 'GMM Covariance'
- if cov_type in ('robust', 'heteroskedastic'):
+ self._bandwidth = cov_config.get("bandwidth", None)
+ self._kernel = cov_config.get("kernel", "")
+ self._name = "GMM Covariance"
+ if cov_type in ("robust", "heteroskedastic"):
score_cov_estimator = HeteroskedasticWeightMatrix
- elif cov_type in ('unadjusted', 'homoskedastic'):
+ elif cov_type in ("unadjusted", "homoskedastic"):
score_cov_estimator = HomoskedasticWeightMatrix
- elif cov_type == 'clustered':
+ elif cov_type == "clustered":
score_cov_estimator = OneWayClusteredWeightMatrix
- elif cov_type == 'kernel':
+ elif cov_type == "kernel":
score_cov_estimator = KernelWeightMatrix
else:
- raise ValueError('Unknown cov_type')
+ raise ValueError("Unknown cov_type")
self._score_cov_estimator = score_cov_estimator
def __str__(self):
out = super(IVGMMCovariance, self).__str__()
cov_type = self._cov_type
- if cov_type in ('robust', 'heteroskedastic'):
- out += '\nRobust (Heteroskedastic)'
- elif cov_type in ('unadjusted', 'homoskedastic'):
- out += '\nUnadjusted (Homoskedastic)'
- elif cov_type == 'clustered':
- out += '\nClustered (One-way)'
- clusters = self._cov_config.get('clusters', None)
+ if cov_type in ("robust", "heteroskedastic"):
+ out += "\nRobust (Heteroskedastic)"
+ elif cov_type in ("unadjusted", "homoskedastic"):
+ out += "\nUnadjusted (Homoskedastic)"
+ elif cov_type == "clustered":
+ out += "\nClustered (One-way)"
+ clusters = self._cov_config.get("clusters", None)
if clusters is not None:
nclusters = len(unique(asarray(clusters)))
- out += '\nNum Clusters: {0}'.format(nclusters)
+ out += "\nNum Clusters: {0}".format(nclusters)
else: # kernel
- out += '\nKernel (HAC)'
- if self._cov_config.get('kernel', False):
- out += '\nKernel: {0}'.format(self._cov_config['kernel'])
- if self._cov_config.get('bandwidth', False):
- out += '\nBandwidth: {0}'.format(self._cov_config['bandwidth'])
+ out += "\nKernel (HAC)"
+ if self._cov_config.get("kernel", False):
+ out += "\nKernel: {0}".format(self._cov_config["kernel"])
+ if self._cov_config.get("bandwidth", False):
+ out += "\nBandwidth: {0}".format(self._cov_config["bandwidth"])
return out
@property
@@ -416,8 +428,9 @@ def cov(self):
xpzw = xpz @ w
xpzwzpx_inv = inv(xpzw @ xpz.T)
- score_cov = self._score_cov_estimator(debiased=self.debiased,
- **self._cov_config)
+ score_cov = self._score_cov_estimator(
+ debiased=self.debiased, **self._cov_config
+ )
s = score_cov.weight_matrix(x, z, eps)
self._cov_config = score_cov.config
@@ -426,6 +439,6 @@ def cov(self):
@property
def config(self):
- conf = {'debiased': self.debiased}
+ conf = {"debiased": self.debiased}
conf.update(self._cov_config)
return conf
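The weight matrices in this module all estimate the covariance of the GMM moment conditions g_i = z_i * eps_i. A minimal sketch of the heteroskedasticity-robust version, including the optional centering these classes expose:

import numpy as np

def hetero_moment_cov(z: np.ndarray, eps: np.ndarray, center: bool = False) -> np.ndarray:
    # S = Z' diag(eps^2) Z / nobs, the covariance of the moments z_i * eps_i
    ze = z * eps
    if center:
        ze = ze - ze.mean(0)  # center the moments before forming the covariance
    return ze.T @ ze / ze.shape[0]

# Efficient two-step GMM then weights the moments with W = inv(S).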
diff --git a/linearmodels/iv/model.py b/linearmodels/iv/model.py
index e407730a41..e1d9220b0d 100644
--- a/linearmodels/iv/model.py
+++ b/linearmodels/iv/model.py
@@ -28,31 +28,41 @@
IVResultType = Type[Union[IVResults, IVGMMResults, OLSResults]]
-__all__ = ['COVARIANCE_ESTIMATORS', 'WEIGHT_MATRICES', 'IVGMM', 'IVLIML', 'IV2SLS',
- 'IVGMMCUE', '_OLS']
-
-COVARIANCE_ESTIMATORS = {'homoskedastic': HomoskedasticCovariance,
- 'unadjusted': HomoskedasticCovariance,
- 'HomoskedasticCovariance': HomoskedasticCovariance,
- 'homo': HomoskedasticCovariance,
- 'robust': HeteroskedasticCovariance,
- 'heteroskedastic': HeteroskedasticCovariance,
- 'HeteroskedasticCovariance': HeteroskedasticCovariance,
- 'hccm': HeteroskedasticCovariance,
- 'kernel': KernelCovariance,
- 'KernelCovariance': KernelCovariance,
- 'one-way': ClusteredCovariance,
- 'clustered': ClusteredCovariance,
- 'OneWayClusteredCovariance': ClusteredCovariance}
-
-WEIGHT_MATRICES = {'unadjusted': HomoskedasticWeightMatrix,
- 'homoskedastic': HomoskedasticWeightMatrix,
- 'robust': HeteroskedasticWeightMatrix,
- 'heteroskedastic': HeteroskedasticWeightMatrix,
- 'kernel': KernelWeightMatrix,
- 'clustered': OneWayClusteredWeightMatrix,
- 'one-way': OneWayClusteredWeightMatrix
- }
+__all__ = [
+ "COVARIANCE_ESTIMATORS",
+ "WEIGHT_MATRICES",
+ "IVGMM",
+ "IVLIML",
+ "IV2SLS",
+ "IVGMMCUE",
+ "_OLS",
+]
+
+COVARIANCE_ESTIMATORS = {
+ "homoskedastic": HomoskedasticCovariance,
+ "unadjusted": HomoskedasticCovariance,
+ "HomoskedasticCovariance": HomoskedasticCovariance,
+ "homo": HomoskedasticCovariance,
+ "robust": HeteroskedasticCovariance,
+ "heteroskedastic": HeteroskedasticCovariance,
+ "HeteroskedasticCovariance": HeteroskedasticCovariance,
+ "hccm": HeteroskedasticCovariance,
+ "kernel": KernelCovariance,
+ "KernelCovariance": KernelCovariance,
+ "one-way": ClusteredCovariance,
+ "clustered": ClusteredCovariance,
+ "OneWayClusteredCovariance": ClusteredCovariance,
+}
+
+WEIGHT_MATRICES = {
+ "unadjusted": HomoskedasticWeightMatrix,
+ "homoskedastic": HomoskedasticWeightMatrix,
+ "robust": HeteroskedasticWeightMatrix,
+ "heteroskedastic": HeteroskedasticWeightMatrix,
+ "kernel": KernelWeightMatrix,
+ "clustered": OneWayClusteredWeightMatrix,
+ "one-way": OneWayClusteredWeightMatrix,
+}
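These mappings are what fit(cov_type=...) and the GMM weight_type argument consult, so several spellings select the same estimator:

# All of these aliases resolve to a single class, e.g.:
assert COVARIANCE_ESTIMATORS["robust"] is COVARIANCE_ESTIMATORS["heteroskedastic"]
assert WEIGHT_MATRICES["clustered"] is WEIGHT_MATRICES["one-way"]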
class IVLIML(object):
@@ -61,15 +71,15 @@ class IVLIML(object):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Endogenous variables (nobs by 1)
- exog : array-like
+ exog : array_like
Exogenous regressors (nobs by nexog)
- endog : array-like
+ endog : array_like
Endogenous regressors (nobs by nendog)
- instruments : array-like
+ instruments : array_like
Instrumental variables (nobs by ninstr)
- weights : array-like, optional
+ weights : array_like, optional
Observation weights used in estimation
fuller : float, optional
Fuller's alpha to modify LIML estimator. Default returns unmodified
@@ -112,24 +122,31 @@ class IVLIML(object):
IV2SLS, IVGMM, IVGMMCUE
"""
- def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike,
- endog: OptionalArrayLike, instruments: OptionalArrayLike, *,
- weights: OptionalArrayLike = None, fuller: Numeric = 0,
- kappa: OptionalNumeric = None):
-
- self.dependent = IVData(dependent, var_name='dependent')
+ def __init__(
+ self,
+ dependent: ArrayLike,
+ exog: OptionalArrayLike,
+ endog: OptionalArrayLike,
+ instruments: OptionalArrayLike,
+ *,
+ weights: OptionalArrayLike = None,
+ fuller: Numeric = 0,
+ kappa: OptionalNumeric = None
+ ):
+
+ self.dependent = IVData(dependent, var_name="dependent")
nobs = self.dependent.shape[0] # type: int
- self.exog = IVData(exog, var_name='exog', nobs=nobs)
- self.endog = IVData(endog, var_name='endog', nobs=nobs)
- self.instruments = IVData(instruments, var_name='instruments', nobs=nobs)
+ self.exog = IVData(exog, var_name="exog", nobs=nobs)
+ self.endog = IVData(endog, var_name="endog", nobs=nobs)
+ self.instruments = IVData(instruments, var_name="instruments", nobs=nobs)
self._original_index = self.dependent.pandas.index
if weights is None:
weights = ones(self.dependent.shape)
weights = IVData(weights).ndarray
if any(weights <= 0):
- raise ValueError('weights must be strictly positive.')
+ raise ValueError("weights must be strictly positive.")
weights = weights / nanmean(weights)
- self.weights = IVData(weights, var_name='weights', nobs=nobs)
+ self.weights = IVData(weights, var_name="weights", nobs=nobs)
self._drop_locs = self._drop_missing()
# dependent variable
@@ -144,44 +161,55 @@ def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike,
self._wz = self._z * w
self._has_constant = False
- self._regressor_is_exog = array([True] * self.exog.shape[1] +
- [False] * self.endog.shape[1])
+ self._regressor_is_exog = array(
+ [True] * self.exog.shape[1] + [False] * self.endog.shape[1]
+ )
self._columns = self.exog.cols + self.endog.cols
self._instr_columns = self.exog.cols + self.instruments.cols
self._index = self.dependent.rows
self._validate_inputs()
- if not hasattr(self, '_method'):
- self._method = 'IV-LIML'
+ if not hasattr(self, "_method"):
+ self._method = "IV-LIML"
additional = []
if fuller != 0:
- additional.append('fuller(alpha={0})'.format(fuller))
+ additional.append("fuller(alpha={0})".format(fuller))
if kappa is not None:
- additional.append('kappa={0}'.format(kappa))
+ additional.append("kappa={0}".format(kappa))
if additional:
- self._method += '(' + ', '.join(additional) + ')'
- if not hasattr(self, '_result_container'):
+ self._method += "(" + ", ".join(additional) + ")"
+ if not hasattr(self, "_result_container"):
self._result_container = IVResults # type: IVResultType
self._kappa = kappa
self._fuller = fuller
if kappa is not None and not isscalar(kappa):
- raise ValueError('kappa must be None or a scalar')
+ raise ValueError("kappa must be None or a scalar")
if not isscalar(fuller):
- raise ValueError('fuller must be None or a scalar')
+ raise ValueError("fuller must be None or a scalar")
if kappa is not None and fuller != 0:
import warnings
- warnings.warn('kappa and fuller should not normally be used '
- 'simultaneously. Identical results can be computed '
- 'using kappa only', UserWarning)
+
+ warnings.warn(
+ "kappa and fuller should not normally be used "
+ "simultaneously. Identical results can be computed "
+ "using kappa only",
+ UserWarning,
+ )
if endog is None and instruments is None:
self._result_container = OLSResults
- self._method = 'OLS'
- self._formula = ''
+ self._method = "OLS"
+ self._formula = ""
@staticmethod
- def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike = None,
- fuller: float = 0, kappa: OptionalNumeric = None):
+ def from_formula(
+ formula: str,
+ data: DataFrame,
+ *,
+ weights: OptionalArrayLike = None,
+ fuller: float = 0,
+ kappa: OptionalNumeric = None
+ ):
"""
Parameters
----------
@@ -190,7 +218,7 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
section
data : DataFrame
DataFrame containing the variables used in the formula
- weights : array-like, optional
+ weights : array_like, optional
Observation weights used in estimation
fuller : float, optional
Fuller's alpha to modify LIML estimator. Default returns unmodified
@@ -201,7 +229,7 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
Returns
-------
- model : IVLIML
+ IVLIML
Model instance
Notes
@@ -227,24 +255,31 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
"""
parser = IVFormulaParser(formula, data)
dep, exog, endog, instr = parser.data
- mod = IVLIML(dep, exog, endog, instr, weights=weights,
- fuller=fuller, kappa=kappa)
+ mod = IVLIML(
+ dep, exog, endog, instr, weights=weights, fuller=fuller, kappa=kappa
+ )
mod.formula = formula
return mod
- def predict(self, params: ArrayLike, *, exog: OptionalArrayLike = None,
- endog: OptionalArrayLike = None, data: DataFrame = None,
- eval_env: int = 4) -> DataFrame:
+ def predict(
+ self,
+ params: ArrayLike,
+ *,
+ exog: OptionalArrayLike = None,
+ endog: OptionalArrayLike = None,
+ data: DataFrame = None,
+ eval_env: int = 4
+ ) -> DataFrame:
"""
Predict values for additional data
Parameters
----------
- params : array-like
+ params : array_like
Model parameters (nvar by 1)
- exog : array-like
+ exog : array_like
Exogenous regressors (nobs by nexog)
- endog : array-like
+ endog : array_like
Endogenous regressors (nobs by nendog)
data : DataFrame
Values to use when making predictions from a model constructed
@@ -254,7 +289,7 @@ def predict(self, params: ArrayLike, *, exog: OptionalArrayLike = None,
Returns
-------
- predictions : DataFrame
+ DataFrame
Fitted values from supplied data and parameters
Notes
@@ -273,11 +308,14 @@ def predict(self, params: ArrayLike, *, exog: OptionalArrayLike = None,
values corresponding to the original model specification.
"""
if data is not None and self.formula is None:
- raise ValueError('Unable to use data when the model was not '
- 'created using a formula.')
+ raise ValueError(
+ "Unable to use data when the model was not " "created using a formula."
+ )
if data is not None and (exog is not None or endog is not None):
- raise ValueError('Predictions can only be constructed using one '
- 'of exog/endog or data, but not both.')
+ raise ValueError(
+ "Predictions can only be constructed using one "
+ "of exog/endog or data, but not both."
+ )
if exog is not None or endog is not None:
exog = IVData(exog).pandas
endog = IVData(endog).pandas
@@ -290,7 +328,7 @@ def predict(self, params: ArrayLike, *, exog: OptionalArrayLike = None,
params = atleast_2d(asarray(params))
if params.shape[0] == 1:
params = params.T
- pred = DataFrame(x @ params, index=exog_endog.index, columns=['predictions'])
+ pred = DataFrame(x @ params, index=exog_endog.index, columns=["predictions"])
return pred
@@ -307,18 +345,19 @@ def formula(self, value: str):
def _validate_inputs(self):
x, z = self._x, self._z
if x.shape[1] == 0:
- raise ValueError('Model must contain at least one regressor.')
+ raise ValueError("Model must contain at least one regressor.")
if self.instruments.shape[1] < self.endog.shape[1]:
- raise ValueError('The number of instruments ({0}) must be at least '
- 'as large as the number of endogenous regressors'
- ' ({1}).'.format(self.instruments.shape[1],
- self.endog.shape[1]))
+ raise ValueError(
+ "The number of instruments ({0}) must be at least "
+ "as large as the number of endogenous regressors"
+ " ({1}).".format(self.instruments.shape[1], self.endog.shape[1])
+ )
if matrix_rank(x) < x.shape[1]:
- raise ValueError('regressors [exog endog] do not have full '
- 'column rank')
+ raise ValueError("regressors [exog endog] do not have full " "column rank")
if matrix_rank(z) < z.shape[1]:
- raise ValueError('instruments [exog instruments] do not have '
- 'full column rank')
+ raise ValueError(
+ "instruments [exog instruments] do not have " "full column rank"
+ )
self._has_constant, self._const_loc = has_constant(x)
def _drop_missing(self) -> ndarray:
@@ -326,8 +365,10 @@ def _drop_missing(self) -> ndarray:
missing = any(c_[[dh.isnull for dh in data]], 0) # type: ndarray
if any(missing):
if npall(missing):
- raise ValueError('All observations contain missing data. '
- 'Model cannot be estimated.')
+ raise ValueError(
+ "All observations contain missing data. "
+ "Model cannot be estimated."
+ )
self.dependent.drop(missing)
self.exog.drop(missing)
self.endog.drop(missing)
@@ -338,7 +379,9 @@ def _drop_missing(self) -> ndarray:
return missing
@staticmethod
- def estimate_parameters(x: ndarray, y: ndarray, z: ndarray, kappa: Numeric) -> ndarray:
+ def estimate_parameters(
+ x: ndarray, y: ndarray, z: ndarray, kappa: Numeric
+ ) -> ndarray:
"""
Parameter estimation without error checking
@@ -355,7 +398,7 @@ def estimate_parameters(x: ndarray, y: ndarray, z: ndarray, kappa: Numeric) -> n
Returns
-------
- params : ndarray
+ ndarray
Estimated parameters (nvar by 1)
Notes
@@ -384,8 +427,9 @@ def _estimate_kappa(self) -> float:
q = vpmzv_sqinv @ (ex1.T @ ex1) @ vpmzv_sqinv
return min(eigvalsh(q))
- def fit(self, *, cov_type: str = 'robust', debiased: bool = False,
- **cov_config: Any):
+ def fit(
+ self, *, cov_type: str = "robust", debiased: bool = False, **cov_config: Any
+ ):
"""
Estimate model parameters
@@ -413,7 +457,7 @@ def fit(self, *, cov_type: str = 'robust', debiased: bool = False,
Returns
-------
- results : IVResults
+ IVResults
Results container
Notes
@@ -445,15 +489,14 @@ def fit(self, *, cov_type: str = 'robust', debiased: bool = False,
params = self.estimate_parameters(wx, wy, wz, est_kappa)
cov_estimator = COVARIANCE_ESTIMATORS[cov_type]
- cov_config['debiased'] = debiased
- cov_config['kappa'] = est_kappa
+ cov_config["debiased"] = debiased
+ cov_config["kappa"] = est_kappa
cov_config_copy = {k: v for k, v in cov_config.items()}
- if 'center' in cov_config_copy:
- del cov_config_copy['center']
+ if "center" in cov_config_copy:
+ del cov_config_copy["center"]
cov_estimator = cov_estimator(wx, wy, wz, params, **cov_config_copy)
- results = {'kappa': est_kappa,
- 'liml_kappa': liml_kappa}
+ results = {"kappa": est_kappa, "liml_kappa": liml_kappa}
pe = self._post_estimation(params, cov_estimator, cov_type)
results.update(pe)
@@ -470,7 +513,7 @@ def wresids(self, params: ndarray):
Returns
-------
- wresids : ndarray
+ ndarray
Weighted model residuals
Notes
@@ -491,7 +534,7 @@ def resids(self, params: ndarray):
Returns
-------
- resids : ndarray
+ ndarray
Model residuals
"""
return self._y - self._x @ params
@@ -521,12 +564,12 @@ def _post_estimation(self, params: ndarray, cov_estimator, cov_type: str):
index = self._index
eps = self.resids(params)
y = self.dependent.pandas
- fitted = DataFrame(asarray(y) - eps, y.index, ['fitted_values'])
+ fitted = DataFrame(asarray(y) - eps, y.index, ["fitted_values"])
weps = self.wresids(params)
cov = cov_estimator.cov
debiased = cov_estimator.debiased
- residual_ss = (weps.T @ weps)
+ residual_ss = weps.T @ weps
w = self.weights.ndarray
e = self._wy
@@ -537,24 +580,26 @@ def _post_estimation(self, params: ndarray, cov_estimator, cov_type: str):
r2 = 1 - residual_ss / total_ss
fstat = self._f_statistic(params, cov, debiased)
- out = {'params': Series(params.squeeze(), columns, name='parameter'),
- 'eps': Series(eps.squeeze(), index=index, name='residual'),
- 'weps': Series(weps.squeeze(), index=index, name='weighted residual'),
- 'cov': DataFrame(cov, columns=columns, index=columns),
- 's2': float(cov_estimator.s2),
- 'debiased': debiased,
- 'residual_ss': float(residual_ss),
- 'total_ss': float(total_ss),
- 'r2': float(r2),
- 'fstat': fstat,
- 'vars': columns,
- 'instruments': self._instr_columns,
- 'cov_config': cov_estimator.config,
- 'cov_type': cov_type,
- 'method': self._method,
- 'cov_estimator': cov_estimator,
- 'fitted': fitted,
- 'original_index': self._original_index}
+ out = {
+ "params": Series(params.squeeze(), columns, name="parameter"),
+ "eps": Series(eps.squeeze(), index=index, name="residual"),
+ "weps": Series(weps.squeeze(), index=index, name="weighted residual"),
+ "cov": DataFrame(cov, columns=columns, index=columns),
+ "s2": float(cov_estimator.s2),
+ "debiased": debiased,
+ "residual_ss": float(residual_ss),
+ "total_ss": float(total_ss),
+ "r2": float(r2),
+ "fstat": fstat,
+ "vars": columns,
+ "instruments": self._instr_columns,
+ "cov_config": cov_estimator.config,
+ "cov_type": cov_type,
+ "method": self._method,
+ "cov_estimator": cov_estimator,
+ "fitted": fitted,
+ "original_index": self._original_index,
+ }
return out
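For reference, estimate_parameters computes the k-class estimator. A self-contained sketch of the closed form, where kappa=1 recovers 2SLS, kappa=0 recovers OLS, and the LIML kappa comes from the eigenvalue problem above:

import numpy as np

def kclass(x: np.ndarray, y: np.ndarray, z: np.ndarray, kappa: float) -> np.ndarray:
    # beta = (X'(I - k Mz) X)^{-1} X'(I - k Mz) y, Mz = I - Z (Z'Z)^+ Z'
    nobs = x.shape[0]
    mz = np.eye(nobs) - z @ np.linalg.pinv(z)  # annihilator of the instruments
    xk = x - kappa * (mz @ x)                  # (I - kappa * Mz) X
    return np.linalg.solve(xk.T @ x, xk.T @ y)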
@@ -565,15 +610,15 @@ class IV2SLS(IVLIML):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Endogenous variables (nobs by 1)
- exog : array-like
+ exog : array_like
Exogenous regressors (nobs by nexog)
- endog : array-like
+ endog : array_like
Endogenous regressors (nobs by nendog)
- instruments : array-like
+ instruments : array_like
Instrumental variables (nobs by ninstr)
- weights : array-like, optional
+ weights : array_like, optional
Observation weights used in estimation
Notes
@@ -598,15 +643,24 @@ class IV2SLS(IVLIML):
IVLIML, IVGMM, IVGMMCUE
"""
- def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike,
- endog: OptionalArrayLike, instruments: OptionalArrayLike, *,
- weights: OptionalArrayLike = None):
- self._method = 'IV-2SLS'
- super(IV2SLS, self).__init__(dependent, exog, endog, instruments,
- weights=weights, fuller=0, kappa=1)
+ def __init__(
+ self,
+ dependent: ArrayLike,
+ exog: OptionalArrayLike,
+ endog: OptionalArrayLike,
+ instruments: OptionalArrayLike,
+ *,
+ weights: OptionalArrayLike = None
+ ):
+ self._method = "IV-2SLS"
+ super(IV2SLS, self).__init__(
+ dependent, exog, endog, instruments, weights=weights, fuller=0, kappa=1
+ )
@staticmethod
- def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike = None):
+ def from_formula(
+ formula: str, data: DataFrame, *, weights: OptionalArrayLike = None
+ ):
"""
Parameters
----------
@@ -615,12 +669,12 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
section
data : DataFrame
DataFrame containing the variables used in the formula
- weights : array-like, optional
+ weights : array_like, optional
Observation weights used in estimation
Returns
-------
- model : IV2SLS
+ IV2SLS
Model instance
Notes
@@ -657,15 +711,15 @@ class IVGMM(IVLIML):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Endogenous variables (nobs by 1)
- exog : array-like
+ exog : array_like
Exogenous regressors (nobs by nexog)
- endog : array-like
+ endog : array_like
Endogenous regressors (nobs by nendog)
- instruments : array-like
+ instruments : array_like
Instrumental variables (nobs by ninstr)
- weights : array-like, optional
+ weights : array_like, optional
Observation weights used in estimation
weight_type : str
Name of moment condition weight function to use in the GMM estimation
@@ -702,21 +756,36 @@ class IVGMM(IVLIML):
IV2SLS, IVLIML, IVGMMCUE
"""
- def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike,
- endog: OptionalArrayLike, instruments: OptionalArrayLike, *,
- weights: OptionalArrayLike = None,
- weight_type: str = 'robust', **weight_config):
- self._method = 'IV-GMM'
+ def __init__(
+ self,
+ dependent: ArrayLike,
+ exog: OptionalArrayLike,
+ endog: OptionalArrayLike,
+ instruments: OptionalArrayLike,
+ *,
+ weights: OptionalArrayLike = None,
+ weight_type: str = "robust",
+ **weight_config
+ ):
+ self._method = "IV-GMM"
self._result_container = IVGMMResults
- super(IVGMM, self).__init__(dependent, exog, endog, instruments, weights=weights)
+ super(IVGMM, self).__init__(
+ dependent, exog, endog, instruments, weights=weights
+ )
weight_matrix_estimator = WEIGHT_MATRICES[weight_type]
self._weight = weight_matrix_estimator(**weight_config)
self._weight_type = weight_type
self._weight_config = self._weight.config
@staticmethod
- def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike = None,
- weight_type: str = 'robust', **weight_config: Any):
+ def from_formula(
+ formula: str,
+ data: DataFrame,
+ *,
+ weights: OptionalArrayLike = None,
+ weight_type: str = "robust",
+ **weight_config: Any
+ ):
"""
Parameters
----------
@@ -725,7 +794,7 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
section
data : DataFrame
DataFrame containing the variables used in the formula
- weights : array-like, optional
+ weights : array_like, optional
Observation weights used in estimation
weight_type : str
Name of moment condition weight function to use in the GMM estimation
@@ -747,7 +816,7 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
Returns
-------
- model : IVGMM
+ IVGMM
Model instance
Examples
@@ -761,8 +830,15 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
"""
parser = IVFormulaParser(formula, data)
dep, exog, endog, instr = parser.data
- mod = IVGMM(dep, exog, endog, instr, weights=weights, weight_type=weight_type,
- **weight_config)
+ mod = IVGMM(
+ dep,
+ exog,
+ endog,
+ instr,
+ weights=weights,
+ weight_type=weight_type,
+ **weight_config
+ )
mod.formula = formula
return mod
@@ -782,7 +858,7 @@ def estimate_parameters(x: ndarray, y: ndarray, z: ndarray, w: ndarray):
Returns
-------
- params : ndarray
+ ndarray
Estimated parameters (nvar by 1)
Notes
@@ -794,8 +870,16 @@ def estimate_parameters(x: ndarray, y: ndarray, z: ndarray, w: ndarray):
zpy = z.T @ y
return inv(xpz @ w @ xpz.T) @ (xpz @ w @ zpy)
- def fit(self, *, iter_limit: int = 2, tol: float = 1e-4, initial_weight: ndarray = None,
- cov_type: str = 'robust', debiased: bool = False, **cov_config: Any):
+ def fit(
+ self,
+ *,
+ iter_limit: int = 2,
+ tol: float = 1e-4,
+ initial_weight: ndarray = None,
+ cov_type: str = "robust",
+ debiased: bool = False,
+ **cov_config: Any
+ ):
"""
Estimate model parameters
@@ -837,7 +921,7 @@ def fit(self, *, iter_limit: int = 2, tol: float = 1e-4, initial_weight: ndarray
Returns
-------
- results : IVGMMResults
+ IVGMMResults
Results container
See also
@@ -848,8 +932,13 @@ def fit(self, *, iter_limit: int = 2, tol: float = 1e-4, initial_weight: ndarray
nobs = wy.shape[0]
weight_matrix = self._weight.weight_matrix
wmat = inv(wz.T @ wz / nobs) if initial_weight is None else initial_weight
- sv = IV2SLS(self.dependent, self.exog, self.endog, self.instruments,
- weights=self.weights)
+ sv = IV2SLS(
+ self.dependent,
+ self.exog,
+ self.endog,
+ self.instruments,
+ weights=self.weights,
+ )
_params = params = asarray(sv.fit().params)[:, None]
# _params = params = self.estimate_parameters(wx, wy, wz, wmat)
@@ -867,9 +956,10 @@ def fit(self, *, iter_limit: int = 2, tol: float = 1e-4, initial_weight: ndarray
norm = delta.T @ vinv @ delta
iters += 1
- cov_config['debiased'] = debiased
- cov_estimator = IVGMMCovariance(wx, wy, wz, params, wmat,
- cov_type, **cov_config)
+ cov_config["debiased"] = debiased
+ cov_estimator = IVGMMCovariance(
+ wx, wy, wz, params, wmat, cov_type, **cov_config
+ )
results = self._post_estimation(params, cov_estimator, cov_type)
gmm_pe = self._gmm_post_estimation(params, wmat, iters)
@@ -881,11 +971,13 @@ def fit(self, *, iter_limit: int = 2, tol: float = 1e-4, initial_weight: ndarray
def _gmm_post_estimation(self, params: ndarray, weight_mat: ndarray, iters: int):
"""GMM-specific post-estimation results"""
instr = self._instr_columns
- gmm_specific = {'weight_mat': DataFrame(weight_mat, columns=instr, index=instr),
- 'weight_type': self._weight_type,
- 'weight_config': self._weight_type,
- 'iterations': iters,
- 'j_stat': self._j_statistic(params, weight_mat)}
+ gmm_specific = {
+ "weight_mat": DataFrame(weight_mat, columns=instr, index=instr),
+ "weight_type": self._weight_type,
+ "weight_config": self._weight_type,
+ "iterations": iters,
+ "j_stat": self._j_statistic(params, weight_mat),
+ }
return gmm_specific
@@ -896,7 +988,7 @@ def _j_statistic(self, params: ndarray, weight_mat: ndarray):
eps = y - x @ params
g_bar = (z * eps).mean(0)
stat = float(nobs * g_bar.T @ weight_mat @ g_bar.T)
- null = 'Expected moment conditions are equal to 0'
+ null = "Expected moment conditions are equal to 0"
return WaldTestStatistic(stat, null, ninstr - nvar)
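The statistic computed here is the standard Sargan-Hansen J. As a sketch, with g_bar the average moment and w the weight matrix, J = nobs * g_bar' W g_bar is chi-squared with ninstr - nvar degrees of freedom under the null of valid overidentifying restrictions:

import numpy as np

def j_statistic(z: np.ndarray, eps: np.ndarray, w: np.ndarray) -> float:
    # J = nobs * g_bar' W g_bar, g_bar = mean of the moments z_i * eps_i
    g_bar = (z * eps).mean(0)
    return float(z.shape[0] * g_bar @ w @ g_bar)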
@@ -906,15 +998,15 @@ class IVGMMCUE(IVGMM):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Endogenous variables (nobs by 1)
- exog : array-like
+ exog : array_like
Exogenous regressors (nobs by nexog)
- endog : array-like
+ endog : array_like
Endogenous regressors (nobs by nendog)
- instruments : array-like
+ instruments : array_like
Instrumental variables (nobs by ninstr)
- weights : array-like, optional
+ weights : array_like, optional
Observation weights used in estimation
weight_type : str
Name of moment condition weight function to use in the GMM estimation
@@ -949,19 +1041,39 @@ class IVGMMCUE(IVGMM):
IV2SLS, IVLIML, IVGMM
"""
- def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike,
- endog: OptionalArrayLike, instruments: OptionalArrayLike, *,
- weights: OptionalArrayLike = None,
- weight_type: str = 'robust', **weight_config):
- self._method = 'IV-GMM-CUE'
- super(IVGMMCUE, self).__init__(dependent, exog, endog, instruments, weights=weights,
- weight_type=weight_type, **weight_config)
- if 'center' not in weight_config:
- weight_config['center'] = True
+ def __init__(
+ self,
+ dependent: ArrayLike,
+ exog: OptionalArrayLike,
+ endog: OptionalArrayLike,
+ instruments: OptionalArrayLike,
+ *,
+ weights: OptionalArrayLike = None,
+ weight_type: str = "robust",
+ **weight_config
+ ):
+ self._method = "IV-GMM-CUE"
+ super(IVGMMCUE, self).__init__(
+ dependent,
+ exog,
+ endog,
+ instruments,
+ weights=weights,
+ weight_type=weight_type,
+ **weight_config
+ )
+ if "center" not in weight_config:
+ weight_config["center"] = True
@staticmethod
- def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike = None,
- weight_type: str = 'robust', **weight_config: Any):
+ def from_formula(
+ formula: str,
+ data: DataFrame,
+ *,
+ weights: OptionalArrayLike = None,
+ weight_type: str = "robust",
+ **weight_config: Any
+ ):
"""
Parameters
----------
@@ -970,7 +1082,7 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
section
data : DataFrame
DataFrame containing the variables used in the formula
- weights : array-like, optional
+ weights : array_like, optional
Observation weights used in estimation
weight_type : str
Name of moment condition weight function to use in the GMM estimation
@@ -980,7 +1092,7 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
Returns
-------
- model : IVGMMCUE
+ IVGMMCUE
Model instance
Notes
@@ -1005,8 +1117,15 @@ def from_formula(formula: str, data: DataFrame, *, weights: OptionalArrayLike =
"""
parser = IVFormulaParser(formula, data)
dep, exog, endog, instr = parser.data
- mod = IVGMMCUE(dep, exog, endog, instr, weights=weights, weight_type=weight_type,
- **weight_config)
+ mod = IVGMMCUE(
+ dep,
+ exog,
+ endog,
+ instr,
+ weights=weights,
+ weight_type=weight_type,
+ **weight_config
+ )
mod.formula = formula
return mod
@@ -1027,7 +1146,7 @@ def j(self, params: ndarray, x: ndarray, y: ndarray, z: ndarray):
Returns
-------
- j : float
+ float
GMM objective function, also known as the J statistic
Notes
@@ -1055,8 +1174,15 @@ def j(self, params: ndarray, x: ndarray, y: ndarray, z: ndarray):
g_bar = (z * eps).mean(0)
return nobs * g_bar.T @ w @ g_bar.T
- def estimate_parameters(self, starting: ndarray, x: ndarray, y: ndarray, z: ndarray,
- display: bool = False, opt_options: Dict[str, Any] = None):
+ def estimate_parameters(
+ self,
+ starting: ndarray,
+ x: ndarray,
+ y: ndarray,
+ z: ndarray,
+ display: bool = False,
+ opt_options: Dict[str, Any] = None,
+ ):
r"""
Parameters
----------
@@ -1076,7 +1202,7 @@ def estimate_parameters(self, starting: ndarray, x: ndarray, y: ndarray, z: ndar
Returns
-------
- params : ndarray
+ ndarray
Estimated parameters (nvar by 1)
Notes
@@ -1090,18 +1216,25 @@ def estimate_parameters(self, starting: ndarray, x: ndarray, y: ndarray, z: ndar
"""
args = (x, y, z)
opt_options = {} if opt_options is None else opt_options
- options = {'disp': display}
- if 'options' in opt_options:
+ options = {"disp": display}
+ if "options" in opt_options:
opt_options = opt_options.copy()
- options.update(opt_options.pop('options'))
+ options.update(opt_options.pop("options"))
res = minimize(self.j, starting, args=args, options=options, **opt_options)
return res.x[:, None], res.nit
- def fit(self, *, starting: ndarray = None, display: bool = False, cov_type: str = 'robust',
- debiased: bool = False, opt_options: Dict[str, Any] = None,
- **cov_config: Any):
+ def fit(
+ self,
+ *,
+ starting: ndarray = None,
+ display: bool = False,
+ cov_type: str = "robust",
+ debiased: bool = False,
+ opt_options: Dict[str, Any] = None,
+ **cov_config: Any
+ ):
r"""
Estimate model parameters
@@ -1130,7 +1263,7 @@ def fit(self, *, starting: ndarray = None, display: bool = False, cov_type: str
Returns
-------
- results : IVGMMResults
+ IVGMMResults
Results container
Notes
@@ -1146,26 +1279,34 @@ def fit(self, *, starting: ndarray = None, display: bool = False, cov_type: str
if starting is None:
exog = None if self.exog.shape[1] == 0 else self.exog
endog = None if self.endog.shape[1] == 0 else self.endog
- instr = None if self.instruments.shape[1] == 0 else \
- self.instruments
-
- res = IVGMM(self.dependent, exog, endog, instr,
- weights=self.weights, weight_type=self._weight_type,
- **self._weight_config).fit()
+ instr = None if self.instruments.shape[1] == 0 else self.instruments
+
+ res = IVGMM(
+ self.dependent,
+ exog,
+ endog,
+ instr,
+ weights=self.weights,
+ weight_type=self._weight_type,
+ **self._weight_config
+ ).fit()
starting = asarray(res.params)
else:
starting = asarray(starting)
if len(starting) != self.exog.shape[1] + self.endog.shape[1]:
- raise ValueError('starting does not have the correct number '
- 'of values')
- params, iters = self.estimate_parameters(starting, wx, wy, wz, display,
- opt_options=opt_options)
+ raise ValueError(
+ "starting does not have the correct number " "of values"
+ )
+ params, iters = self.estimate_parameters(
+ starting, wx, wy, wz, display, opt_options=opt_options
+ )
eps = wy - wx @ params
wmat = inv(weight_matrix(wx, wz, eps))
- cov_config['debiased'] = debiased
- cov_estimator = IVGMMCovariance(wx, wy, wz, params, wmat, cov_type,
- **cov_config)
+ cov_config["debiased"] = debiased
+ cov_estimator = IVGMMCovariance(
+ wx, wy, wz, params, wmat, cov_type, **cov_config
+ )
results = self._post_estimation(params, cov_estimator, cov_type)
gmm_pe = self._gmm_post_estimation(params, wmat, iters)
results.update(gmm_pe)
@@ -1179,11 +1320,11 @@ class _OLS(IVLIML):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Endogenous variables (nobs by 1)
- exog : array-like
+ exog : array_like
Exogenous regressors (nobs by nexog)
- weights : array-like, optional
+ weights : array_like, optional
Observation weights used in estimation
Notes
@@ -1197,7 +1338,14 @@ class _OLS(IVLIML):
statsmodels.regression.linear_model.GLS
"""
- def __init__(self, dependent: ArrayLike, exog: OptionalArrayLike, *,
- weights: OptionalArrayLike = None):
- super(_OLS, self).__init__(dependent, exog, None, None, weights=weights, kappa=0.0)
+ def __init__(
+ self,
+ dependent: ArrayLike,
+ exog: OptionalArrayLike,
+ *,
+ weights: OptionalArrayLike = None
+ ):
+ super(_OLS, self).__init__(
+ dependent, exog, None, None, weights=weights, kappa=0.0
+ )
self._result_container = OLSResults
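Putting the estimators above together, a hedged end-to-end sketch of the formula interface; the column names are hypothetical, and [educ ~ meduc] marks the endogenous regressor and its instrument:

import numpy as np
import pandas as pd
from linearmodels.iv import IV2SLS

rs = np.random.RandomState(0)
data = pd.DataFrame(rs.standard_normal((500, 4)),
                    columns=["wage", "exper", "educ", "meduc"])
mod = IV2SLS.from_formula("wage ~ 1 + exper + [educ ~ meduc]", data)
res = mod.fit(cov_type="robust")  # dispatches through COVARIANCE_ESTIMATORS
print(res.params)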
diff --git a/linearmodels/iv/results.py b/linearmodels/iv/results.py
index cb4369212a..6c19ce367b 100644
--- a/linearmodels/iv/results.py
+++ b/linearmodels/iv/results.py
@@ -7,11 +7,11 @@
import datetime as dt
from typing import Any, Dict, List, Union
-from property_cached import cached_property
from numpy import (array, c_, diag, empty, isnan, log, ndarray, ones, sqrt,
zeros)
from numpy.linalg import inv, pinv
from pandas import DataFrame, Series, concat, to_numeric
+from property_cached import cached_property
import scipy.stats as stats
from statsmodels.iolib.summary import SimpleTable, fmt_2cols, fmt_params
from statsmodels.iolib.table import default_txt_fmt
@@ -22,7 +22,7 @@
pval_format, quadratic_form_test)
-def stub_concat(lists, sep='='):
+def stub_concat(lists, sep="="):
col_size = max([max(map(len, l)) for l in lists])
out = []
for l in lists:
@@ -31,7 +31,7 @@ def stub_concat(lists, sep='='):
return out[:-1]
-def table_concat(lists, sep='='):
+def table_concat(lists, sep="="):
col_sizes = []
for l in lists:
size = list(map(lambda r: list(map(len, r)), l))
@@ -58,27 +58,27 @@ class OLSResults(_SummaryStr):
"""
def __init__(self, results: Dict[str, Any], model):
- self._resid = results['eps']
- self._wresid = results['weps']
- self._params = results['params']
- self._cov = results['cov']
+ self._resid = results["eps"]
+ self._wresid = results["weps"]
+ self._params = results["params"]
+ self._cov = results["cov"]
self.model = model
- self._r2 = results['r2']
- self._cov_type = results['cov_type']
- self._rss = results['residual_ss']
- self._tss = results['total_ss']
- self._s2 = results['s2']
- self._debiased = results['debiased']
- self._f_statistic = results['fstat']
- self._vars = results['vars']
- self._cov_config = results['cov_config']
- self._method = results['method']
- self._kappa = results.get('kappa', None)
+ self._r2 = results["r2"]
+ self._cov_type = results["cov_type"]
+ self._rss = results["residual_ss"]
+ self._tss = results["total_ss"]
+ self._s2 = results["s2"]
+ self._debiased = results["debiased"]
+ self._f_statistic = results["fstat"]
+ self._vars = results["vars"]
+ self._cov_config = results["cov_config"]
+ self._method = results["method"]
+ self._kappa = results.get("kappa", None)
self._datetime = dt.datetime.now()
- self._cov_estimator = results['cov_estimator']
- self._original_index = results['original_index']
- self._fitted = results['fitted']
- self._df_model = results.get('df_model', self._params.shape[0])
+ self._cov_estimator = results["cov_estimator"]
+ self._original_index = results["original_index"]
+ self._fitted = results["fitted"]
+ self._df_model = results.get("df_model", self._params.shape[0])
@property
def cov_config(self) -> Dict[str, Any]:
@@ -128,23 +128,33 @@ def idiosyncratic(self) -> Series:
def _out_of_sample(self, exog, endog, data, fitted, missing):
"""Interface between model predict and predict for OOS fits"""
if not (exog is None and endog is None) and data is not None:
- raise ValueError('Predictions can only be constructed using one '
- 'of exog/endog or data, but not both.')
+ raise ValueError(
+ "Predictions can only be constructed using one "
+ "of exog/endog or data, but not both."
+ )
pred = self.model.predict(self.params, exog=exog, endog=endog, data=data)
if not missing:
pred = pred.loc[pred.notnull().all(1)]
return pred
- def predict(self, exog=None, endog=None, *, data=None, fitted=True,
- idiosyncratic=False, missing=False):
+ def predict(
+ self,
+ exog=None,
+ endog=None,
+ *,
+ data=None,
+ fitted=True,
+ idiosyncratic=False,
+ missing=False
+ ):
"""
In- and out-of-sample predictions
Parameters
----------
- exog : array-like
+ exog : array_like
Exogenous values to use in out-of-sample prediction (nobs by nexog)
- endog : array-like
+ endog : array_like
Endogenous values to use in out-of-sample prediction (nobs by nendog)
data : DataFrame, optional
DataFrame to use for out-of-sample predictions when model was
@@ -161,7 +171,7 @@ def predict(self, exog=None, endog=None, *, data=None, fitted=True,
Returns
-------
- predictions : DataFrame
+ DataFrame
DataFrame containing columns for all selected outputs
Notes
@@ -184,7 +194,7 @@ def predict(self, exog=None, endog=None, *, data=None, fitted=True,
if idiosyncratic:
out.append(self.idiosyncratic)
if len(out) == 0:
- raise ValueError('At least one output must be selected')
+ raise ValueError("At least one output must be selected")
out = concat(out, 1) # type: DataFrame
if missing:
index = self._original_index
@@ -241,12 +251,12 @@ def cov_type(self) -> str:
def std_errors(self) -> Series:
"""Estimated parameter standard errors"""
std_errors = sqrt(diag(self.cov))
- return Series(std_errors, index=self._vars, name='stderr')
+ return Series(std_errors, index=self._vars, name="stderr")
@cached_property
def tstats(self) -> Series:
"""Parameter t-statistics"""
- return Series(self._params / self.std_errors, name='tstat')
+ return Series(self._params / self.std_errors, name="tstat")
@cached_property
def pvalues(self) -> Series:
@@ -258,7 +268,7 @@ def pvalues(self) -> Series:
else:
pvals = 2 - 2 * stats.norm.cdf(abs(self.tstats))
- return Series(pvals, index=self._vars, name='pvalue')
+ return Series(pvals, index=self._vars, name="pvalue")
@property
def total_ss(self) -> float:
@@ -292,7 +302,7 @@ def f_statistic(self) -> WaldTestStatistic:
Returns
-------
- f : WaldTestStatistic
+ WaldTestStatistic
Test statistic for the null that all coefficients excluding constant
terms are zero.
@@ -324,7 +334,7 @@ def conf_int(self, level=0.95) -> DataFrame:
Returns
-------
- ci : DataFrame
+ DataFrame
Confidence interval of the form [lower, upper] for each parameter
Notes
@@ -338,20 +348,22 @@ def conf_int(self, level=0.95) -> DataFrame:
q = stats.norm.ppf(ci_quantiles)
q = q[None, :]
ci = self.params[:, None] + self.std_errors[:, None] * q
- return DataFrame(ci, index=self._vars, columns=['lower', 'upper'])
+ return DataFrame(ci, index=self._vars, columns=["lower", "upper"])
def _top_right(self):
f_stat = _str(self.f_statistic.stat)
if isnan(self.f_statistic.stat):
- f_stat = ' N/A'
-
- return [('R-squared:', _str(self.rsquared)),
- ('Adj. R-squared:', _str(self.rsquared_adj)),
- ('F-statistic:', f_stat),
- ('P-value (F-stat)', pval_format(self.f_statistic.pval)),
- ('Distribution:', str(self.f_statistic.dist_name)),
- ('', ''),
- ('', '')]
+ f_stat = " N/A"
+
+ return [
+ ("R-squared:", _str(self.rsquared)),
+ ("Adj. R-squared:", _str(self.rsquared_adj)),
+ ("F-statistic:", f_stat),
+ ("P-value (F-stat)", pval_format(self.f_statistic.pval)),
+ ("Distribution:", str(self.f_statistic.dist_name)),
+ ("", ""),
+ ("", ""),
+ ]
@property
def summary(self) -> Summary:
@@ -361,15 +373,17 @@ def summary(self) -> Summary:
``summary.as_html()`` and ``summary.as_latex()``.
"""
- title = self._method + ' Estimation Summary'
+ title = self._method + " Estimation Summary"
mod = self.model
- top_left = [('Dep. Variable:', mod.dependent.cols[0]),
- ('Estimator:', self._method),
- ('No. Observations:', self.nobs),
- ('Date:', self._datetime.strftime('%a, %b %d %Y')),
- ('Time:', self._datetime.strftime('%H:%M:%S')),
- ('Cov. Estimator:', self._cov_type),
- ('', '')]
+ top_left = [
+ ("Dep. Variable:", mod.dependent.cols[0]),
+ ("Estimator:", self._method),
+ ("No. Observations:", self.nobs),
+ ("Date:", self._datetime.strftime("%a, %b %d %Y")),
+ ("Time:", self._datetime.strftime("%H:%M:%S")),
+ ("Cov. Estimator:", self._cov_type),
+ ("", ""),
+ ]
top_right = self._top_right()
@@ -385,9 +399,9 @@ def summary(self) -> Summary:
# Top Table
# Parameter table
fmt = fmt_2cols
- fmt['data_fmts'][1] = '%18s'
+ fmt["data_fmts"][1] = "%18s"
- top_right = [('%-21s' % (' ' + k), v) for k, v in top_right]
+ top_right = [("%-21s" % (" " + k), v) for k, v in top_right]
stubs = []
vals = []
for stub, val in top_right:
@@ -396,11 +410,13 @@ def summary(self) -> Summary:
table.extend_right(SimpleTable(vals, stubs=stubs))
smry.tables.append(table)
- param_data = c_[self.params.values[:, None],
- self.std_errors.values[:, None],
- self.tstats.values[:, None],
- self.pvalues.values[:, None],
- self.conf_int()]
+ param_data = c_[
+ self.params.values[:, None],
+ self.std_errors.values[:, None],
+ self.tstats.values[:, None],
+ self.pvalues.values[:, None],
+ self.conf_int(),
+ ]
data = []
for row in param_data:
txt_row = []
@@ -410,35 +426,42 @@ def summary(self) -> Summary:
f = pval_format
txt_row.append(f(v))
data.append(txt_row)
- title = 'Parameter Estimates'
+ title = "Parameter Estimates"
table_stubs = list(self.params.index)
extra_text = []
if table_stubs:
- header = ['Parameter', 'Std. Err.', 'T-stat', 'P-value', 'Lower CI', 'Upper CI']
- table = SimpleTable(data,
- stubs=table_stubs,
- txt_fmt=fmt_params,
- headers=header,
- title=title)
+ header = [
+ "Parameter",
+ "Std. Err.",
+ "T-stat",
+ "P-value",
+ "Lower CI",
+ "Upper CI",
+ ]
+ table = SimpleTable(
+ data, stubs=table_stubs, txt_fmt=fmt_params, headers=header, title=title
+ )
smry.tables.append(table)
else:
- extra_text.append('Model contains no parameters')
+ extra_text.append("Model contains no parameters")
instruments = self.model.instruments
if instruments.shape[1] > 0:
endog = self.model.endog
- extra_text.append('Endogenous: ' + ', '.join(endog.cols))
- extra_text.append('Instruments: ' + ', '.join(instruments.cols))
+ extra_text.append("Endogenous: " + ", ".join(endog.cols))
+ extra_text.append("Instruments: " + ", ".join(instruments.cols))
cov_descr = str(self._cov_estimator)
- for line in cov_descr.split('\n'):
+ for line in cov_descr.split("\n"):
extra_text.append(line)
if extra_text:
smry.add_extra_txt(extra_text)
return smry
- def wald_test(self, restriction=None, value=None, *, formula=None) -> WaldTestStatistic:
+ def wald_test(
+ self, restriction=None, value=None, *, formula=None
+ ) -> WaldTestStatistic:
r"""
Test linear equality constraints using a Wald test
@@ -463,7 +486,7 @@ def wald_test(self, restriction=None, value=None, *, formula=None) -> WaldTestSt
Returns
-------
- t: WaldTestStatistic
+ WaldTestStatistic
Test statistic for null that restrictions are valid.
Notes
@@ -497,29 +520,35 @@ def wald_test(self, restriction=None, value=None, *, formula=None) -> WaldTestSt
>>> res.wald_test(formula=['exper = 0', 'I(exper ** 2) = 0'])
"""
- return quadratic_form_test(self._params, self.cov, restriction=restriction,
- value=value, formula=formula)
+ return quadratic_form_test(
+ self._params,
+ self.cov,
+ restriction=restriction,
+ value=value,
+ formula=formula,
+ )
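quadratic_form_test evaluates the usual quadratic form. A sketch of the computation for H0: R beta = v (names are illustrative):

import numpy as np
from scipy import stats

def wald(params: np.ndarray, cov: np.ndarray, r: np.ndarray, v: np.ndarray):
    # W = (R b - v)' (R V R')^{-1} (R b - v), chi2 with rank(R) df under H0
    diff = r @ params - v
    stat = float(diff.T @ np.linalg.inv(r @ cov @ r.T) @ diff)
    return stat, stats.chi2(r.shape[0]).sf(stat)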
class AbsorbingLSResults(OLSResults):
def __init__(self, results: Dict[str, Any], model):
super(AbsorbingLSResults, self).__init__(results, model)
- self._absorbed_rsquared = results['absorbed_r2']
- self._absorbed_effects = results['absorbed_effects']
+ self._absorbed_rsquared = results["absorbed_r2"]
+ self._absorbed_effects = results["absorbed_effects"]
def _top_right(self):
f_stat = _str(self.f_statistic.stat)
if isnan(self.f_statistic.stat):
- f_stat = ' N/A'
-
- return [('R-squared:', _str(self.rsquared)),
- ('Adj. R-squared:', _str(self.rsquared_adj)),
- ('F-statistic:', f_stat),
- ('P-value (F-stat):', pval_format(self.f_statistic.pval)),
- ('Distribution:', str(self.f_statistic.dist_name)),
- ('R-squared (No Effects):', _str(round(self.absorbed_rsquared, 5))),
- ('Varaibles Absorbed:', _str(self.df_absorbed))
- ]
+ f_stat = " N/A"
+
+ return [
+ ("R-squared:", _str(self.rsquared)),
+ ("Adj. R-squared:", _str(self.rsquared_adj)),
+ ("F-statistic:", f_stat),
+ ("P-value (F-stat):", pval_format(self.f_statistic.pval)),
+ ("Distribution:", str(self.f_statistic.dist_name)),
+ ("R-squared (No Effects):", _str(round(self.absorbed_rsquared, 5))),
+ ("Varaibles Absorbed:", _str(self.df_absorbed)),
+ ]
@property
def absorbed_rsquared(self) -> float:
@@ -561,7 +590,7 @@ def diagnostics(self) -> DataFrame:
Returns
-------
- res : DataFrame
+ DataFrame
DataFrame where each endogenous variable appears as a row and
the columns contain alternative measures. The columns are:
@@ -584,6 +613,7 @@ def diagnostics(self) -> DataFrame:
is with respect to the other included variables in the model.
"""
from linearmodels.iv.model import _OLS, IV2SLS
+
endog, exog, instr, weights = self.endog, self.exog, self.instr, self.weights
w = sqrt(weights.ndarray)
z = w * instr.ndarray
@@ -606,25 +636,36 @@ def diagnostics(self) -> DataFrame:
params = params[:, None]
stat = params.T @ inv(res.cov) @ params
stat = float(stat.squeeze())
- w_test = WaldTestStatistic(stat, null='', df=params.shape[0])
- inner = {'rsquared': individual_results[col].rsquared,
- 'partial.rsquared': res.rsquared,
- 'f.stat': w_test.stat,
- 'f.pval': w_test.pval,
- 'f.dist': w_test.dist_name}
+ w_test = WaldTestStatistic(stat, null="", df=params.shape[0])
+ inner = {
+ "rsquared": individual_results[col].rsquared,
+ "partial.rsquared": res.rsquared,
+ "f.stat": w_test.stat,
+ "f.pval": w_test.pval,
+ "f.dist": w_test.dist_name,
+ }
out[col] = Series(inner)
out_df = DataFrame(out).T
dep = self.dep
- r2sls = IV2SLS(dep, exog, endog, instr, weights=weights).fit(cov_type='unadjusted')
- rols = _OLS(dep, self._reg, weights=weights).fit(cov_type='unadjusted')
+ r2sls = IV2SLS(dep, exog, endog, instr, weights=weights).fit(
+ cov_type="unadjusted"
+ )
+ rols = _OLS(dep, self._reg, weights=weights).fit(cov_type="unadjusted")
shea = (rols.std_errors / r2sls.std_errors) ** 2
shea *= (1 - r2sls.rsquared) / (1 - rols.rsquared)
- out_df['shea.rsquared'] = shea[out_df.index]
- cols = ['rsquared', 'partial.rsquared', 'shea.rsquared', 'f.stat', 'f.pval', 'f.dist']
+ out_df["shea.rsquared"] = shea[out_df.index]
+ cols = [
+ "rsquared",
+ "partial.rsquared",
+ "shea.rsquared",
+ "f.stat",
+ "f.pval",
+ "f.dist",
+ ]
out_df = out_df[cols]
for c in out_df:
- out_df[c] = to_numeric(out_df[c], errors='ignore')
+ out_df[c] = to_numeric(out_df[c], errors="ignore")
return out_df
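The Shea rescaling above, extracted into a standalone formula (a sketch over precomputed inputs): Shea's partial R-squared scales the squared ratio of OLS to 2SLS standard errors by the relative residual variances.

import numpy as np

def shea_rsquared(se_ols, se_2sls, r2_ols, r2_2sls):
    # (se_ols / se_2sls)**2 * (1 - R2_2sls) / (1 - R2_ols), per the code above
    ratio = (np.asarray(se_ols) / np.asarray(se_2sls)) ** 2
    return ratio * (1 - r2_2sls) / (1 - r2_ols)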
@@ -635,11 +676,12 @@ def individual(self) -> Dict[str, OLSResults]:
Returns
-------
- res : dict
+ dict
Dictionary containing first stage estimation results. Keys are
the variable names of the endogenous regressors.
"""
from linearmodels.iv.model import _OLS
+
w = sqrt(self.weights.ndarray)
exog_instr = w * c_[self.exog.ndarray, self.instr.ndarray]
exog_instr = DataFrame(exog_instr, columns=self.exog.cols + self.instr.cols)
@@ -658,17 +700,19 @@ def summary(self) -> Summary:
Supports export to csv, html and latex using the methods ``summary.as_csv()``,
``summary.as_html()`` and ``summary.as_latex()``.
"""
- stubs_lookup = {'rsquared': 'R-squared',
- 'partial.rsquared': 'Partial R-squared',
- 'shea.rsquared': 'Shea\'s R-squared',
- 'f.stat': 'Partial F-statistic',
- 'f.pval': 'P-value (Partial F-stat)',
- 'f.dist': 'Partial F-stat Distn'}
+ stubs_lookup = {
+ "rsquared": "R-squared",
+ "partial.rsquared": "Partial R-squared",
+ "shea.rsquared": "Shea's R-squared",
+ "f.stat": "Partial F-statistic",
+ "f.pval": "P-value (Partial F-stat)",
+ "f.dist": "Partial F-stat Distn",
+ }
smry = Summary()
diagnostics = self.diagnostics
vals = []
for c in diagnostics:
- if c != 'f.dist':
+ if c != "f.dist":
vals.append([_str(v) for v in diagnostics[c]])
else:
vals.append([v for v in diagnostics[c]])
@@ -684,24 +728,28 @@ def summary(self) -> Summary:
params_fmt = [[_str(val) for val in row] for row in params_arr.T]
for i in range(1, len(params_fmt), 2):
for j in range(len(params_fmt[i])):
- params_fmt[i][j] = '({0})'.format(params_fmt[i][j])
+ params_fmt[i][j] = "({0})".format(params_fmt[i][j])
params_stub = []
for var in res.params.index:
- params_stub.extend([var, ''])
+ params_stub.extend([var, ""])
- title = 'First Stage Estimation Results'
+ title = "First Stage Estimation Results"
vals = table_concat((vals, params_fmt))
stubs = stub_concat((stubs, params_stub))
txt_fmt = default_txt_fmt.copy()
- txt_fmt['data_aligns'] = 'r'
- txt_fmt['header_align'] = 'r'
- table = SimpleTable(vals, headers=header, title=title, stubs=stubs, txt_fmt=txt_fmt)
+ txt_fmt["data_aligns"] = "r"
+ txt_fmt["header_align"] = "r"
+ table = SimpleTable(
+ vals, headers=header, title=title, stubs=stubs, txt_fmt=txt_fmt
+ )
smry.tables.append(table)
- extra_txt = ['T-stats reported in parentheses',
- 'T-stats use same covariance type as original model']
+ extra_txt = [
+ "T-stats reported in parentheses",
+ "T-stats use same covariance type as original model",
+ ]
smry.add_extra_txt(extra_txt)
return smry
@@ -713,7 +761,7 @@ class _CommonIVResults(OLSResults):
def __init__(self, results: Dict[str, Any], model):
super(_CommonIVResults, self).__init__(results, model)
- self._liml_kappa = results.get('liml_kappa', None)
+ self._liml_kappa = results.get("liml_kappa", None)
@property
def first_stage(self) -> FirstStageResults:
@@ -722,13 +770,18 @@ def first_stage(self) -> FirstStageResults:
Returns
-------
- first : FirstStageResults
+ FirstStageResults
Object containing results for diagnosing instrument relevance issues.
"""
- return FirstStageResults(self.model.dependent, self.model.exog,
- self.model.endog, self.model.instruments,
- self.model.weights, self._cov_type,
- self._cov_config)
+ return FirstStageResults(
+ self.model.dependent,
+ self.model.exog,
+ self.model.endog,
+ self.model.instruments,
+ self.model.weights,
+ self._cov_type,
+ self._cov_config,
+ )
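Usage sketch for the property above; `res` is assumed to be any fitted IV result from this module:

fs = res.first_stage
print(fs.diagnostics)   # R2, partial R2, Shea's R2 and partial F per endogenous variable
print(fs.summary)       # formatted table including first-stage parameter estimates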
class IVResults(_CommonIVResults):
@@ -745,7 +798,7 @@ class IVResults(_CommonIVResults):
def __init__(self, results: Dict[str, Any], model):
super(IVResults, self).__init__(results, model)
- self._kappa = results.get('kappa', 1)
+ self._kappa = results.get("kappa", 1)
@cached_property
def sargan(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
@@ -754,7 +807,7 @@ def sargan(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -781,15 +834,17 @@ def sargan(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
z = self.model.instruments.ndarray
nobs, ninstr = z.shape
nendog = self.model.endog.shape[1]
- name = 'Sargan\'s test of overidentification'
+ name = "Sargan's test of overidentification"
if ninstr - nendog == 0:
- return InvalidTestStatistic('Test requires more instruments than '
- 'endogenous variables.', name=name)
+ return InvalidTestStatistic(
+ "Test requires more instruments than " "endogenous variables.",
+ name=name,
+ )
eps = self.resids.values[:, None]
u = annihilate(eps, self.model._z)
stat = nobs * (1 - (u.T @ u) / (eps.T @ eps)).squeeze()
- null = 'The model is not overidentified.'
+ null = "The model is not overidentified."
return WaldTestStatistic(stat, null, ninstr - nendog, name=name)
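The Sargan computation above in isolation: n times one minus the ratio of residual sums of squares after and before projecting the 2SLS residuals off the combined matrix of exogenous regressors and instruments (a sketch; eps is (n, 1) and z stands for model._z):

import numpy as np

def sargan_stat(eps, z):
    u = eps - z @ np.linalg.lstsq(z, eps, rcond=None)[0]   # annihilate(eps, z)
    return float(eps.shape[0] * (1 - (u.T @ u) / (eps.T @ eps)).squeeze())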
@@ -800,7 +855,7 @@ def basmann(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -828,10 +883,12 @@ def basmann(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
ninstr = mod.instruments.shape[1]
nobs, nendog = mod.endog.shape
nz = mod._z.shape[1]
- name = 'Basmann\'s test of overidentification'
+ name = "Basmann's test of overidentification"
if ninstr - nendog == 0:
- return InvalidTestStatistic('Test requires more instruments than '
- 'endogenous variables.', name=name)
+ return InvalidTestStatistic(
+ "Test requires more instruments than " "endogenous variables.",
+ name=name,
+ )
sargan_test = self.sargan
s = sargan_test.stat
stat = s * (nobs - nz) / (nobs - s)
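Basmann's statistic is a degrees-of-freedom correction of Sargan's, exactly as computed above (s is the Sargan statistic, nz the column count of the combined exog-plus-instrument matrix):

def basmann_stat(s, nobs, nz):
    return s * (nobs - nz) / (nobs - s)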
@@ -856,8 +913,10 @@ def _endogeneity_setup(self, var_names=None):
ntested = assumed_exog.shape[1]
from linearmodels.iv import IV2SLS
- mod = IV2SLS(self.model.dependent, aug_exog, still_endog,
- self.model.instruments)
+
+ mod = IV2SLS(
+ self.model.dependent, aug_exog, still_endog, self.model.instruments
+ )
e0 = mod.fit().resids.values[:, None]
z2 = c_[self.model.exog.ndarray, self.model.instruments.ndarray]
@@ -879,7 +938,7 @@ def durbin(self, variables=None) -> WaldTestStatistic:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -910,15 +969,15 @@ def durbin(self, variables=None) -> WaldTestStatistic:
where :math:`q` is the number of variables tested.
"""
- null = 'All endogenous variables are exogenous'
+ null = "All endogenous variables are exogenous"
if variables is not None:
- null = 'Variables {0} are exogenous'.format(', '.join(variables))
+ null = "Variables {0} are exogenous".format(", ".join(variables))
e0, e1, e2, nobs, _, _, ntested = self._endogeneity_setup(variables)
stat = e1.T @ e1 - e2.T @ e2
stat /= (e0.T @ e0) / nobs
- name = 'Durbin test of exogeneity'
+ name = "Durbin test of exogeneity"
df = ntested
return WaldTestStatistic(float(stat), null, df, name=name)
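The Durbin statistic above as a standalone function over the residual vectors produced by _endogeneity_setup (a sketch; each e is an (n, 1) array):

def durbin_stat(e0, e1, e2):
    nobs = e0.shape[0]
    return float((e1.T @ e1 - e2.T @ e2) / ((e0.T @ e0) / nobs))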
@@ -934,7 +993,7 @@ def wu_hausman(self, variables=None) -> WaldTestStatistic:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -967,20 +1026,20 @@ def wu_hausman(self, variables=None) -> WaldTestStatistic:
:math:`v = n - n_{endog} - n_{exog} - q`. The test statistic has a
:math:`F_{q, v}` distribution.
"""
- null = 'All endogenous variables are exogenous'
+ null = "All endogenous variables are exogenous"
if variables is not None:
- null = 'Variables {0} are exogenous'.format(', '.join(variables))
+ null = "Variables {0} are exogenous".format(", ".join(variables))
e0, e1, e2, nobs, nexog, nendog, ntested = self._endogeneity_setup(variables)
df = ntested
df_denom = nobs - nexog - nendog - ntested
- delta = (e1.T @ e1 - e2.T @ e2)
+ delta = e1.T @ e1 - e2.T @ e2
stat = delta / df
stat /= (e0.T @ e0 - delta) / df_denom
stat = float(stat)
- name = 'Wu-Hausman test of exogeneity'
+ name = "Wu-Hausman test of exogeneity"
return WaldTestStatistic(stat, null, df, df_denom, name=name)
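The Wu-Hausman variant of the same comparison, mirroring the code above: the change in residual sum of squares is converted to an F-statistic (a sketch over the _endogeneity_setup outputs):

def wu_hausman_stat(e0, e1, e2, nobs, nexog, nendog, ntested):
    delta = e1.T @ e1 - e2.T @ e2
    df, df_denom = ntested, nobs - nexog - nendog - ntested
    return float((delta / df) / ((e0.T @ e0 - delta) / df_denom))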
@cached_property
@@ -990,7 +1049,7 @@ def wooldridge_score(self) -> WaldTestStatistic:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -1020,11 +1079,11 @@ def wooldridge_score(self) -> WaldTestStatistic:
r = annihilate(self.model.endog.ndarray, self.model._z)
nobs = e.shape[0]
r = annihilate(r, self.model._x)
- res = _OLS(ones((nobs, 1)), r * e).fit(cov_type='unadjusted')
+ res = _OLS(ones((nobs, 1)), r * e).fit(cov_type="unadjusted")
stat = res.nobs - res.resid_ss
df = self.model.endog.shape[1]
- null = 'Endogenous variables are exogenous'
- name = 'Wooldridge\'s score test of exogeneity'
+ null = "Endogenous variables are exogenous"
+ name = "Wooldridge's score test of exogeneity"
return WaldTestStatistic(stat, null, df, name=name)
@cached_property
@@ -1034,7 +1093,7 @@ def wooldridge_regression(self) -> WaldTestStatistic:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -1057,6 +1116,7 @@ def wooldridge_regression(self) -> WaldTestStatistic:
identical to the covariance estimator used with ``fit``.
"""
from linearmodels.iv.model import _OLS
+
r = annihilate(self.model.endog.ndarray, self.model._z)
augx = c_[self.model._x, r]
mod = _OLS(self.model.dependent, augx)
@@ -1066,8 +1126,8 @@ def wooldridge_regression(self) -> WaldTestStatistic:
test_cov = res.cov.values[norig:, norig:]
stat = test_params.T @ inv(test_cov) @ test_params
df = len(test_params)
- null = 'Endogenous variables are exogenous'
- name = 'Wooldridge\'s regression test of exogeneity'
+ null = "Endogenous variables are exogenous"
+ name = "Wooldridge's regression test of exogeneity"
return WaldTestStatistic(stat, null, df, name=name)
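The regression variant above is a variable-addition test: the structural regressors are augmented with the first-stage residuals and a Wald statistic is formed on the added block. A sketch of the final step, with test_params and test_cov assumed extracted as in the code:

import numpy as np

def added_block_wald(test_params, test_cov):
    # b' V^{-1} b over the coefficients on the appended first-stage residuals
    return float(test_params.T @ np.linalg.inv(test_cov) @ test_params)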
@cached_property
@@ -1077,7 +1137,7 @@ def wooldridge_overid(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -1100,24 +1160,27 @@ def wooldridge_overid(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
The order of the instruments does not affect this test.
"""
from linearmodels.iv.model import _OLS
+
exog, endog = self.model.exog, self.model.endog
instruments = self.model.instruments
nobs, nendog = endog.shape
ninstr = instruments.shape[1]
- name = 'Wooldridge\'s score test of overidentification'
+ name = "Wooldridge's score test of overidentification"
if ninstr - nendog == 0:
- return InvalidTestStatistic('Test requires more instruments than '
- 'endogenous variables.', name=name)
+ return InvalidTestStatistic(
+ "Test requires more instruments than " "endogenous variables.",
+ name=name,
+ )
endog_hat = proj(endog.ndarray, c_[exog.ndarray, instruments.ndarray])
- q = instruments.ndarray[:, :(ninstr - nendog)]
+ q = instruments.ndarray[:, : (ninstr - nendog)]
q_res = annihilate(q, c_[self.model.exog.ndarray, endog_hat])
test_functions = q_res * self.resids.values[:, None]
- res = _OLS(ones((nobs, 1)), test_functions).fit(cov_type='unadjusted')
+ res = _OLS(ones((nobs, 1)), test_functions).fit(cov_type="unadjusted")
stat = res.nobs * res.rsquared
df = ninstr - nendog
- null = 'Model is not overidentified.'
+ null = "Model is not overidentified."
return WaldTestStatistic(stat, null, df, name=name)
@cached_property
@@ -1127,7 +1190,7 @@ def anderson_rubin(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -1144,13 +1207,15 @@ def anderson_rubin(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
"""
nobs, ninstr = self.model.instruments.shape
nendog = self.model.endog.shape[1]
- name = 'Anderson-Rubin test of overidentification'
+ name = "Anderson-Rubin test of overidentification"
if ninstr - nendog == 0:
- return InvalidTestStatistic('Test requires more instruments than '
- 'endogenous variables.', name=name)
+ return InvalidTestStatistic(
+ "Test requires more instruments than " "endogenous variables.",
+ name=name,
+ )
stat = nobs * log(self._liml_kappa)
df = ninstr - nendog
- null = 'The model is not overidentified.'
+ null = "The model is not overidentified."
return WaldTestStatistic(stat, null, df, name=name)
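The Anderson-Rubin statistic above uses only the LIML kappa; Basmann's F in the next property rescales kappa - 1 instead of taking a log (a sketch):

import numpy as np

def anderson_rubin_stat(nobs, liml_kappa):
    return nobs * np.log(liml_kappa)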
@cached_property
@@ -1160,7 +1225,7 @@ def basmann_f(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -1177,14 +1242,16 @@ def basmann_f(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
"""
nobs, ninstr = self.model.instruments.shape
nendog, nexog = self.model.endog.shape[1], self.model.exog.shape[1]
- name = 'Basmann\' F test of overidentification'
+ name = "Basmann' F test of overidentification"
if ninstr - nendog == 0:
- return InvalidTestStatistic('Test requires more instruments than '
- 'endogenous variables.', name=name)
+ return InvalidTestStatistic(
+ "Test requires more instruments than " "endogenous variables.",
+ name=name,
+ )
df = ninstr - nendog
df_denom = nobs - (nexog + ninstr)
stat = (self._liml_kappa - 1) * df_denom / df
- null = 'The model is not overidentified.'
+ null = "The model is not overidentified."
return WaldTestStatistic(stat, null, df, df_denom=df_denom, name=name)
@@ -1202,11 +1269,11 @@ class IVGMMResults(_CommonIVResults):
def __init__(self, results, model):
super(IVGMMResults, self).__init__(results, model)
- self._weight_mat = results['weight_mat']
- self._weight_type = results['weight_type']
- self._weight_config = results['weight_config']
- self._iterations = results['iterations']
- self._j_stat = results['j_stat']
+ self._weight_mat = results["weight_mat"]
+ self._weight_type = results["weight_type"]
+ self._weight_config = results["weight_config"]
+ self._iterations = results["iterations"]
+ self._j_stat = results["j_stat"]
@property
def weight_matrix(self) -> ndarray:
@@ -1235,7 +1302,7 @@ def j_stat(self) -> Union[InvalidTestStatistic, WaldTestStatistic]:
Returns
-------
- j : WaldTestStatistic
+ WaldTestStatistic
J statistic test of overidentifying restrictions
Notes
@@ -1268,7 +1335,7 @@ def c_stat(self, variables=None) -> WaldTestStatistic:
Returns
-------
- t : WaldTestStatistic
+ WaldTestStatistic
Object containing test statistic, p-value, distribution and null
Notes
@@ -1308,15 +1375,16 @@ def c_stat(self, variables=None) -> WaldTestStatistic:
exog_e = c_[exog.ndarray, endog.ndarray]
nobs = exog_e.shape[0]
endog_e = empty((nobs, 0))
- null = 'All endogenous variables are exogenous'
+ null = "All endogenous variables are exogenous"
else:
if not isinstance(variables, list):
variables = [variables]
exog_e = c_[exog.ndarray, endog.pandas[variables].values]
ex = [c for c in endog.pandas if c not in variables]
endog_e = endog.pandas[ex].values
- null = 'Variables {0} are exogenous'.format(', '.join(variables))
+ null = "Variables {0} are exogenous".format(", ".join(variables))
from linearmodels.iv import IVGMM
+
mod = IVGMM(dependent, exog_e, endog_e, instruments)
res_e = mod.fit(cov_type=self.cov_type, **self.cov_config)
j_e = res_e.j_stat.stat
@@ -1331,7 +1399,7 @@ def c_stat(self, variables=None) -> WaldTestStatistic:
stat = j_e - j_c
df = exog_e.shape[1] - exog.shape[1]
- return WaldTestStatistic(stat, null, df, name='C-statistic')
+ return WaldTestStatistic(stat, null, df, name="C-statistic")
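Usage sketch for c_stat; `res` is assumed to be a fitted IVGMM result. The statistic is the difference between the J statistic of the model re-estimated with the tested variables treated as exogenous and the J statistic of the original moment conditions evaluated on that model's weight matrix:

c = res.c_stat()   # null: all endogenous variables are exogenous
print(c.stat, c.pval)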
AnyResult = Union[IVResults, IVGMMResults, OLSResults]
@@ -1350,26 +1418,31 @@ class IVModelComparison(_ModelComparison):
Estimator precision estimator to include in the comparison output.
Default is 'tstats'.
"""
+
_supported = (IVResults, IVGMMResults, OLSResults)
- def __init__(self, results: Union[List[AnyResult], Dict[str, AnyResult]], *,
- precision: str = 'tstats'):
+ def __init__(
+ self,
+ results: Union[List[AnyResult], Dict[str, AnyResult]],
+ *,
+ precision: str = "tstats"
+ ):
super(IVModelComparison, self).__init__(results, precision=precision)
@property
def rsquared_adj(self) -> float:
"""Sample-size adjusted coefficients of determination (R**2)"""
- return self._get_property('rsquared_adj')
+ return self._get_property("rsquared_adj")
@property
def estimator_method(self) -> str:
"""Estimation methods"""
- return self._get_property('_method')
+ return self._get_property("_method")
@property
def cov_estimator(self) -> str:
"""Covariance estimator descriptions"""
- return self._get_property('cov_estimator')
+ return self._get_property("cov_estimator")
@property
def summary(self) -> Summary:
@@ -1380,16 +1453,36 @@ def summary(self) -> Summary:
"""
smry = Summary()
models = list(self._results.keys())
- title = 'Model Comparison'
- stubs = ['Dep. Variable', 'Estimator', 'No. Observations', 'Cov. Est.', 'R-squared',
- 'Adj. R-squared', 'F-statistic', 'P-value (F-stat)']
- dep_name = OrderedDict() # type: Dict[str, Union[IVResults, IVGMMResults, OLSResults]]
+ title = "Model Comparison"
+ stubs = [
+ "Dep. Variable",
+ "Estimator",
+ "No. Observations",
+ "Cov. Est.",
+ "R-squared",
+ "Adj. R-squared",
+ "F-statistic",
+ "P-value (F-stat)",
+ ]
+ dep_name = (
+ OrderedDict()
+ ) # type: Dict[str, Union[IVResults, IVGMMResults, OLSResults]]
for key in self._results:
dep_name[key] = self._results[key].model.dependent.cols[0]
dep_name = Series(dep_name)
- vals = concat([dep_name, self.estimator_method, self.nobs, self.cov_estimator,
- self.rsquared, self.rsquared_adj, self.f_statistic], 1)
+ vals = concat(
+ [
+ dep_name,
+ self.estimator_method,
+ self.nobs,
+ self.cov_estimator,
+ self.rsquared,
+ self.rsquared_adj,
+ self.f_statistic,
+ ],
+ 1,
+ )
vals = [[i for i in v] for v in vals.T.values]
vals[2] = [str(v) for v in vals[2]]
for i in range(4, len(vals)):
@@ -1404,11 +1497,11 @@ def summary(self) -> Summary:
precision_fmt = []
for v in precision.values[i]:
v_str = _str(v)
- v_str = '({0})'.format(v_str) if v_str.strip() else v_str
+ v_str = "({0})".format(v_str) if v_str.strip() else v_str
precision_fmt.append(v_str)
params_fmt.append(precision_fmt)
params_stub.append(params.index[i])
- params_stub.append(' ')
+ params_stub.append(" ")
vals = table_concat((vals, params_fmt))
stubs = stub_concat((stubs, params_stub))
@@ -1419,33 +1512,35 @@ def summary(self) -> Summary:
all_instr.append(res.model.instruments.cols)
ninstr = max(map(len, all_instr))
instruments = []
- instrument_stub = ['Instruments']
+ instrument_stub = ["Instruments"]
for i in range(ninstr):
if i > 0:
- instrument_stub.append('')
+ instrument_stub.append("")
row = []
for j in range(len(self._results)):
instr = all_instr[j]
if len(instr) > i:
row.append(instr[i])
else:
- row.append('')
+ row.append("")
instruments.append(row)
if instruments:
vals = table_concat((vals, instruments))
stubs = stub_concat((stubs, instrument_stub))
txt_fmt = default_txt_fmt.copy()
- txt_fmt['data_aligns'] = 'r'
- txt_fmt['header_align'] = 'r'
- table = SimpleTable(vals, headers=models, title=title, stubs=stubs, txt_fmt=txt_fmt)
+ txt_fmt["data_aligns"] = "r"
+ txt_fmt["header_align"] = "r"
+ table = SimpleTable(
+ vals, headers=models, title=title, stubs=stubs, txt_fmt=txt_fmt
+ )
smry.tables.append(table)
prec_type = self._PRECISION_TYPES[self._precision]
- smry.add_extra_txt(['{0} reported in parentheses'.format(prec_type)])
+ smry.add_extra_txt(["{0} reported in parentheses".format(prec_type)])
return smry
-def compare(results, *, precision='tstats') -> IVModelComparison:
+def compare(results, *, precision="tstats") -> IVModelComparison:
"""
Compare the results of multiple models
@@ -1460,6 +1555,7 @@ def compare(results, *, precision='tstats') -> IVModelComparison:
Returns
-------
- comparison : IVModelComparison
+ IVModelComparison
+ The model comparison object.
"""
return IVModelComparison(results, precision=precision)
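Usage sketch for compare(); the import path assumes this file is linearmodels/iv/results.py, and res_ols/res_2sls stand for any two fitted results:

from linearmodels.iv.results import compare

comparison = compare({"OLS": res_ols, "2SLS": res_2sls}, precision="tstats")
print(comparison.summary)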
diff --git a/linearmodels/panel/__init__.py b/linearmodels/panel/__init__.py
index 9d8febe1d3..60536bba79 100644
--- a/linearmodels/panel/__init__.py
+++ b/linearmodels/panel/__init__.py
@@ -3,5 +3,12 @@
RandomEffects)
from linearmodels.panel.results import compare
-__all__ = ['PanelOLS', 'PooledOLS', 'RandomEffects', 'FirstDifferenceOLS', 'BetweenOLS',
- 'FamaMacBeth', 'compare']
+__all__ = [
+ "PanelOLS",
+ "PooledOLS",
+ "RandomEffects",
+ "FirstDifferenceOLS",
+ "BetweenOLS",
+ "FamaMacBeth",
+ "compare",
+]
diff --git a/linearmodels/panel/covariance.py b/linearmodels/panel/covariance.py
index 570b5eb5f1..3e06b7ef6b 100644
--- a/linearmodels/panel/covariance.py
+++ b/linearmodels/panel/covariance.py
@@ -1,14 +1,19 @@
-from property_cached import cached_property
import numpy as np
from numpy.linalg import inv
from pandas import DataFrame
+from property_cached import cached_property
from linearmodels.iv.covariance import (CLUSTER_ERR, KERNEL_LOOKUP,
_cov_cluster, _cov_kernel,
kernel_optimal_bandwidth)
-__all__ = ['HomoskedasticCovariance', 'HeteroskedasticCovariance',
- 'ClusteredCovariance', 'DriscollKraay', 'CovarianceManager']
+__all__ = [
+ "HomoskedasticCovariance",
+ "HeteroskedasticCovariance",
+ "ClusteredCovariance",
+ "DriscollKraay",
+ "CovarianceManager",
+]
class HomoskedasticCovariance(object):
@@ -56,7 +61,9 @@ class HomoskedasticCovariance(object):
``True``.
"""
- def __init__(self, y, x, params, entity_ids, time_ids, *, debiased=False, extra_df=0):
+ def __init__(
+ self, y, x, params, entity_ids, time_ids, *, debiased=False, extra_df=0
+ ):
self._y = y
self._x = x
self._params = params
@@ -69,7 +76,7 @@ def __init__(self, y, x, params, entity_ids, time_ids, *, debiased=False, extra_
if debiased:
self._nobs_eff -= self._nvar
self._scale = self._nobs / self._nobs_eff
- self._name = 'Unadjusted'
+ self._name = "Unadjusted"
@property
def name(self):
@@ -146,10 +153,13 @@ class HeteroskedasticCovariance(HomoskedasticCovariance):
``True``.
"""
- def __init__(self, y, x, params, entity_ids, time_ids, *, debiased=False, extra_df=0):
- super(HeteroskedasticCovariance, self).__init__(y, x, params, entity_ids, time_ids,
- debiased=debiased, extra_df=extra_df)
- self._name = 'Robust'
+ def __init__(
+ self, y, x, params, entity_ids, time_ids, *, debiased=False, extra_df=0
+ ):
+ super(HeteroskedasticCovariance, self).__init__(
+ y, x, params, entity_ids, time_ids, debiased=debiased, extra_df=extra_df
+ )
+ self._name = "Robust"
@cached_property
def cov(self):
@@ -196,7 +206,7 @@ class ClusteredCovariance(HomoskedasticCovariance):
Returns
-------
- cov : array
+ ndarray
Estimated parameter covariance
Notes
@@ -231,23 +241,34 @@ class ClusteredCovariance(HomoskedasticCovariance):
observations.
"""
- def __init__(self, y, x, params, entity_ids, time_ids, *, debiased=False, extra_df=0,
- clusters=None,
- group_debias=False):
- super(ClusteredCovariance, self).__init__(y, x, params, entity_ids, time_ids,
- debiased=debiased, extra_df=extra_df)
+ def __init__(
+ self,
+ y,
+ x,
+ params,
+ entity_ids,
+ time_ids,
+ *,
+ debiased=False,
+ extra_df=0,
+ clusters=None,
+ group_debias=False
+ ):
+ super(ClusteredCovariance, self).__init__(
+ y, x, params, entity_ids, time_ids, debiased=debiased, extra_df=extra_df
+ )
if clusters is None:
clusters = np.arange(self._x.shape[0])
clusters = np.asarray(clusters).squeeze()
self._group_debias = group_debias
dim1 = 1 if clusters.ndim == 1 else clusters.shape[1]
if clusters.ndim > 2 or dim1 > 2:
- raise ValueError('Only 1 or 2-way clustering supported.')
+ raise ValueError("Only 1 or 2-way clustering supported.")
nobs = y.shape[0]
if clusters.shape[0] != nobs:
raise ValueError(CLUSTER_ERR.format(nobs, clusters.shape[0]))
self._clusters = clusters
- self._name = 'Clustered'
+ self._name = "Clustered"
@staticmethod
def _calc_group_debias(clusters):
@@ -362,11 +383,23 @@ class DriscollKraay(HomoskedasticCovariance):
# TODO: Test
- def __init__(self, y, x, params, entity_ids, time_ids, *, debiased=False, extra_df=0,
- kernel='newey-west', bandwidth=None):
- super(DriscollKraay, self).__init__(y, x, params, entity_ids, time_ids,
- debiased=debiased, extra_df=extra_df)
- self._name = 'Driscoll-Kraay'
+ def __init__(
+ self,
+ y,
+ x,
+ params,
+ entity_ids,
+ time_ids,
+ *,
+ debiased=False,
+ extra_df=0,
+ kernel="newey-west",
+ bandwidth=None
+ ):
+ super(DriscollKraay, self).__init__(
+ y, x, params, entity_ids, time_ids, debiased=debiased, extra_df=extra_df
+ )
+ self._name = "Driscoll-Kraay"
self._kernel = kernel
self._bandwidth = bandwidth
@@ -464,11 +497,23 @@ class ACCovariance(HomoskedasticCovariance):
# TODO: Docstring
- def __init__(self, y, x, params, entity_ids, time_ids, *, debiased=False, extra_df=0,
- kernel='newey-west', bandwidth=None):
- super(ACCovariance, self).__init__(y, x, params, entity_ids, time_ids,
- debiased=debiased, extra_df=extra_df)
- self._name = 'Autocorrelation Rob. Cov.'
+ def __init__(
+ self,
+ y,
+ x,
+ params,
+ entity_ids,
+ time_ids,
+ *,
+ debiased=False,
+ extra_df=0,
+ kernel="newey-west",
+ bandwidth=None
+ ):
+ super(ACCovariance, self).__init__(
+ y, x, params, entity_ids, time_ids, debiased=debiased, extra_df=extra_df
+ )
+ self._name = "Autocorrelation Rob. Cov."
self._kernel = kernel
self._bandwidth = bandwidth
@@ -511,17 +556,19 @@ def cov(self):
class CovarianceManager(object):
- COVARIANCE_ESTIMATORS = {'unadjusted': HomoskedasticCovariance,
- 'conventional': HomoskedasticCovariance,
- 'homoskedastic': HomoskedasticCovariance,
- 'robust': HeteroskedasticCovariance,
- 'heteroskedastic': HeteroskedasticCovariance,
- 'clustered': ClusteredCovariance,
- 'driscoll-kraay': DriscollKraay,
- 'dk': DriscollKraay,
- 'kernel': DriscollKraay,
- 'ac': ACCovariance,
- 'autocorrelated': ACCovariance}
+ COVARIANCE_ESTIMATORS = {
+ "unadjusted": HomoskedasticCovariance,
+ "conventional": HomoskedasticCovariance,
+ "homoskedastic": HomoskedasticCovariance,
+ "robust": HeteroskedasticCovariance,
+ "heteroskedastic": HeteroskedasticCovariance,
+ "clustered": ClusteredCovariance,
+ "driscoll-kraay": DriscollKraay,
+ "dk": DriscollKraay,
+ "kernel": DriscollKraay,
+ "ac": ACCovariance,
+ "autocorrelated": ACCovariance,
+ }
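The mapping above is what fit(cov_type=...) resolves through; a usage sketch, where mod stands for an already-constructed panel model:

res_robust = mod.fit(cov_type="robust")               # HeteroskedasticCovariance
res_dk = mod.fit(cov_type="driscoll-kraay")           # DriscollKraay
res_clust = mod.fit(cov_type="clustered", cluster_entity=True)  # ClusteredCovariance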
def __init__(self, estimator, *cov_estimators):
self._estimator = estimator
@@ -529,11 +576,13 @@ def __init__(self, estimator, *cov_estimators):
def __getitem__(self, item):
if item not in self.COVARIANCE_ESTIMATORS:
- raise KeyError('Unknown covariance estimator type.')
+ raise KeyError("Unknown covariance estimator type.")
cov_est = self.COVARIANCE_ESTIMATORS[item]
if cov_est not in self._supported:
- raise ValueError('Requested covariance estimator is not supported '
- 'for the {0}.'.format(self._estimator))
+ raise ValueError(
+ "Requested covariance estimator is not supported "
+ "for the {0}.".format(self._estimator)
+ )
return cov_est
@@ -561,9 +610,11 @@ class FamaMacBethCovariance(HomoskedasticCovariance):
"""
def __init__(self, y, x, params, all_params, *, debiased=False):
- super(FamaMacBethCovariance, self).__init__(y, x, params, None, None, debiased=debiased)
+ super(FamaMacBethCovariance, self).__init__(
+ y, x, params, None, None, debiased=debiased
+ )
self._all_params = all_params
- self._name = 'Fama-MacBeth Std Cov'
+ self._name = "Fama-MacBeth Std Cov"
@cached_property
def cov(self):
@@ -571,7 +622,7 @@ def cov(self):
e = self._all_params - self._params.T
e = e[np.all(np.isfinite(e), 1)]
nobs = e.shape[0]
- cov = (e.T @ e / nobs)
+ cov = e.T @ e / nobs
return cov / (nobs - int(bool(self._debiased)))
@@ -602,11 +653,21 @@ class FamaMacBethKernelCovariance(FamaMacBethCovariance):
Covariance is a Kernel covariance of all estimated parameters.
"""
- def __init__(self, y, x, params, all_params, *, debiased=False, kernel='newey-west',
- bandwidth=None):
- super(FamaMacBethKernelCovariance, self).__init__(y, x, params, all_params,
- debiased=debiased)
- self._name = 'Fama-MacBeth Kernel Cov'
+ def __init__(
+ self,
+ y,
+ x,
+ params,
+ all_params,
+ *,
+ debiased=False,
+ kernel="newey-west",
+ bandwidth=None
+ ):
+ super(FamaMacBethKernelCovariance, self).__init__(
+ y, x, params, all_params, debiased=debiased
+ )
+ self._name = "Fama-MacBeth Kernel Cov"
self._bandwidth = bandwidth
self._kernel = kernel
diff --git a/linearmodels/panel/data.py b/linearmodels/panel/data.py
index e4f1496c4f..7418647ee0 100644
--- a/linearmodels/panel/data.py
+++ b/linearmodels/panel/data.py
@@ -12,7 +12,7 @@
from linearmodels.utility import ensure_unique_column, panel_to_frame
-__all__ = ['PanelData']
+__all__ = ["PanelData"]
class _Panel(object):
@@ -35,14 +35,15 @@ def __init__(self, df):
index = df.index
self._major_axis = Index(index.levels[1][get_codes(index)[1]]).unique()
self._minor_axis = Index(index.levels[0][get_codes(index)[0]]).unique()
- self._full_index = MultiIndex.from_product([self._minor_axis,
- self._major_axis])
+ self._full_index = MultiIndex.from_product([self._minor_axis, self._major_axis])
new_df = df.reindex(self._full_index)
new_df.index.names = df.index.names
self._frame = new_df
i, j, k = len(self._items), len(self._major_axis), len(self.minor_axis)
self._shape = (i, j, k)
- self._values = np.swapaxes(np.reshape(np.asarray(new_df).copy().T, (i, k, j)), 1, 2)
+ self._values = np.swapaxes(
+ np.reshape(np.asarray(new_df).copy().T, (i, k, j)), 1, 2
+ )
@classmethod
def from_array(cls, values, items, major_axis, minor_axis):
@@ -81,11 +82,11 @@ def to_frame(self):
def convert_columns(s, drop_first):
if is_string_dtype(s.dtype) and s.map(is_string_like).all():
- s = s.astype('category')
+ s = s.astype("category")
if is_categorical(s):
out = get_dummies(s, drop_first=drop_first)
- out.columns = [str(s.name) + '.' + str(c) for c in out]
+ out.columns = [str(s.name) + "." + str(c) for c in out]
return out
return s
@@ -140,13 +141,15 @@ class PanelData(object):
DataFrame does not have 2 levels
"""
- def __init__(self, x, var_name='x', convert_dummies=True, drop_first=True, copy=True):
+ def __init__(
+ self, x, var_name="x", convert_dummies=True, drop_first=True, copy=True
+ ):
self._var_name = var_name
self._convert_dummies = convert_dummies
self._drop_first = drop_first
self._panel = None
self._shape = None
- index_names = ['entity', 'time']
+ index_names = ["entity", "time"]
if isinstance(x, PanelData):
x = x.dataframe
self._original = x
@@ -154,9 +157,10 @@ def __init__(self, x, var_name='x', convert_dummies=True, drop_first=True, copy=
if not isinstance(x, (Series, DataFrame, np.ndarray)):
try:
from xarray import DataArray
+
if isinstance(x, DataArray):
if x.ndim not in (2, 3):
- raise ValueError('Only 2-d or 3-d DataArrays are supported')
+ raise ValueError("Only 2-d or 3-d DataArrays are supported")
if x.ndim == 2:
x = x.to_pandas()
else:
@@ -171,14 +175,15 @@ def __init__(self, x, var_name='x', convert_dummies=True, drop_first=True, copy=
if isinstance(x, Series) and isinstance(x.index, MultiIndex):
x = DataFrame(x)
elif isinstance(x, Series):
- raise ValueError('Series can only be used with a 2-level MultiIndex')
+ raise ValueError("Series can only be used with a 2-level MultiIndex")
if isinstance(x, DataFrame):
if isinstance(x, DataFrame):
if isinstance(x.index, MultiIndex):
if len(x.index.levels) != 2:
- raise ValueError('DataFrame input must have a '
- 'MultiIndex with 2 levels')
+ raise ValueError(
+ "DataFrame input must have a " "MultiIndex with 2 levels"
+ )
if isinstance(self._original, (DataFrame, PanelData, Series)):
for i in range(2):
index_names[i] = x.index.levels[i].name or index_names[i]
@@ -191,33 +196,36 @@ def __init__(self, x, var_name='x', convert_dummies=True, drop_first=True, copy=
self._frame = x.swapaxes(1, 2).to_frame(filter_observations=False)
elif isinstance(x, np.ndarray):
if x.ndim not in (2, 3):
- raise ValueError('2 or 3-d array required for numpy input')
+ raise ValueError("2 or 3-d array required for numpy input")
if x.ndim == 2:
x = x[None, :, :]
k, t, n = x.shape
- var_str = var_name + '.{0:0>' + str(int(np.log10(k) + .01)) + '}'
+ var_str = var_name + ".{0:0>" + str(int(np.log10(k) + 0.01)) + "}"
variables = [var_name] if k == 1 else [var_str.format(i) for i in range(k)]
- entity_str = 'entity.{0:0>' + str(int(np.log10(n) + .01)) + '}'
+ entity_str = "entity.{0:0>" + str(int(np.log10(n) + 0.01)) + "}"
entities = [entity_str.format(i) for i in range(n)]
time = list(range(t))
x = x.astype(np.float64, copy=False)
- panel = _Panel.from_array(x, items=variables, major_axis=time,
- minor_axis=entities)
+ panel = _Panel.from_array(
+ x, items=variables, major_axis=time, minor_axis=entities
+ )
self._fake_panel = panel
self._frame = panel.to_frame()
else:
- raise TypeError('Only ndarrays, DataFrames or DataArrays are '
- 'supported')
+ raise TypeError("Only ndarrays, DataFrames or DataArrays are " "supported")
if convert_dummies:
self._frame = expand_categoricals(self._frame, drop_first)
self._frame = self._frame.astype(np.float64, copy=False)
time_index = Series(self._frame.index.levels[1])
- if not (is_numeric_dtype(time_index.dtype) or
- is_datetime64_any_dtype(time_index.dtype)):
- raise ValueError('The index on the time dimension must be either '
- 'numeric or date-like')
+ if not (
+ is_numeric_dtype(time_index.dtype)
+ or is_datetime64_any_dtype(time_index.dtype)
+ ):
+ raise ValueError(
+ "The index on the time dimension must be either " "numeric or date-like"
+ )
# self._k, self._t, self._n = self.panel.shape
self._k, self._t, self._n = self.shape
self._frame.index.set_names(index_names, inplace=True)
@@ -320,7 +328,7 @@ def entity_ids(self):
Returns
-------
- id : ndarray
+ ndarray
2d array containing entity ids corresponding dataframe view
"""
return np.asarray(get_codes(self._frame.index)[0])[:, None]
@@ -332,16 +340,17 @@ def time_ids(self):
Returns
-------
- id : ndarray
+ ndarray
2d array containing time ids corresponding dataframe view
"""
return np.asarray(get_codes(self._frame.index)[1])[:, None]
def _demean_both_low_mem(self, weights):
- groups = PanelData(DataFrame(np.c_[self.entity_ids, self.time_ids],
- index=self._frame.index),
- convert_dummies=False,
- copy=False)
+ groups = PanelData(
+ DataFrame(np.c_[self.entity_ids, self.time_ids], index=self._frame.index),
+ convert_dummies=False,
+ copy=False,
+ )
return self.general_demean(groups, weights=weights)
def _demean_both(self, weights):
@@ -354,11 +363,11 @@ def _demean_both(self, weights):
Weights to use in demeaning
"""
if self.nentity > self.nobs:
- group = 'entity'
- dummy = 'time'
+ group = "entity"
+ dummy = "time"
else:
- group = 'time'
- dummy = 'entity'
+ group = "time"
+ dummy = "entity"
e = self.demean(group, weights=weights)
d = self.dummies(dummy, drop_first=True)
d.index = e.index
@@ -366,8 +375,7 @@ def _demean_both(self, weights):
d = d.values2d
e = e.values2d
resid = e - d @ lstsq(d, e)[0]
- resid = DataFrame(resid, index=self._frame.index,
- columns=self._frame.columns)
+ resid = DataFrame(resid, index=self._frame.index, columns=self._frame.columns)
return PanelData(resid)
@@ -384,7 +392,7 @@ def general_demean(self, groups, weights=None):
Returns
-------
- demeaned : PanelData
+ PanelData
Weighted, demeaned data according to groups
Notes
@@ -394,20 +402,24 @@ def general_demean(self, groups, weights=None):
if not isinstance(groups, PanelData):
groups = PanelData(groups)
if weights is None:
- weights = PanelData(DataFrame(np.ones((self._frame.shape[0], 1)),
- index=self.index,
- columns=['weights']))
+ weights = PanelData(
+ DataFrame(
+ np.ones((self._frame.shape[0], 1)),
+ index=self.index,
+ columns=["weights"],
+ )
+ )
weights = weights.values2d
groups = groups.values2d.astype(np.int64, copy=False)
weight_sum = {}
def weighted_group_mean(df, weights, root_w, level):
- num = (root_w * df).groupby(level=level).transform('sum')
+ num = (root_w * df).groupby(level=level).transform("sum")
if level in weight_sum:
denom = weight_sum[level]
else:
- denom = weights.groupby(level=level).transform('sum')
+ denom = weights.groupby(level=level).transform("sum")
weight_sum[level] = denom
return np.asarray(num) / np.asarray(denom)
@@ -452,7 +464,7 @@ def demean_pass(frame, weights, root_w):
return PanelData(current)
- def demean(self, group='entity', weights=None, return_panel=True, low_memory=False):
+ def demean(self, group="entity", weights=None, return_panel=True, low_memory=False):
"""
Demeans data by either entity or time group
@@ -472,7 +484,7 @@ def demean(self, group='entity', weights=None, return_panel=True, low_memory=Fal
Returns
-------
- demeaned : PanelData
+ PanelData
Demeaned data according to type
Notes
@@ -481,17 +493,17 @@ def demean(self, group='entity', weights=None, return_panel=True, low_memory=Fal
the square root of the weights so that they can be used in WLS
estimation.
"""
- if group not in ('entity', 'time', 'both'):
+ if group not in ("entity", "time", "both"):
raise ValueError
- if group == 'both':
+ if group == "both":
if not low_memory:
return self._demean_both(weights)
else:
return self._demean_both_low_mem(weights)
- level = 0 if group == 'entity' else 1
+ level = 0 if group == "entity" else 1
if weights is None:
- group_mu = self._frame.groupby(level=level).transform('mean')
+ group_mu = self._frame.groupby(level=level).transform("mean")
out = self._frame - group_mu
if not return_panel:
return np.asarray(out)
@@ -500,9 +512,9 @@ def demean(self, group='entity', weights=None, return_panel=True, low_memory=Fal
w = weights.values2d
frame = self._frame.copy()
frame = w * frame
- weighted_sum = frame.groupby(level=level).transform('sum')
+ weighted_sum = frame.groupby(level=level).transform("sum")
frame.iloc[:, :] = w
- sum_weights = frame.groupby(level=level).transform('sum')
+ sum_weights = frame.groupby(level=level).transform("sum")
group_mu = weighted_sum / sum_weights
out = np.sqrt(w) * (self._frame - group_mu)
if not return_panel:
@@ -510,15 +522,21 @@ def demean(self, group='entity', weights=None, return_panel=True, low_memory=Fal
return PanelData(out)
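The weighted branch above in one formula: group means are sum(w * x) / sum(w) within the group, and the demeaned data are rescaled by sqrt(w) so the result can feed WLS directly. A sketch for the entity case (level 0), assuming df is a DataFrame and w a one-column weight DataFrame sharing a 2-level index:

import numpy as np

def weighted_entity_demean(df, w):
    num = (w.values * df).groupby(level=0).transform("sum")
    den = w.groupby(level=0).transform("sum").values
    return np.sqrt(w.values) * (df - num / den)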
def __str__(self):
- return self.__class__.__name__ + '\n' + str(self._frame)
+ return self.__class__.__name__ + "\n" + str(self._frame)
def __repr__(self):
- return self.__str__() + '\n' + self.__class__.__name__ + ' object, id: ' + hex(id(self))
+ return (
+ self.__str__()
+ + "\n"
+ + self.__class__.__name__
+ + " object, id: "
+ + hex(id(self))
+ )
def _repr_html_(self):
-        return self.__class__.__name__ + '<br/>' + self._frame._repr_html_()
+        return self.__class__.__name__ + "<br/>" + self._frame._repr_html_()
- def count(self, group='entity'):
+ def count(self, group="entity"):
"""
Count number of observations by entity or time
@@ -529,11 +547,11 @@ def count(self, group='entity'):
Returns
-------
- count : DataFrame
+ DataFrame
Counts according to type. Either (entity by var) or (time by var)
"""
- level = 0 if group == 'entity' else 1
- reindex = self.entities if group == 'entity' else self.time
+ level = 0 if group == "entity" else 1
+ reindex = self.entities if group == "entity" else self.time
out = self._frame.groupby(level=level).count()
return out.reindex(reindex)
@@ -545,10 +563,14 @@ def index(self):
def copy(self):
"""Return a deep copy"""
- return PanelData(self._frame.copy(), var_name=self._var_name,
- convert_dummies=self._convert_dummies, drop_first=self._drop_first)
-
- def mean(self, group='entity', weights=None):
+ return PanelData(
+ self._frame.copy(),
+ var_name=self._var_name,
+ convert_dummies=self._convert_dummies,
+ drop_first=self._drop_first,
+ )
+
+ def mean(self, group="entity", weights=None):
"""
Compute data mean by either entity or time group
@@ -561,10 +583,10 @@ def mean(self, group='entity', weights=None):
Returns
-------
- mean : DataFrame
+ DataFrame
Data mean according to type. Either (entity by var) or (time by var)
"""
- level = 0 if group == 'entity' else 1
+ level = 0 if group == "entity" else 1
if weights is None:
mu = self._frame.groupby(level=level).mean()
else:
@@ -576,7 +598,7 @@ def mean(self, group='entity', weights=None):
sum_weights = frame.groupby(level=level).sum()
mu = weighted_sum / sum_weights
- reindex = self.entities if group == 'entity' else self.time
+ reindex = self.entities if group == "entity" else self.time
out = mu.reindex(reindex)
return out
@@ -587,14 +609,19 @@ def first_difference(self):
Returns
-------
- diffs : PanelData
+ PanelData
Differenced values
"""
diffs = self.panel.values
diffs = diffs[:, 1:] - diffs[:, :-1]
- diffs = panel_to_frame(diffs, self.panel.items, self.panel.major_axis[1:],
- self.panel.minor_axis, True)
- diffs = diffs.reindex(self._frame.index).dropna(how='any')
+ diffs = panel_to_frame(
+ diffs,
+ self.panel.items,
+ self.panel.major_axis[1:],
+ self.panel.minor_axis,
+ True,
+ )
+ diffs = diffs.reindex(self._frame.index).dropna(how="any")
return PanelData(diffs)
@staticmethod
@@ -610,7 +637,7 @@ def _minimize_multiindex(df):
df.index.names = orig_names
return df
- def dummies(self, group='entity', drop_first=False):
+ def dummies(self, group="entity", drop_first=False):
"""
Generate entity or time dummies
@@ -624,15 +651,15 @@ def dummies(self, group='entity', drop_first=False):
Returns
-------
- dummies : DataFrame
+ DataFrame
Dummy variables
"""
- if group not in ('entity', 'time'):
+ if group not in ("entity", "time"):
raise ValueError
- axis = 0 if group == 'entity' else 1
+ axis = 0 if group == "entity" else 1
labels = get_codes(self._frame.index)
levels = self._frame.index.levels
cat = Categorical(levels[axis][labels[axis]])
dummies = get_dummies(cat, drop_first=drop_first)
- cols = self.entities if group == 'entity' else self.time
+ cols = self.entities if group == "entity" else self.time
return dummies[[c for c in cols if c in dummies]].astype(np.float64, copy=False)
diff --git a/linearmodels/panel/model.py b/linearmodels/panel/model.py
index 1c6a7d5bbb..5a333d99a0 100644
--- a/linearmodels/panel/model.py
+++ b/linearmodels/panel/model.py
@@ -18,8 +18,10 @@
from linearmodels.panel.data import PanelData
from linearmodels.panel.results import (PanelEffectsResults, PanelResults,
RandomEffectsResults)
-from linearmodels.panel.utility import (check_absorbed, dummy_matrix, in_2core_graph,
- not_absorbed, AbsorbingEffectWarning, absorbing_warn_msg)
+from linearmodels.panel.utility import (AbsorbingEffectWarning,
+ absorbing_warn_msg, check_absorbed,
+ dummy_matrix, in_2core_graph,
+ not_absorbed)
from linearmodels.utility import (AttrDict, InapplicableTestStatistic,
InferenceUnavailableWarning,
InvalidTestStatistic, MemoryWarning,
@@ -31,7 +33,7 @@
def panel_structure_stats(ids, name):
bc = np.bincount(ids)
- index = ['mean', 'median', 'max', 'min', 'total']
+ index = ["mean", "median", "max", "min", "total"]
out = [bc.mean(), np.median(bc), bc.max(), bc.min(), bc.shape[0]]
return pd.Series(out, index=index, name=name)
@@ -57,19 +59,19 @@ class PanelFormulaParser(object):
def __init__(self, formula, data, eval_env=2):
self._formula = formula
self._data = PanelData(data, convert_dummies=False, copy=False)
- self._na_action = NAAction(on_NA='raise', NA_types=[])
+ self._na_action = NAAction(on_NA="raise", NA_types=[])
self._eval_env = eval_env
self._dependent = self._exog = None
self._parse()
def _parse(self):
- parts = self._formula.split('~')
- parts[1] = ' 0 + ' + parts[1]
- cln_formula = '~'.join(parts)
+ parts = self._formula.split("~")
+ parts[1] = " 0 + " + parts[1]
+ cln_formula = "~".join(parts)
mod_descr = ModelDesc.from_formula(cln_formula)
rm_list = []
- effects = {'EntityEffects': False, 'FixedEffects': False, 'TimeEffects': False}
+ effects = {"EntityEffects": False, "FixedEffects": False, "TimeEffects": False}
for term in mod_descr.rhs_termlist:
if term.name() in effects:
effects[term.name()] = True
@@ -77,13 +79,13 @@ def _parse(self):
for term in rm_list:
mod_descr.rhs_termlist.remove(term)
- if effects['EntityEffects'] and effects['FixedEffects']:
- raise ValueError('Cannot use both FixedEffects and EntityEffects')
- self._entity_effect = effects['EntityEffects'] or effects['FixedEffects']
- self._time_effect = effects['TimeEffects']
+ if effects["EntityEffects"] and effects["FixedEffects"]:
+ raise ValueError("Cannot use both FixedEffects and EntityEffects")
+ self._entity_effect = effects["EntityEffects"] or effects["FixedEffects"]
+ self._time_effect = effects["TimeEffects"]
cln_formula = mod_descr.describe()
- self._lhs, self._rhs = map(lambda s: s.strip(), cln_formula.split('~'))
- self._lhs = '0 + ' + self._lhs
+ self._lhs, self._rhs = map(lambda s: s.strip(), cln_formula.split("~"))
+ self._lhs = "0 + " + self._lhs
@property
def entity_effect(self):
@@ -115,14 +117,24 @@ def data(self):
@property
def dependent(self):
"""DataFrame containing the dependent variable"""
- return dmatrix(self._lhs, self._data.dataframe, eval_env=self._eval_env,
- return_type='dataframe', NA_action=self._na_action)
+ return dmatrix(
+ self._lhs,
+ self._data.dataframe,
+ eval_env=self._eval_env,
+ return_type="dataframe",
+ NA_action=self._na_action,
+ )
@property
def exog(self):
"""DataFrame containing the exogenous variables"""
- out = dmatrix(self._rhs, self._data.dataframe, eval_env=self._eval_env,
- return_type='dataframe', NA_action=self._na_action)
+ out = dmatrix(
+ self._rhs,
+ self._data.dataframe,
+ eval_env=self._eval_env,
+ return_type="dataframe",
+ NA_action=self._na_action,
+ )
return out
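A runnable sketch of the formula interface this parser backs: EntityEffects and TimeEffects terms are stripped from the patsy description and converted into flags rather than regressors (all data below is synthetic and illustrative):

import numpy as np
import pandas as pd
from linearmodels.panel import PanelOLS

entities = ["e{0}".format(i) for i in range(10)]
times = pd.date_range("2000-01-01", periods=8, freq="A")
idx = pd.MultiIndex.from_product([entities, times], names=["entity", "time"])
rs = np.random.RandomState(0)
data = pd.DataFrame({"x1": rs.standard_normal(len(idx))}, index=idx)
data["y"] = 2.0 * data.x1 + rs.standard_normal(len(idx))

mod = PanelOLS.from_formula("y ~ 1 + x1 + EntityEffects + TimeEffects", data)
print(mod.fit())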
@@ -130,9 +142,15 @@ class AmbiguityError(Exception):
pass
-__all__ = ['PanelOLS', 'PooledOLS', 'RandomEffects', 'FirstDifferenceOLS',
- 'BetweenOLS', 'AmbiguityError',
- 'FamaMacBeth']
+__all__ = [
+ "PanelOLS",
+ "PooledOLS",
+ "RandomEffects",
+ "FirstDifferenceOLS",
+ "BetweenOLS",
+ "AmbiguityError",
+ "FamaMacBeth",
+]
# Likely
@@ -150,11 +168,11 @@ class PooledOLS(object):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Dependent (left-hand-side) variable (time by entity)
- exog : array-like
+ exog : array_like
Exogenous or right-hand-side variables (variable by time by entity).
- weights : array-like, optional
+ weights : array_like, optional
Weights to use in estimation. Assumes residual variance is
        proportional to inverse of weight so that the residual times
        the weight should be homoskedastic.
@@ -169,8 +187,8 @@ class PooledOLS(object):
"""
def __init__(self, dependent, exog, *, weights=None):
- self.dependent = PanelData(dependent, 'Dep')
- self.exog = PanelData(exog, 'Exog')
+ self.dependent = PanelData(dependent, "Dep")
+ self.exog = PanelData(exog, "Exog")
self._original_shape = self.dependent.shape
self._constant = None
self._formula = None
@@ -178,21 +196,28 @@ def __init__(self, dependent, exog, *, weights=None):
self._name = self.__class__.__name__
self.weights = self._adapt_weights(weights)
self._not_null = np.ones(self.dependent.values2d.shape[0], dtype=np.bool)
- self._cov_estimators = CovarianceManager(self.__class__.__name__, HomoskedasticCovariance,
- HeteroskedasticCovariance, ClusteredCovariance,
- DriscollKraay, ACCovariance)
+ self._cov_estimators = CovarianceManager(
+ self.__class__.__name__,
+ HomoskedasticCovariance,
+ HeteroskedasticCovariance,
+ ClusteredCovariance,
+ DriscollKraay,
+ ACCovariance,
+ )
self._original_index = self.dependent.index.copy()
self._validate_data()
self._singleton_index = None
def __str__(self):
- out = '{name} \nNum exog: {num_exog}, Constant: {has_constant}'
- return out.format(name=self.__class__.__name__,
- num_exog=self.exog.dataframe.shape[1],
- has_constant=self.has_constant)
+ out = "{name} \nNum exog: {num_exog}, Constant: {has_constant}"
+ return out.format(
+ name=self.__class__.__name__,
+ num_exog=self.exog.dataframe.shape[1],
+ has_constant=self.has_constant,
+ )
def __repr__(self):
- return self.__str__() + '\nid: ' + str(hex(id(self)))
+ return self.__str__() + "\nid: " + str(hex(id(self)))
def reformat_clusters(self, clusters):
"""
@@ -200,12 +225,12 @@ def reformat_clusters(self, clusters):
Parameters
----------
- clusters : array-like
+ clusters : array_like
Values to use for variance clustering
Returns
-------
- reformatted : PanelData
+ PanelData
Original data with matching axis and observation dropped where
missing in the model data.
@@ -213,20 +238,24 @@ def reformat_clusters(self, clusters):
-----
This is exposed for testing and is not normally needed for estimation
"""
- clusters = PanelData(clusters, var_name='cov.cluster', convert_dummies=False)
+ clusters = PanelData(clusters, var_name="cov.cluster", convert_dummies=False)
if clusters.shape[1:] != self._original_shape[1:]:
- raise ValueError('clusters must have the same number of entities '
- 'and time periods as the model data.')
+ raise ValueError(
+ "clusters must have the same number of entities "
+ "and time periods as the model data."
+ )
clusters.drop(~self.not_null)
return clusters
def _info(self):
"""Information about panel structure"""
- entity_info = panel_structure_stats(self.dependent.entity_ids.squeeze(),
- 'Observations per entity')
- time_info = panel_structure_stats(self.dependent.time_ids.squeeze(),
- 'Observations per time period')
+ entity_info = panel_structure_stats(
+ self.dependent.entity_ids.squeeze(), "Observations per entity"
+ )
+ time_info = panel_structure_stats(
+ self.dependent.time_ids.squeeze(), "Observations per time period"
+ )
other_info = None
return entity_info, time_info, other_info
@@ -237,11 +266,10 @@ def _adapt_weights(self, weights):
self._is_weighted = False
frame = self.dependent.dataframe.copy()
frame.iloc[:, :] = 1
- frame.columns = ['weight']
+ frame.columns = ["weight"]
return PanelData(frame)
- frame = pd.DataFrame(columns=self.dependent.entities,
- index=self.dependent.time)
+ frame = pd.DataFrame(columns=self.dependent.entities, index=self.dependent.time)
nobs, nentity = self.exog.nobs, self.exog.nentity
if weights.ndim == 3 or weights.shape == (nobs, nentity):
@@ -249,11 +277,15 @@ def _adapt_weights(self, weights):
weights = np.squeeze(weights)
if weights.shape[0] == nobs and nobs == nentity:
- raise AmbiguityError('Unable to distinguish nobs form nentity since they are '
- 'equal. You must use an 2-d array to avoid ambiguity.')
- if (isinstance(weights, (pd.Series, pd.DataFrame)) and
- isinstance(weights.index, pd.MultiIndex) and
- weights.shape[0] == self.dependent.dataframe.shape[0]):
+ raise AmbiguityError(
+ "Unable to distinguish nobs form nentity since they are "
+ "equal. You must use an 2-d array to avoid ambiguity."
+ )
+ if (
+ isinstance(weights, (pd.Series, pd.DataFrame))
+ and isinstance(weights.index, pd.MultiIndex)
+ and weights.shape[0] == self.dependent.dataframe.shape[0]
+ ):
frame = weights
elif weights.shape[0] == nobs:
weights = np.asarray(weights)[:, None]
@@ -267,14 +299,14 @@ def _adapt_weights(self, weights):
frame = self.dependent.dataframe.copy()
frame.iloc[:, :] = weights[:, None]
else:
- raise ValueError('Weights do not have a supported shape.')
+ raise ValueError("Weights do not have a supported shape.")
return PanelData(frame)
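Shapes accepted by _adapt_weights above, as a sketch (sizes illustrative; a 1-d weight vector is ambiguous when nobs equals nentity, which is what raises AmbiguityError):

import numpy as np

nobs, nentity = 4, 3                 # number of time periods and entities
w_cell = np.ones((nobs, nentity))    # one weight per (time, entity) cell
w_time = np.ones(nobs)               # one weight per period, shared across entities
w_entity = np.ones(nentity)          # one weight per entity, shared across periods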
def _check_exog_rank(self):
x = self.exog.values2d
rank_of_x = matrix_rank(x)
if rank_of_x < x.shape[1]:
- raise ValueError('exog does not have full column rank.')
+ raise ValueError("exog does not have full column rank.")
return rank_of_x
def _validate_data(self):
@@ -283,16 +315,20 @@ def _validate_data(self):
x = self._x = self.exog.values2d
w = self._w = self.weights.values2d
if y.shape[0] != x.shape[0]:
- raise ValueError('dependent and exog must have the same number of '
- 'observations.')
+ raise ValueError(
+ "dependent and exog must have the same number of " "observations."
+ )
if y.shape[0] != w.shape[0]:
- raise ValueError('weights must have the same number of '
- 'observations as dependent.')
+ raise ValueError(
+ "weights must have the same number of " "observations as dependent."
+ )
all_missing = np.any(np.isnan(y), axis=1) & np.all(np.isnan(x), axis=1)
- missing = (np.any(np.isnan(y), axis=1) |
- np.any(np.isnan(x), axis=1) |
- np.any(np.isnan(w), axis=1))
+ missing = (
+ np.any(np.isnan(y), axis=1)
+ | np.any(np.isnan(x), axis=1)
+ | np.any(np.isnan(w), axis=1)
+ )
missing_warning(all_missing ^ missing)
if np.any(missing):
@@ -305,7 +341,7 @@ def _validate_data(self):
w = self.weights.dataframe
if np.any(np.asarray(w) <= 0):
- raise ValueError('weights must be strictly positive.')
+ raise ValueError("weights must be strictly positive.")
w = w / w.mean()
self.weights = PanelData(w)
rank_of_x = self._check_exog_rank()
@@ -329,11 +365,10 @@ def _f_statistic(self, weps, y, x, root_w, df_resid):
"""Compute model F-statistic"""
weps_const = y
num_df = x.shape[1]
- name = 'Model F-statistic (homoskedastic)'
+ name = "Model F-statistic (homoskedastic)"
if self.has_constant:
if num_df == 1:
- return InvalidTestStatistic('Model contains only a constant',
- name=name)
+ return InvalidTestStatistic("Model contains only a constant", name=name)
num_df -= 1
weps_const = y - float((root_w.T @ y) / (root_w.T @ root_w))
@@ -343,17 +378,21 @@ def _f_statistic(self, weps, y, x, root_w, df_resid):
denom = resid_ss
denom_df = df_resid
stat = float((num / num_df) / (denom / denom_df))
- return WaldTestStatistic(stat, null='All parameters ex. constant not zero',
- df=num_df, df_denom=denom_df, name=name)
+ return WaldTestStatistic(
+ stat,
+ null="All parameters ex. constant not zero",
+ df=num_df,
+ df_denom=denom_df,
+ name=name,
+ )
def _f_statistic_robust(self, params, cov_est, debiased, df_resid):
"""Compute Wald test that all parameters are 0, ex. constant"""
sel = np.ones(params.shape[0], dtype=np.bool)
- name = 'Model F-statistic (robust)'
+ name = "Model F-statistic (robust)"
def invalid_f():
- return InvalidTestStatistic('Model contains only a constant',
- name=name)
+ return InvalidTestStatistic("Model contains only a constant", name=name)
if self.has_constant:
if len(sel) == 1:
@@ -366,11 +405,10 @@ def deferred_f():
test_stat = test_params.T @ np.linalg.inv(test_cov) @ test_params
test_stat = float(test_stat)
df = sel.sum()
- null = 'All parameters ex. constant not zero'
+ null = "All parameters ex. constant not zero"
if debiased:
- wald = WaldTestStatistic(test_stat / df, null, df, df_resid,
- name=name)
+ wald = WaldTestStatistic(test_stat / df, null, df, df_resid, name=name)
else:
wald = WaldTestStatistic(test_stat, null, df, name=name)
return wald
@@ -380,10 +418,10 @@ def deferred_f():
def _prepare_between(self):
"""Prepare values for between estimation of R2"""
weights = self.weights if self._is_weighted else None
- y = self.dependent.mean('entity', weights=weights).values
- x = self.exog.mean('entity', weights=weights).values
+ y = self.dependent.mean("entity", weights=weights).values
+ x = self.exog.mean("entity", weights=weights).values
# Weight transformation
- wcount, wmean = self.weights.count('entity'), self.weights.mean('entity')
+ wcount, wmean = self.weights.count("entity"), self.weights.mean("entity")
wsum = wcount * wmean
w = wsum.values
w = w / w.mean()
@@ -435,9 +473,8 @@ def _rsquared(self, params, reweight=False):
# R2 - Within
#############################################
weights = self.weights if self._is_weighted else None
- wy = self.dependent.demean('entity', weights=weights,
- return_panel=False)
- wx = self.exog.demean('entity', weights=weights, return_panel=False)
+ wy = self.dependent.demean("entity", weights=weights, return_panel=False)
+ wx = self.exog.demean("entity", weights=weights, return_panel=False)
weps = wy - wx @ params
residual_ss = float(weps.T @ weps)
total_ss = float(wy.T @ wy)
@@ -453,21 +490,38 @@ def _postestimation(self, params, cov, debiased, df_resid, weps, y, x, root_w):
deferred_f = self._f_statistic_robust(params, cov, debiased, df_resid)
f_stat = self._f_statistic(weps, y, x, root_w, df_resid)
r2o, r2w, r2b = self._rsquared(params)
- f_pooled = InapplicableTestStatistic(reason='Model has no effects',
- name='Pooled F-stat')
+ f_pooled = InapplicableTestStatistic(
+ reason="Model has no effects", name="Pooled F-stat"
+ )
entity_info, time_info, other_info = self._info()
nobs = weps.shape[0]
sigma2 = float(weps.T @ weps / nobs)
loglik = -0.5 * nobs * (np.log(2 * np.pi) + np.log(sigma2) + 1)
- res = AttrDict(params=params, deferred_cov=cov.deferred_cov,
- deferred_f=deferred_f, f_stat=f_stat,
- debiased=debiased, name=self._name, var_names=self.exog.vars,
- r2w=r2w, r2b=r2b, r2=r2w, r2o=r2o, s2=cov.s2,
- model=self, cov_type=cov.name, index=self.dependent.index,
- entity_info=entity_info, time_info=time_info, other_info=other_info,
- f_pooled=f_pooled, loglik=loglik, not_null=self._not_null,
- original_index=self._original_index)
+ res = AttrDict(
+ params=params,
+ deferred_cov=cov.deferred_cov,
+ deferred_f=deferred_f,
+ f_stat=f_stat,
+ debiased=debiased,
+ name=self._name,
+ var_names=self.exog.vars,
+ r2w=r2w,
+ r2b=r2b,
+ r2=r2w,
+ r2o=r2o,
+ s2=cov.s2,
+ model=self,
+ cov_type=cov.name,
+ index=self.dependent.index,
+ entity_info=entity_info,
+ time_info=time_info,
+ other_info=other_info,
+ f_pooled=f_pooled,
+ loglik=loglik,
+ not_null=self._not_null,
+ original_index=self._original_index,
+ )
return res
@property
@@ -484,18 +538,18 @@ def from_formula(cls, formula, data, *, weights=None):
----------
formula : str
Formula to transform into model. Conforms to patsy formula rules.
- data : array-like
+ data : array_like
Data structure that can be coerced into a PanelData. In most
cases, this should be a multi-index DataFrame where the level 0
index contains the entities and the level 1 contains the time.
- weights: array-like, optional
+ weights: array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
Returns
-------
- model : PooledOLS
+ PooledOLS
Model specified using the formula
Notes
@@ -518,12 +572,12 @@ def from_formula(cls, formula, data, *, weights=None):
def _choose_cov(self, cov_type, **cov_config):
cov_est = self._cov_estimators[cov_type]
- if cov_type != 'clustered':
+ if cov_type != "clustered":
return cov_est, cov_config
cov_config_upd = {k: v for k, v in cov_config.items()}
- clusters = cov_config.get('clusters', None)
+ clusters = cov_config.get("clusters", None)
if clusters is not None:
clusters = self.reformat_clusters(clusters).copy()
for col in clusters.dataframe:
@@ -531,25 +585,21 @@ def _choose_cov(self, cov_type, **cov_config):
clusters.dataframe[col] = cat.codes.astype(np.int64)
clusters = clusters.dataframe
- cluster_entity = cov_config_upd.pop('cluster_entity', False)
+ cluster_entity = cov_config_upd.pop("cluster_entity", False)
if cluster_entity:
group_ids = self.dependent.entity_ids.squeeze()
- name = 'cov.cluster.entity'
- group_ids = pd.Series(group_ids,
- index=self.dependent.index,
- name=name)
+ name = "cov.cluster.entity"
+ group_ids = pd.Series(group_ids, index=self.dependent.index, name=name)
if clusters is not None:
clusters[name] = group_ids
else:
clusters = pd.DataFrame(group_ids)
- cluster_time = cov_config_upd.pop('cluster_time', False)
+ cluster_time = cov_config_upd.pop("cluster_time", False)
if cluster_time:
group_ids = self.dependent.time_ids.squeeze()
- name = 'cov.cluster.time'
- group_ids = pd.Series(group_ids,
- index=self.dependent.index,
- name=name)
+ name = "cov.cluster.time"
+ group_ids = pd.Series(group_ids, index=self.dependent.index, name=name)
if clusters is not None:
clusters[name] = group_ids
else:
@@ -557,11 +607,13 @@ def _choose_cov(self, cov_type, **cov_config):
if self._singleton_index is not None and clusters is not None:
clusters = clusters.loc[~self._singleton_index]
- cov_config_upd['clusters'] = np.asarray(clusters) if clusters is not None else clusters
+ cov_config_upd["clusters"] = (
+ np.asarray(clusters) if clusters is not None else clusters
+ )
return cov_est, cov_config_upd
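
For reference, a minimal sketch of how the clustered-covariance path above is exercised; the toy panel, entity names, and random data are illustrative assumptions, not part of this diff:

import numpy as np
import pandas as pd
from linearmodels.panel import PooledOLS

# Toy balanced panel: 10 entities x 8 periods, entity-major MultiIndex
rng = np.random.RandomState(0)
entities = ["firm{0}".format(i) for i in range(10)]
periods = pd.date_range("2000-12-31", periods=8, freq="A")
index = pd.MultiIndex.from_product([entities, periods])
data = pd.DataFrame(
    {"y": rng.standard_normal(80), "x1": rng.standard_normal(80)}, index=index
)

mod = PooledOLS(data[["y"]], data[["x1"]])
# cluster_entity is popped from cov_config by _choose_cov above
res = mod.fit(cov_type="clustered", cluster_entity=True)
print(res.std_errors)
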
- def fit(self, *, cov_type='unadjusted', debiased=True, **cov_config):
+ def fit(self, *, cov_type="unadjusted", debiased=True, **cov_config):
"""
Estimate model parameters
@@ -577,7 +629,7 @@ def fit(self, *, cov_type='unadjusted', debiased=True, **cov_config):
Returns
-------
- results : PanelResults
+ PanelResults
Estimation results
Examples
@@ -623,14 +675,23 @@ def fit(self, *, cov_type='unadjusted', debiased=True, **cov_config):
df_model = x.shape[1]
df_resid = nobs - df_model
cov_est, cov_config = self._choose_cov(cov_type, **cov_config)
- cov = cov_est(wy, wx, params, self.dependent.entity_ids, self.dependent.time_ids,
- debiased=debiased, **cov_config)
+ cov = cov_est(
+ wy,
+ wx,
+ params,
+ self.dependent.entity_ids,
+ self.dependent.time_ids,
+ debiased=debiased,
+ **cov_config
+ )
weps = wy - wx @ params
index = self.dependent.index
- fitted = pd.DataFrame(x @ params, index, ['fitted_values'])
- effects = pd.DataFrame(np.full_like(fitted.values, np.nan), index, ['estimated_effects'])
+ fitted = pd.DataFrame(x @ params, index, ["fitted_values"])
+ effects = pd.DataFrame(
+ np.full_like(fitted.values, np.nan), index, ["estimated_effects"]
+ )
eps = y - fitted.values
- idiosyncratic = pd.DataFrame(eps, index, ['idiosyncratic'])
+ idiosyncratic = pd.DataFrame(eps, index, ["idiosyncratic"])
residual_ss = float(weps.T @ weps)
e = y
if self._constant:
@@ -639,11 +700,25 @@ def fit(self, *, cov_type='unadjusted', debiased=True, **cov_config):
total_ss = float(w.T @ (e ** 2))
r2 = 1 - residual_ss / total_ss
- res = self._postestimation(params, cov, debiased, df_resid, weps, wy, wx, root_w)
- res.update(dict(df_resid=df_resid, df_model=df_model, nobs=y.shape[0],
- residual_ss=residual_ss, total_ss=total_ss, r2=r2, wresids=weps,
- resids=eps, index=self.dependent.index, fitted=fitted, effects=effects,
- idiosyncratic=idiosyncratic))
+ res = self._postestimation(
+ params, cov, debiased, df_resid, weps, wy, wx, root_w
+ )
+ res.update(
+ dict(
+ df_resid=df_resid,
+ df_model=df_model,
+ nobs=y.shape[0],
+ residual_ss=residual_ss,
+ total_ss=total_ss,
+ r2=r2,
+ wresids=weps,
+ resids=eps,
+ index=self.dependent.index,
+ fitted=fitted,
+ effects=effects,
+ idiosyncratic=idiosyncratic,
+ )
+ )
return PanelResults(res)
@@ -653,9 +728,9 @@ def predict(self, params, *, exog=None, data=None, eval_env=4):
Parameters
----------
- params : array-like
+ params : array_like
Model parameters (nvar by 1)
- exog : array-like
+ exog : array_like
Exogenous regressors (nobs by nvar)
data : DataFrame
Values to use when making predictions from a model constructed
@@ -665,7 +740,7 @@ def predict(self, params, *, exog=None, data=None, eval_env=4):
Returns
-------
- predictions : DataFrame
+ DataFrame
Fitted values from supplied data and parameters
Notes
@@ -678,11 +753,14 @@ def predict(self, params, *, exog=None, data=None, eval_env=4):
values corresponding to the original model specification.
"""
if data is not None and self.formula is None:
- raise ValueError('Unable to use data when the model was not '
- 'created using a formula.')
+ raise ValueError(
+ "Unable to use data when the model was not " "created using a formula."
+ )
if data is not None and exog is not None:
- raise ValueError('Predictions can only be constructed using one '
- 'of exog or data, but not both.')
+ raise ValueError(
+ "Predictions can only be constructed using one "
+ "of exog or data, but not both."
+ )
if exog is not None:
exog = PanelData(exog).dataframe
else:
@@ -692,7 +770,7 @@ def predict(self, params, *, exog=None, data=None, eval_env=4):
params = np.atleast_2d(np.asarray(params))
if params.shape[0] == 1:
params = params.T
- pred = pd.DataFrame(x @ params, index=exog.index, columns=['predictions'])
+ pred = pd.DataFrame(x @ params, index=exog.index, columns=["predictions"])
return pred
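
Continuing the toy example from the _choose_cov sketch, the predict path accepts raw parameter values plus fresh exog (a usage sketch, not part of the diff):

# res.params is coerced through np.atleast_2d and transposed to (nvar, 1)
pred = mod.predict(res.params, exog=data[["x1"]])
print(pred.head())  # one "predictions" column, indexed like exog
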
@@ -703,11 +781,11 @@ class PanelOLS(PooledOLS):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Dependent (left-hand-side) variable (time by entity).
- exog : array-like
+ exog : array_like
Exogenous or right-hand-side variables (variable by time by entity).
- weights : array-like, optional
+ weights : array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
@@ -715,7 +793,7 @@ class PanelOLS(PooledOLS):
Flag whether to include entity (fixed) effects in the model
time_effects : bool, optional
Flag whether to include time effects in the model
- other_effects : array-like, optional
+ other_effects : array_like, optional
Category codes to use for any effects that are not entity or time
effects. Each variable is treated as an effect.
singletons : bool, optional
@@ -757,8 +835,18 @@ class PanelOLS(PooledOLS):
2 other.
"""
- def __init__(self, dependent, exog, *, weights=None, entity_effects=False, time_effects=False,
- other_effects=None, singletons=True, drop_absorbed=False):
+ def __init__(
+ self,
+ dependent,
+ exog,
+ *,
+ weights=None,
+ entity_effects=False,
+ time_effects=False,
+ other_effects=None,
+ singletons=True,
+ drop_absorbed=False
+ ):
super(PanelOLS, self).__init__(dependent, exog, weights=weights)
self._entity_effects = entity_effects
@@ -795,9 +883,12 @@ def _drop_singletons(self):
return
import warnings as warn
+
nobs = retain.shape[0]
ndropped = nobs - retain.sum()
- warn.warn('{0} singleton observations dropped'.format(ndropped), SingletonWarning)
+ warn.warn(
+ "{0} singleton observations dropped".format(ndropped), SingletonWarning
+ )
drop = ~retain
self._singleton_index = drop
self.dependent.drop(drop)
@@ -810,11 +901,15 @@ def _drop_singletons(self):
def __str__(self):
out = super(PanelOLS, self).__str__()
- additional = '\nEntity Effects: {ee}, Time Effects: {te}, Num Other Effects: {oe}'
+ additional = (
+ "\nEntity Effects: {ee}, Time Effects: {te}, Num Other Effects: {oe}"
+ )
oe = 0
if self.other_effects:
oe = self._other_effect_cats.nvar
- additional = additional.format(ee=self.entity_effects, te=self.time_effects, oe=oe)
+ additional = additional.format(
+ ee=self.entity_effects, te=self.time_effects, oe=oe
+ )
out += additional
return out
@@ -822,16 +917,17 @@ def _validate_effects(self, effects):
"""Check model effects"""
if effects is None:
return False
- effects = PanelData(effects, var_name='OtherEffect',
- convert_dummies=False)
+ effects = PanelData(effects, var_name="OtherEffect", convert_dummies=False)
if effects.shape[1:] != self._original_shape[1:]:
- raise ValueError('other_effects must have the same number of '
- 'entities and time periods as dependent.')
+ raise ValueError(
+ "other_effects must have the same number of "
+ "entities and time periods as dependent."
+ )
num_effects = effects.nvar
if num_effects + self.entity_effects + self.time_effects > 2:
- raise ValueError('At most two effects supported.')
+ raise ValueError("At most two effects supported.")
cats = {}
effects_frame = effects.dataframe
for col in effects_frame:
@@ -847,18 +943,20 @@ def _validate_effects(self, effects):
if cats.shape[1] == 2:
nested = self._is_effect_nested(cats[:, [0]], cats[:, [1]])
nested |= self._is_effect_nested(cats[:, [1]], cats[:, [0]])
- nesting_effect = 'other effects'
+ nesting_effect = "other effects"
elif self.entity_effects:
nested = self._is_effect_nested(cats[:, [0]], self.dependent.entity_ids)
nested |= self._is_effect_nested(self.dependent.entity_ids, cats[:, [0]])
- nesting_effect = 'entity effects'
+ nesting_effect = "entity effects"
elif self.time_effects:
nested = self._is_effect_nested(cats[:, [0]], self.dependent.time_ids)
nested |= self._is_effect_nested(self.dependent.time_ids, cats[:, [0]])
- nesting_effect = 'time effects'
+ nesting_effect = "time effects"
if nested:
- raise ValueError('Included other effects nest or are nested '
- 'by {effect}'.format(effect=nesting_effect))
+ raise ValueError(
+ "Included other effects nest or are nested "
+ "by {effect}".format(effect=nesting_effect)
+ )
return True
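
A sketch of what _validate_effects accepts; the one-column `group` category is hypothetical and only needs to align with the dependent's entity/time shape:

from linearmodels.panel import PanelOLS

# Hypothetical category codes varying over time; at most two effects total
oe = pd.Series(
    pd.factorize(data.index.get_level_values(1))[0] % 2, index=data.index
).to_frame("group")
mod_oe = PanelOLS(data[["y"]], data[["x1"]], other_effects=oe)
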
@@ -878,8 +976,16 @@ def other_effects(self):
return self._other_effects
@classmethod
- def from_formula(cls, formula, data, *, weights=None, other_effects=None,
- singletons=True, drop_absorbed=False):
+ def from_formula(
+ cls,
+ formula,
+ data,
+ *,
+ weights=None,
+ other_effects=None,
+ singletons=True,
+ drop_absorbed=False
+ ):
"""
Create a model from a formula
@@ -890,15 +996,15 @@ def from_formula(cls, formula, data, *, weights=None, other_effects=None,
with two special variable names, EntityEffects and TimeEffects
which can be used to specify that the model should contain an
entity effect or a time effect, respectively. See Examples.
- data : array-like
+ data : array_like
Data structure that can be coerced into a PanelData. In most
cases, this should be a multi-index DataFrame where the level 0
index contains the entities and the level 1 contains the time.
- weights: array-like
+ weights: array_like
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
- other_effects : array-like, optional
+ other_effects : array_like, optional
Category codes to use for any effects that are not entity or time
effects. Each variable is treated as an effect.
singletons : bool, optional
@@ -910,7 +1016,7 @@ def from_formula(cls, formula, data, *, weights=None, other_effects=None,
Returns
-------
- model : PanelOLS
+ PanelOLS
Model specified using the formula
Examples
@@ -923,9 +1029,16 @@ def from_formula(cls, formula, data, *, weights=None, other_effects=None,
entity_effect = parser.entity_effect
time_effect = parser.time_effect
dependent, exog = parser.data
- mod = cls(dependent, exog, entity_effects=entity_effect, time_effects=time_effect,
- weights=weights, other_effects=other_effects, singletons=singletons,
- drop_absorbed=drop_absorbed)
+ mod = cls(
+ dependent,
+ exog,
+ entity_effects=entity_effect,
+ time_effects=time_effect,
+ weights=weights,
+ other_effects=other_effects,
+ singletons=singletons,
+ drop_absorbed=drop_absorbed,
+ )
mod.formula = formula
return mod
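
And the formula interface wired up above, continuing the same toy frame (EntityEffects is a term of the formula grammar, not a column of `data`):

fe_mod = PanelOLS.from_formula("y ~ 1 + x1 + EntityEffects", data)
fe_res = fe_mod.fit(cov_type="clustered", cluster_entity=True)
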
@@ -972,7 +1085,7 @@ def _lsmr_path(self):
wy_mean = csc_matrix(wy_mean)
# Purge fitted, weighted values
- sp_cond = diags(cond, format='csc')
+ sp_cond = diags(cond, format="csc")
wx = wx - (wd @ sp_cond @ wx_mean).A
wy = wy - (wd @ sp_cond @ wy_mean).A
@@ -1001,15 +1114,17 @@ def _slow_path(self):
drop_first = self._constant
d = []
if self.entity_effects:
- d.append(self.dependent.dummies('entity', drop_first=drop_first).values)
+ d.append(self.dependent.dummies("entity", drop_first=drop_first).values)
drop_first = True
if self.time_effects:
- d.append(self.dependent.dummies('time', drop_first=drop_first).values)
+ d.append(self.dependent.dummies("time", drop_first=drop_first).values)
drop_first = True
if self.other_effects:
oe = self._other_effect_cats.dataframe
for c in oe:
- dummies = pd.get_dummies(oe[c], drop_first=drop_first).astype(np.float64)
+ dummies = pd.get_dummies(oe[c], drop_first=drop_first).astype(
+ np.float64
+ )
d.append(dummies.values)
drop_first = True
@@ -1046,10 +1161,14 @@ def _choose_twoway_algo(self):
low_memory = reg_size > 2 ** 10
if low_memory:
import warnings
- warnings.warn('Using low-memory algorithm to estimate two-way model. Explicitly set '
- 'low_memory=True to silence this message. Set low_memory=False to use '
- 'the standard algorithm that creates dummy variables for the smaller of '
- 'the number of entities or number of time periods.', MemoryWarning)
+
+ warnings.warn(
+ "Using low-memory algorithm to estimate two-way model. Explicitly set "
+ "low_memory=True to silence this message. Set low_memory=False to use "
+ "the standard algorithm that creates dummy variables for the smaller of "
+ "the number of entities or number of time periods.",
+ MemoryWarning,
+ )
return low_memory
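
The warning fires only when low_memory is left as None; passing it explicitly is the documented way to opt in or silence it, e.g.:

two_way = PanelOLS(data[["y"]], data[["x1"]], entity_effects=True, time_effects=True)
res_lm = two_way.fit(low_memory=True)  # no MemoryWarning emitted
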
def _fast_path(self, low_memory):
@@ -1075,19 +1194,19 @@ def _fast_path(self, low_memory):
effect = self.dependent.entity_ids
else:
effect = self.dependent.time_ids
- col = ensure_unique_column('additional.effect', groups.dataframe)
+ col = ensure_unique_column("additional.effect", groups.dataframe)
groups.dataframe[col] = effect
y = y.general_demean(groups)
x = x.general_demean(groups)
elif self.entity_effects and self.time_effects:
- y = y.demean('both', low_memory=low_memory)
- x = x.demean('both', low_memory=low_memory)
+ y = y.demean("both", low_memory=low_memory)
+ x = x.demean("both", low_memory=low_memory)
elif self.entity_effects:
- y = y.demean('entity')
- x = x.demean('entity')
+ y = y.demean("entity")
+ x = x.demean("entity")
else: # self.time_effects
- y = y.demean('time')
- x = x.demean('time')
+ y = y.demean("time")
+ x = x.demean("time")
y = y.values2d
x = x.values2d
@@ -1127,19 +1246,19 @@ def _weighted_fast_path(self, low_memory):
effect = self.dependent.entity_ids
else:
effect = self.dependent.time_ids
- col = ensure_unique_column('additional.effect', groups.dataframe)
+ col = ensure_unique_column("additional.effect", groups.dataframe)
groups.dataframe[col] = effect
wy = y.general_demean(groups, weights=self.weights)
wx = x.general_demean(groups, weights=self.weights)
elif self.entity_effects and self.time_effects:
- wy = y.demean('both', weights=self.weights, low_memory=low_memory)
- wx = x.demean('both', weights=self.weights, low_memory=low_memory)
+ wy = y.demean("both", weights=self.weights, low_memory=low_memory)
+ wx = x.demean("both", weights=self.weights, low_memory=low_memory)
elif self.entity_effects:
- wy = y.demean('entity', weights=self.weights)
- wx = x.demean('entity', weights=self.weights)
+ wy = y.demean("entity", weights=self.weights)
+ wx = x.demean("entity", weights=self.weights)
else: # self.time_effects
- wy = y.demean('time', weights=self.weights)
- wx = x.demean('time', weights=self.weights)
+ wy = y.demean("time", weights=self.weights)
+ wx = x.demean("time", weights=self.weights)
wy = wy.values2d
wx = wx.values2d
@@ -1164,8 +1283,10 @@ def _info(self):
other_info = []
oe = self._other_effect_cats.dataframe
for c in oe:
- name = 'Observations per group (' + str(c) + ')'
- other_info.append(panel_structure_stats(oe[c].values.astype(np.int32), name))
+ name = "Observations per group (" + str(c) + ")"
+ other_info.append(
+ panel_structure_stats(oe[c].values.astype(np.int32), name)
+ )
other_info = pd.DataFrame(other_info)
return entity_info, time_info, other_info
@@ -1185,13 +1306,13 @@ def _is_effect_nested(effects, clusters):
return np.all(is_nested)
def _determine_df_adjustment(self, cov_type, **cov_config):
- if cov_type != 'clustered' or not self._has_effect:
+ if cov_type != "clustered" or not self._has_effect:
return True
num_effects = self.entity_effects + self.time_effects
if self.other_effects:
num_effects += self._other_effect_cats.shape[1]
- clusters = cov_config.get('clusters', None)
+ clusters = cov_config.get("clusters", None)
if clusters is None: # No clusters
return True
@@ -1200,8 +1321,18 @@ def _determine_df_adjustment(self, cov_type, **cov_config):
return not self._is_effect_nested(effects, clusters)
return True # Default case for 2-way -- not completely clear
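
Callers can defer to this nesting logic or override it through fit's auto_df/count_effects switches; a usage sketch with the toy fixed-effects model from above:

res_auto = fe_mod.fit(cov_type="clustered", cluster_entity=True)  # auto_df=True
res_manual = fe_mod.fit(
    cov_type="clustered", cluster_entity=True, auto_df=False, count_effects=False
)
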
- def fit(self, *, use_lsdv=False, use_lsmr=False, low_memory=None, cov_type='unadjusted',
- debiased=True, auto_df=True, count_effects=True, **cov_config):
+ def fit(
+ self,
+ *,
+ use_lsdv=False,
+ use_lsmr=False,
+ low_memory=None,
+ cov_type="unadjusted",
+ debiased=True,
+ auto_df=True,
+ count_effects=True,
+ **cov_config
+ ):
"""
Estimate model parameters
@@ -1240,7 +1371,7 @@ def fit(self, *, use_lsdv=False, use_lsmr=False, low_memory=None, cov_type='unad
Returns
-------
- results : PanelEffectsResults
+ PanelEffectsResults
Estimation results
Examples
@@ -1281,13 +1412,17 @@ def fit(self, *, use_lsdv=False, use_lsmr=False, low_memory=None, cov_type='unad
elif use_lsdv:
y, x, ybar, y_effects, x_effects = self._slow_path()
else:
- low_memory = self._choose_twoway_algo() if low_memory is None else low_memory
+ low_memory = (
+ self._choose_twoway_algo() if low_memory is None else low_memory
+ )
if not weighted:
y, x, ybar = self._fast_path(low_memory=low_memory)
y_effects = 0.0
x_effects = np.zeros(x.shape[1])
else:
- y, x, ybar, y_effects, x_effects = self._weighted_fast_path(low_memory=low_memory)
+ y, x, ybar, y_effects, x_effects = self._weighted_fast_path(
+ low_memory=low_memory
+ )
neffects = 0
drop_first = self.has_constant
@@ -1310,10 +1445,13 @@ def fit(self, *, use_lsdv=False, use_lsmr=False, low_memory=None, cov_type='unad
retain = not_absorbed(x)
if len(retain) != x.shape[1]:
drop = set(range(x.shape[1])).difference(retain)
- dropped = ', '.join([self.exog.vars[i] for i in drop])
+ dropped = ", ".join([self.exog.vars[i] for i in drop])
import warnings
- warnings.warn(absorbing_warn_msg.format(absorbed_variables=dropped),
- AbsorbingEffectWarning)
+
+ warnings.warn(
+ absorbing_warn_msg.format(absorbed_variables=dropped),
+ AbsorbingEffectWarning,
+ )
x = x[:, retain]
# Adjust exog
self.exog = PanelData(self.exog.dataframe.iloc[:, retain])
@@ -1329,8 +1467,16 @@ def fit(self, *, use_lsdv=False, use_lsmr=False, low_memory=None, cov_type='unad
count_effects = self._determine_df_adjustment(cov_type, **cov_config)
extra_df = neffects if count_effects else 0
- cov = cov_est(y, x, params, self.dependent.entity_ids, self.dependent.time_ids,
- debiased=debiased, extra_df=extra_df, **cov_config)
+ cov = cov_est(
+ y,
+ x,
+ params,
+ self.dependent.entity_ids,
+ self.dependent.time_ids,
+ debiased=debiased,
+ extra_df=extra_df,
+ **cov_config
+ )
weps = y - x @ params
eps = weps
_y = self.dependent.values2d
@@ -1342,8 +1488,8 @@ def fit(self, *, use_lsdv=False, use_lsmr=False, low_memory=None, cov_type='unad
w = self.weights.values2d
eps -= (w * eps).sum() / w.sum()
index = self.dependent.index
- fitted = pd.DataFrame(_x @ params, index, ['fitted_values'])
- idiosyncratic = pd.DataFrame(eps, index, ['idiosyncratic'])
+ fitted = pd.DataFrame(_x @ params, index, ["fitted_values"])
+ idiosyncratic = pd.DataFrame(eps, index, ["idiosyncratic"])
eps_effects = _y - fitted.values
sigma2_tot = float(eps_effects.T @ eps_effects / nobs)
@@ -1362,7 +1508,12 @@ def fit(self, *, use_lsdv=False, use_lsmr=False, low_memory=None, cov_type='unad
root_w = np.sqrt(self.weights.values2d)
y_ex = root_w * self.dependent.values2d
mu_ex = 0
- if self.has_constant or self.entity_effects or self.time_effects or self.other_effects:
+ if (
+ self.has_constant
+ or self.entity_effects
+ or self.time_effects
+ or self.other_effects
+ ):
mu_ex = root_w * ((root_w.T @ y_ex) / (root_w.T @ root_w))
total_ss_ex_effect = float((y_ex - mu_ex).T @ (y_ex - mu_ex))
r2_ex_effects = 1 - resid_ss / total_ss_ex_effect
@@ -1385,22 +1536,48 @@ def fit(self, *, use_lsdv=False, use_lsmr=False, low_memory=None, cov_type='unad
denom = resid_ss / df_denom
stat = num / denom
- f_pooled = WaldTestStatistic(stat, 'Effects are zero',
- df_num, df_denom=df_denom,
- name='Pooled F-statistic')
+ f_pooled = WaldTestStatistic(
+ stat,
+ "Effects are zero",
+ df_num,
+ df_denom=df_denom,
+ name="Pooled F-statistic",
+ )
res.update(f_pooled=f_pooled)
- effects = pd.DataFrame(eps_effects - eps, columns=['estimated_effects'],
- index=self.dependent.index)
+ effects = pd.DataFrame(
+ eps_effects - eps,
+ columns=["estimated_effects"],
+ index=self.dependent.index,
+ )
else:
- effects = pd.DataFrame(np.zeros_like(eps), columns=['estimated_effects'],
- index=self.dependent.index)
-
- res.update(dict(df_resid=df_resid, df_model=df_model, nobs=y.shape[0],
- residual_ss=resid_ss, total_ss=total_ss, wresids=weps, resids=eps,
- r2=r2, entity_effects=self.entity_effects, time_effects=self.time_effects,
- other_effects=self.other_effects, sigma2_eps=sigma2_eps,
- sigma2_effects=sigma2_effects, rho=rho, r2_ex_effects=r2_ex_effects,
- effects=effects, fitted=fitted, idiosyncratic=idiosyncratic))
+ effects = pd.DataFrame(
+ np.zeros_like(eps),
+ columns=["estimated_effects"],
+ index=self.dependent.index,
+ )
+
+ res.update(
+ dict(
+ df_resid=df_resid,
+ df_model=df_model,
+ nobs=y.shape[0],
+ residual_ss=resid_ss,
+ total_ss=total_ss,
+ wresids=weps,
+ resids=eps,
+ r2=r2,
+ entity_effects=self.entity_effects,
+ time_effects=self.time_effects,
+ other_effects=self.other_effects,
+ sigma2_eps=sigma2_eps,
+ sigma2_effects=sigma2_effects,
+ rho=rho,
+ r2_ex_effects=r2_ex_effects,
+ effects=effects,
+ fitted=fitted,
+ idiosyncratic=idiosyncratic,
+ )
+ )
return PanelEffectsResults(res)
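
The res dict assembled above surfaces through PanelEffectsResults; assuming the result-property names used elsewhere in this PR:

print(fe_res.f_pooled)                  # "Effects are zero" Wald statistic
print(fe_res.estimated_effects.head())  # eps_effects - eps from the fit above
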
@@ -1411,11 +1588,11 @@ class BetweenOLS(PooledOLS):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Dependent (left-hand-side) variable (time by entity)
- exog : array-like
+ exog : array_like
Exogenous or right-hand-side variables (variable by time by entity).
- weights : array-like, optional
+ weights : array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
@@ -1433,33 +1610,39 @@ class BetweenOLS(PooledOLS):
def __init__(self, dependent, exog, *, weights=None):
super(BetweenOLS, self).__init__(dependent, exog, weights=weights)
- self._cov_estimators = CovarianceManager(self.__class__.__name__, HomoskedasticCovariance,
- HeteroskedasticCovariance, ClusteredCovariance)
+ self._cov_estimators = CovarianceManager(
+ self.__class__.__name__,
+ HomoskedasticCovariance,
+ HeteroskedasticCovariance,
+ ClusteredCovariance,
+ )
def _choose_cov(self, cov_type, **cov_config):
"""Return covariance estimator reformat clusters"""
cov_est = self._cov_estimators[cov_type]
- if cov_type != 'clustered':
+ if cov_type != "clustered":
return cov_est, cov_config
cov_config_upd = {k: v for k, v in cov_config.items()}
- clusters = cov_config.get('clusters', None)
+ clusters = cov_config.get("clusters", None)
if clusters is not None:
clusters = self.reformat_clusters(clusters).copy()
cluster_max = np.nanmax(clusters.values3d, axis=1)
delta = cluster_max - np.nanmin(clusters.values3d, axis=1)
if np.any(delta != 0):
- raise ValueError('clusters must not vary within an entity')
+ raise ValueError("clusters must not vary within an entity")
index = clusters.panel.minor_axis
reindex = clusters.entities
clusters = pd.DataFrame(cluster_max.T, index=index, columns=clusters.vars)
clusters = clusters.loc[reindex].astype(np.int64)
- cov_config_upd['clusters'] = clusters
+ cov_config_upd["clusters"] = clusters
return cov_est, cov_config_upd
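
BetweenOLS collapses clusters to one value per entity, hence the within-entity variation check above; a sketch with hypothetical, entity-constant industry codes:

from linearmodels.panel import BetweenOLS

codes = pd.Series(
    pd.factorize(data.index.get_level_values(0))[0] % 3,  # constant per entity
    index=data.index,
).to_frame("industry")
be_res = BetweenOLS(data[["y"]], data[["x1"]]).fit(
    cov_type="clustered", clusters=codes
)
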
- def fit(self, *, reweight=False, cov_type='unadjusted', debiased=True, **cov_config):
+ def fit(
+ self, *, reweight=False, cov_type="unadjusted", debiased=True, **cov_config
+ ):
"""
Estimate model parameters
@@ -1479,7 +1662,7 @@ def fit(self, *, reweight=False, cov_type='unadjusted', debiased=True, **cov_con
Returns
-------
- results : PanelResults
+ PanelResults
Estimation results
Examples
@@ -1515,24 +1698,34 @@ def fit(self, *, reweight=False, cov_type='unadjusted', debiased=True, **cov_con
params = lstsq(wx, wy)[0]
df_resid = y.shape[0] - x.shape[1]
- df_model = x.shape[1],
+ df_model = x.shape[1]
nobs = y.shape[0]
cov_est, cov_config = self._choose_cov(cov_type, **cov_config)
- cov = cov_est(wy, wx, params, self.dependent.entity_ids, self.dependent.time_ids,
- debiased=debiased, **cov_config)
+ cov = cov_est(
+ wy,
+ wx,
+ params,
+ self.dependent.entity_ids,
+ self.dependent.time_ids,
+ debiased=debiased,
+ **cov_config
+ )
weps = wy - wx @ params
index = self.dependent.index
- fitted = pd.DataFrame(self.exog.values2d @ params, index, ['fitted_values'])
+ fitted = pd.DataFrame(self.exog.values2d @ params, index, ["fitted_values"])
eps = y - x @ params
- effects = pd.DataFrame(eps, self.dependent.entities, ['estimated_effects'])
+ effects = pd.DataFrame(eps, self.dependent.entities, ["estimated_effects"])
entities = fitted.index.levels[0][get_codes(fitted.index)[0]]
effects = effects.loc[entities]
effects.index = fitted.index
dep = self.dependent.dataframe
fitted = fitted.reindex(dep.index)
effects = effects.reindex(dep.index)
- idiosyncratic = pd.DataFrame(np.asarray(dep) - np.asarray(fitted) - np.asarray(effects),
- dep.index, ['idiosyncratic'])
+ idiosyncratic = pd.DataFrame(
+ np.asarray(dep) - np.asarray(fitted) - np.asarray(effects),
+ dep.index,
+ ["idiosyncratic"],
+ )
residual_ss = float(weps.T @ weps)
e = y
@@ -1542,11 +1735,25 @@ def fit(self, *, reweight=False, cov_type='unadjusted', debiased=True, **cov_con
total_ss = float(w.T @ (e ** 2))
r2 = 1 - residual_ss / total_ss
- res = self._postestimation(params, cov, debiased, df_resid, weps, wy, wx, root_w)
- res.update(dict(df_resid=df_resid, df_model=df_model, nobs=nobs,
- residual_ss=residual_ss, total_ss=total_ss, r2=r2, wresids=weps,
- resids=eps, index=self.dependent.entities, fitted=fitted, effects=effects,
- idiosyncratic=idiosyncratic))
+ res = self._postestimation(
+ params, cov, debiased, df_resid, weps, wy, wx, root_w
+ )
+ res.update(
+ dict(
+ df_resid=df_resid,
+ df_model=df_model,
+ nobs=nobs,
+ residual_ss=residual_ss,
+ total_ss=total_ss,
+ r2=r2,
+ wresids=weps,
+ resids=eps,
+ index=self.dependent.entities,
+ fitted=fitted,
+ effects=effects,
+ idiosyncratic=idiosyncratic,
+ )
+ )
return PanelResults(res)
@@ -1559,18 +1766,18 @@ def from_formula(cls, formula, data, *, weights=None):
----------
formula : str
Formula to transform into model. Conforms to patsy formula rules.
- data : array-like
+ data : array_like
Data structure that can be coerced into a PanelData. In most
cases, this should be a multi-index DataFrame where the level 0
index contains the entities and the level 1 contains the time.
- weights: array-like, optional
+ weights: array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
Returns
-------
- model : BetweenOLS
+ BetweenOLS
Model specified using the formula
Notes
@@ -1597,11 +1804,11 @@ class FirstDifferenceOLS(PooledOLS):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Dependent (left-hand-side) variable (time by entity)
- exog : array-like
+ exog : array_like
Exogenous or right-hand-side variables (variable by time by entity).
- weights : array-like, optional
+ weights : array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
@@ -1618,51 +1825,60 @@ class FirstDifferenceOLS(PooledOLS):
def __init__(self, dependent, exog, *, weights=None):
super(FirstDifferenceOLS, self).__init__(dependent, exog, weights=weights)
if self._constant:
- raise ValueError('Constants are not allowed in first difference regressions.')
+ raise ValueError(
+ "Constants are not allowed in first difference regressions."
+ )
if self.dependent.nobs < 2:
- raise ValueError('Panel must have at least 2 time periods')
+ raise ValueError("Panel must have at least 2 time periods")
def _choose_cov(self, cov_type, **cov_config):
"""Return covariance estimator and reformat clusters"""
cov_est = self._cov_estimators[cov_type]
- if cov_type != 'clustered':
+ if cov_type != "clustered":
return cov_est, cov_config
cov_config_upd = {k: v for k, v in cov_config.items()}
- clusters = cov_config.get('clusters', None)
+ clusters = cov_config.get("clusters", None)
if clusters is not None:
clusters = self.reformat_clusters(clusters).copy()
fd = clusters.first_difference()
fd = fd.values2d
if np.any(fd.flat[np.isfinite(fd.flat)] != 0):
- raise ValueError('clusters must be identical for values used '
- 'to compute the first difference')
+ raise ValueError(
+ "clusters must be identical for values used "
+ "to compute the first difference"
+ )
clusters = clusters.dataframe.copy()
- cluster_entity = cov_config_upd.pop('cluster_entity', False)
+ cluster_entity = cov_config_upd.pop("cluster_entity", False)
if cluster_entity:
group_ids = self.dependent.entity_ids.squeeze()
- name = 'cov.cluster.entity'
- group_ids = pd.Series(group_ids,
- index=self.dependent.index,
- name=name)
+ name = "cov.cluster.entity"
+ group_ids = pd.Series(group_ids, index=self.dependent.index, name=name)
if clusters is not None:
clusters[name] = group_ids
else:
clusters = pd.DataFrame(group_ids)
clusters = PanelData(clusters)
values = clusters.values3d[:, 1:]
- clusters = panel_to_frame(values, clusters.panel.items, clusters.panel.major_axis[1:],
- clusters.panel.minor_axis, True)
+ clusters = panel_to_frame(
+ values,
+ clusters.panel.items,
+ clusters.panel.major_axis[1:],
+ clusters.panel.minor_axis,
+ True,
+ )
clusters = PanelData(clusters).dataframe
clusters = clusters.loc[self.dependent.first_difference().index]
clusters = clusters.astype(np.int64)
- cov_config_upd['clusters'] = clusters.values if clusters is not None else clusters
+ cov_config_upd["clusters"] = (
+ clusters.values if clusters is not None else clusters
+ )
return cov_est, cov_config_upd
- def fit(self, *, cov_type='unadjusted', debiased=True, **cov_config):
+ def fit(self, *, cov_type="unadjusted", debiased=True, **cov_config):
"""
Estimate model parameters
@@ -1678,7 +1894,7 @@ def fit(self, *, cov_type='unadjusted', debiased=True, **cov_config):
Returns
-------
- results : PanelResults
+ PanelResults
Estimation results
Examples
@@ -1728,9 +1944,14 @@ def fit(self, *, cov_type='unadjusted', debiased=True, **cov_config):
w = 1.0 / self.weights.values3d
w = w[:, :-1] + w[:, 1:]
w = 1.0 / w
- w = panel_to_frame(w, self.weights.panel.items, self.weights.panel.major_axis[1:],
- self.weights.panel.minor_axis, True)
- w = w.reindex(self.weights.index).dropna(how='any')
+ w = panel_to_frame(
+ w,
+ self.weights.panel.items,
+ self.weights.panel.major_axis[1:],
+ self.weights.panel.minor_axis,
+ True,
+ )
+ w = w.reindex(self.weights.index).dropna(how="any")
index = w.index
w = w.values
@@ -1742,26 +1963,49 @@ def fit(self, *, cov_type='unadjusted', debiased=True, **cov_config):
params = lstsq(wx, wy)[0]
df_resid = y.shape[0] - x.shape[1]
cov_est, cov_config = self._choose_cov(cov_type, **cov_config)
- cov = cov_est(wy, wx, params, entity_ids, time_ids, debiased=debiased, **cov_config)
+ cov = cov_est(
+ wy, wx, params, entity_ids, time_ids, debiased=debiased, **cov_config
+ )
weps = wy - wx @ params
- fitted = pd.DataFrame(self.exog.values2d @ params,
- self.dependent.index, ['fitted_values'])
- idiosyncratic = pd.DataFrame(self.dependent.values2d - fitted.values,
- self.dependent.index, ['idiosyncratic'])
- effects = pd.DataFrame(np.full_like(fitted.values, np.nan), self.dependent.index,
- ['estimated_effects'])
+ fitted = pd.DataFrame(
+ self.exog.values2d @ params, self.dependent.index, ["fitted_values"]
+ )
+ idiosyncratic = pd.DataFrame(
+ self.dependent.values2d - fitted.values,
+ self.dependent.index,
+ ["idiosyncratic"],
+ )
+ effects = pd.DataFrame(
+ np.full_like(fitted.values, np.nan),
+ self.dependent.index,
+ ["estimated_effects"],
+ )
eps = y - x @ params
residual_ss = float(weps.T @ weps)
total_ss = float(w.T @ (y ** 2))
r2 = 1 - residual_ss / total_ss
- res = self._postestimation(params, cov, debiased, df_resid, weps, wy, wx, root_w)
- res.update(dict(df_resid=df_resid, df_model=x.shape[1], nobs=y.shape[0],
- residual_ss=residual_ss, total_ss=total_ss, r2=r2,
- resids=eps, wresids=weps, index=index, fitted=fitted, effects=effects,
- idiosyncratic=idiosyncratic))
+ res = self._postestimation(
+ params, cov, debiased, df_resid, weps, wy, wx, root_w
+ )
+ res.update(
+ dict(
+ df_resid=df_resid,
+ df_model=x.shape[1],
+ nobs=y.shape[0],
+ residual_ss=residual_ss,
+ total_ss=total_ss,
+ r2=r2,
+ resids=eps,
+ wresids=weps,
+ index=index,
+ fitted=fitted,
+ effects=effects,
+ idiosyncratic=idiosyncratic,
+ )
+ )
return PanelResults(res)
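
A minimal first-difference sketch; recall from the constructor above that a constant is rejected and at least two time periods are required:

from linearmodels.panel import FirstDifferenceOLS

fd_res = FirstDifferenceOLS(data[["y"]], data[["x1"]]).fit(cov_type="robust")
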
@@ -1774,18 +2018,18 @@ def from_formula(cls, formula, data, *, weights=None):
----------
formula : str
Formula to transform into model. Conforms to patsy formula rules.
- data : array-like
+ data : array_like
Data structure that can be coerced into a PanelData. In most
cases, this should be a multi-index DataFrame where the level 0
index contains the entities and the level 1 contains the time.
- weights: array-like, optional
+ weights: array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
Returns
-------
- model : FirstDifferenceOLS
+ FirstDifferenceOLS
Model specified using the formula
Notes
@@ -1812,11 +2056,11 @@ class RandomEffects(PooledOLS):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Dependent (left-hand-side) variable (time by entity)
- exog : array-like
+ exog : array_like
Exogenous or right-hand-side variables (variable by time by entity).
- weights : array-like, optional
+ weights : array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
@@ -1842,18 +2086,18 @@ def from_formula(cls, formula, data, *, weights=None):
----------
formula : str
Formula to transform into model. Conforms to patsy formula rules.
- data : array-like
+ data : array_like
Data structure that can be coerced into a PanelData. In most
cases, this should be a multi-index DataFrame where the level 0
index contains the entities and the level 1 contains the time.
- weights: array-like, optional
+ weights: array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
Returns
-------
- model : RandomEffects
+ RandomEffects
Model specified using the formula
Notes
@@ -1873,11 +2117,13 @@ def from_formula(cls, formula, data, *, weights=None):
mod.formula = formula
return mod
- def fit(self, *, small_sample=False, cov_type='unadjusted', debiased=True, **cov_config):
+ def fit(
+ self, *, small_sample=False, cov_type="unadjusted", debiased=True, **cov_config
+ ):
w = self.weights.values2d
root_w = np.sqrt(w)
- y = self.dependent.demean('entity', weights=self.weights).values2d
- x = self.exog.demean('entity', weights=self.weights).values2d
+ y = self.dependent.demean("entity", weights=self.weights).values2d
+ x = self.exog.demean("entity", weights=self.weights).values2d
if self.has_constant:
w_sum = w.sum()
y_gm = (w * self.dependent.values2d).sum(0) / w_sum
@@ -1887,8 +2133,8 @@ def fit(self, *, small_sample=False, cov_type='unadjusted', debiased=True, **cov
params = lstsq(x, y)[0]
weps = y - x @ params
- wybar = self.dependent.mean('entity', weights=self.weights)
- wxbar = self.exog.mean('entity', weights=self.weights)
+ wybar = self.dependent.mean("entity", weights=self.weights)
+ wxbar = self.exog.mean("entity", weights=self.weights)
params = lstsq(wxbar, wybar)[0]
wu = np.asarray(wybar) - np.asarray(wxbar) @ params
@@ -1897,12 +2143,12 @@ def fit(self, *, small_sample=False, cov_type='unadjusted', debiased=True, **cov
nvar = x.shape[1]
sigma2_e = float(weps.T @ weps) / (nobs - nvar - neffects + 1)
ssr = float(wu.T @ wu)
- t = self.dependent.count('entity').values
+ t = self.dependent.count("entity").values
unbalanced = np.ptp(t) != 0
if small_sample and unbalanced:
ssr = float((t * wu).T @ wu)
wx = root_w * self.exog.dataframe
- means = wx.groupby(level=0).transform('mean').values
+ means = wx.groupby(level=0).transform("mean").values
denom = means.T @ means
sums = wx.groupby(level=0).sum().values
num = sums.T @ sums
@@ -1914,7 +2160,7 @@ def fit(self, *, small_sample=False, cov_type='unadjusted', debiased=True, **cov
rho = sigma2_u / (sigma2_u + sigma2_e)
theta = 1 - np.sqrt(sigma2_e / (t * sigma2_u + sigma2_e))
- theta_out = pd.DataFrame(theta, columns=['theta'], index=wybar.index)
+ theta_out = pd.DataFrame(theta, columns=["theta"], index=wybar.index)
wy = root_w * self.dependent.values2d
wx = root_w * self.exog.values2d
index = self.dependent.index
@@ -1927,16 +2173,26 @@ def fit(self, *, small_sample=False, cov_type='unadjusted', debiased=True, **cov
df_resid = wy.shape[0] - wx.shape[1]
cov_est, cov_config = self._choose_cov(cov_type, **cov_config)
- cov = cov_est(wy, wx, params, self.dependent.entity_ids, self.dependent.time_ids,
- debiased=debiased, **cov_config)
+ cov = cov_est(
+ wy,
+ wx,
+ params,
+ self.dependent.entity_ids,
+ self.dependent.time_ids,
+ debiased=debiased,
+ **cov_config
+ )
weps = wy - wx @ params
eps = weps / root_w
index = self.dependent.index
- fitted = pd.DataFrame(self.exog.values2d @ params, index, ['fitted_values'])
- effects = pd.DataFrame(self.dependent.values2d - np.asarray(fitted) - eps, index,
- ['estimated_effects'])
- idiosyncratic = pd.DataFrame(eps, index, ['idiosyncratic'])
+ fitted = pd.DataFrame(self.exog.values2d @ params, index, ["fitted_values"])
+ effects = pd.DataFrame(
+ self.dependent.values2d - np.asarray(fitted) - eps,
+ index,
+ ["estimated_effects"],
+ )
+ idiosyncratic = pd.DataFrame(eps, index, ["idiosyncratic"])
residual_ss = float(weps.T @ weps)
wmu = 0
if self.has_constant:
@@ -1945,12 +2201,29 @@ def fit(self, *, small_sample=False, cov_type='unadjusted', debiased=True, **cov
total_ss = float(wy_demeaned.T @ wy_demeaned)
r2 = 1 - residual_ss / total_ss
- res = self._postestimation(params, cov, debiased, df_resid, weps, wy, wx, root_w)
- res.update(dict(df_resid=df_resid, df_model=x.shape[1], nobs=y.shape[0],
- residual_ss=residual_ss, total_ss=total_ss, r2=r2,
- resids=eps, wresids=weps, index=index, sigma2_eps=sigma2_e,
- sigma2_effects=sigma2_u, rho=rho, theta=theta_out,
- fitted=fitted, effects=effects, idiosyncratic=idiosyncratic))
+ res = self._postestimation(
+ params, cov, debiased, df_resid, weps, wy, wx, root_w
+ )
+ res.update(
+ dict(
+ df_resid=df_resid,
+ df_model=x.shape[1],
+ nobs=y.shape[0],
+ residual_ss=residual_ss,
+ total_ss=total_ss,
+ r2=r2,
+ resids=eps,
+ wresids=weps,
+ index=index,
+ sigma2_eps=sigma2_e,
+ sigma2_effects=sigma2_u,
+ rho=rho,
+ theta=theta_out,
+ fitted=fitted,
+ effects=effects,
+ idiosyncratic=idiosyncratic,
+ )
+ )
return RandomEffectsResults(res)
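
The theta/rho quantities computed above are exposed on the result object; a hedged sketch assuming the RandomEffectsResults properties:

from linearmodels.panel import RandomEffects

re_res = RandomEffects(data[["y"]], data[["x1"]]).fit(small_sample=False)
print(re_res.theta.head())            # quasi-demeaning weights, one per entity
print(re_res.variance_decomposition)  # sigma2_effects, sigma2_eps, rho
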
@@ -1961,11 +2234,11 @@ class FamaMacBeth(PooledOLS):
Parameters
----------
- dependent : array-like
+ dependent : array_like
Dependent (left-hand-side) variable (time by entity)
- exog : array-like
+ exog : array_like
Exogenous or right-hand-side variables (variable by time by entity).
- weights : array-like, optional
+ weights : array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
@@ -2006,30 +2279,41 @@ def _validate_blocks(self):
wx = root_w * x
exog = self.exog.dataframe
- wx = pd.DataFrame(wx[self._not_null], index=exog.notnull().index, columns=exog.columns)
+ wx = pd.DataFrame(
+ wx[self._not_null], index=exog.notnull().index, columns=exog.columns
+ )
def validate_block(ex):
return ex.shape[0] >= ex.shape[1] and matrix_rank(ex) == ex.shape[1]
valid_blocks = wx.groupby(level=1).apply(validate_block)
if not valid_blocks.any():
- err = 'Model cannot be estimated. All blocks of time-series observations are rank\n' \
- 'deficient, and so it is not possible to estimate any cross-sectional ' \
- 'regressions.'
+ err = (
+ "Model cannot be estimated. All blocks of time-series observations are rank\n"
+ "deficient, and so it is not possible to estimate any cross-sectional "
+ "regressions."
+ )
raise ValueError(err)
if valid_blocks.sum() < exog.shape[1]:
import warnings
- warnings.warn('The number of time-series observation available to estimate '
- 'cross-sectional\nregressions, {0}, is less than the number of '
- 'parameters in the model. Parameter\ninference is not '
- 'available.'.format(valid_blocks.sum()), InferenceUnavailableWarning)
+
+ warnings.warn(
+ "The number of time-series observation available to estimate "
+ "cross-sectional\nregressions, {0}, is less than the number of "
+ "parameters in the model. Parameter\ninference is not "
+ "available.".format(valid_blocks.sum()),
+ InferenceUnavailableWarning,
+ )
elif valid_blocks.sum() < valid_blocks.shape[0]:
import warnings
- warnings.warn('{0} of the time-series regressions cannot be estimated due to '
- 'deficient rank.'.format(valid_blocks.shape[0] - valid_blocks.sum()),
- MissingValueWarning)
- def fit(self, cov_type='unadjusted', debiased=True, **cov_config):
+ warnings.warn(
+ "{0} of the time-series regressions cannot be estimated due to "
+ "deficient rank.".format(valid_blocks.shape[0] - valid_blocks.sum()),
+ MissingValueWarning,
+ )
+
+ def fit(self, cov_type="unadjusted", debiased=True, **cov_config):
"""
Estimate model parameters
@@ -2045,7 +2329,7 @@ def fit(self, cov_type='unadjusted', debiased=True, **cov_config):
Returns
-------
- results : PanelResults
+ PanelResults
Estimation results
Examples
@@ -2078,10 +2362,15 @@ def fit(self, cov_type='unadjusted', debiased=True, **cov_config):
exog = self.exog.dataframe
index = self.dependent.index
wy = pd.DataFrame(wy[self._not_null], index=index, columns=dep.columns)
- wx = pd.DataFrame(wx[self._not_null], index=exog.notnull().index, columns=exog.columns)
+ wx = pd.DataFrame(
+ wx[self._not_null], index=exog.notnull().index, columns=exog.columns
+ )
- yx = pd.DataFrame(np.c_[wy.values, wx.values], columns=list(wy.columns) + list(wx.columns),
- index=wy.index)
+ yx = pd.DataFrame(
+ np.c_[wy.values, wx.values],
+ columns=list(wy.columns) + list(wx.columns),
+ index=wy.index,
+ )
def single(z: pd.DataFrame):
exog = z.iloc[:, 1:].values
@@ -2099,10 +2388,13 @@ def single(z: pd.DataFrame):
wy = wy.values
wx = wx.values
index = self.dependent.index
- fitted = pd.DataFrame(self.exog.values2d @ params, index, ['fitted_values'])
- effects = pd.DataFrame(np.full_like(fitted.values, np.nan), index, ['estimated_effects'])
- idiosyncratic = pd.DataFrame(self.dependent.values2d - fitted.values, index,
- ['idiosyncratic'])
+ fitted = pd.DataFrame(self.exog.values2d @ params, index, ["fitted_values"])
+ effects = pd.DataFrame(
+ np.full_like(fitted.values, np.nan), index, ["estimated_effects"]
+ )
+ idiosyncratic = pd.DataFrame(
+ self.dependent.values2d - fitted.values, index, ["idiosyncratic"]
+ )
eps = self.dependent.values2d - fitted.values
weps = wy - wx @ params
@@ -2116,21 +2408,35 @@ def single(z: pd.DataFrame):
total_ss = float(w.T @ (e ** 2))
r2 = 1 - residual_ss / total_ss
- if cov_type in ('robust', 'unadjusted', 'homoskedastic', 'heteroskedastic'):
+ if cov_type in ("robust", "unadjusted", "homoskedastic", "heteroskedastic"):
cov_est = FamaMacBethCovariance
- elif cov_type == 'kernel':
+ elif cov_type == "kernel":
cov_est = FamaMacBethKernelCovariance
else:
- raise ValueError('Unknown cov_type')
+ raise ValueError("Unknown cov_type")
cov = cov_est(wy, wx, params, all_params, debiased=debiased, **cov_config)
df_resid = wy.shape[0] - params.shape[0]
- res = self._postestimation(params, cov, debiased, df_resid, weps, wy, wx, root_w)
+ res = self._postestimation(
+ params, cov, debiased, df_resid, weps, wy, wx, root_w
+ )
index = self.dependent.index
- res.update(dict(df_resid=df_resid, df_model=x.shape[1], nobs=y.shape[0],
- residual_ss=residual_ss, total_ss=total_ss,
- r2=r2, resids=eps, wresids=weps, index=index, fitted=fitted,
- effects=effects, idiosyncratic=idiosyncratic))
+ res.update(
+ dict(
+ df_resid=df_resid,
+ df_model=x.shape[1],
+ nobs=y.shape[0],
+ residual_ss=residual_ss,
+ total_ss=total_ss,
+ r2=r2,
+ resids=eps,
+ wresids=weps,
+ index=index,
+ fitted=fitted,
+ effects=effects,
+ idiosyncratic=idiosyncratic,
+ )
+ )
return PanelResults(res)
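
Fama-MacBeth usage sketch: each period's cross-section must pass the rank check in _validate_blocks, and "kernel" selects the HAC branch above:

from linearmodels.panel import FamaMacBeth

fm_res = FamaMacBeth(data[["y"]], data[["x1"]]).fit(cov_type="kernel")
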
@classmethod
@@ -2142,18 +2448,18 @@ def from_formula(cls, formula, data, *, weights=None):
----------
formula : str
Formula to transform into model. Conforms to patsy formula rules.
- data : array-like
+ data : array_like
Data structure that can be coerced into a PanelData. In most
cases, this should be a multi-index DataFrame where the level 0
index contains the entities and the level 1 contains the time.
- weights: array-like, optional
+ weights: array_like, optional
Weights to use in estimation. Assumes residual variance is
proportional to the inverse of the weight so that the residual
times the weight is homoskedastic.
Returns
-------
- model : FamaMacBeth
+ FamaMacBeth
Model specified using the formula
Notes
diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py
index 9c35633d22..ddf70e039b 100644
--- a/linearmodels/panel/results.py
+++ b/linearmodels/panel/results.py
@@ -2,9 +2,9 @@
import datetime as dt
-from property_cached import cached_property
import numpy as np
from pandas import DataFrame, Series, concat
+from property_cached import cached_property
from scipy import stats
from statsmodels.iolib.summary import SimpleTable, fmt_2cols, fmt_params
@@ -12,7 +12,7 @@
from linearmodels.utility import (_ModelComparison, _str, _SummaryStr,
pval_format, quadratic_form_test)
-__all__ = ['PanelResults', 'PanelEffectsResults', 'RandomEffectsResults']
+__all__ = ["PanelResults", "PanelEffectsResults", "RandomEffectsResults"]
class PanelResults(_SummaryStr):
@@ -56,24 +56,24 @@ def __init__(self, res):
@property
def params(self):
"""Estimated parameters"""
- return Series(self._params, index=self._var_names, name='parameter')
+ return Series(self._params, index=self._var_names, name="parameter")
@cached_property
def cov(self):
"""Estimated covariance of parameters"""
- return DataFrame(self._deferred_cov(),
- columns=self._var_names,
- index=self._var_names)
+ return DataFrame(
+ self._deferred_cov(), columns=self._var_names, index=self._var_names
+ )
@property
def std_errors(self):
"""Estimated parameter standard errors"""
- return Series(np.sqrt(np.diag(self.cov)), self._var_names, name='std_error')
+ return Series(np.sqrt(np.diag(self.cov)), self._var_names, name="std_error")
@property
def tstats(self):
"""Parameter t-statistics"""
- return Series(self._params / self.std_errors, name='tstat')
+ return Series(self._params / self.std_errors, name="tstat")
@cached_property
def pvalues(self):
@@ -85,7 +85,7 @@ def pvalues(self):
pv = 2 * (1 - stats.t.cdf(abs_tstats, self.df_resid))
else:
pv = 2 * (1 - stats.norm.cdf(abs_tstats))
- return Series(pv, index=self._var_names, name='pvalue')
+ return Series(pv, index=self._var_names, name="pvalue")
@property
def df_resid(self):
@@ -145,7 +145,7 @@ def rsquared_between(self):
Returns
-------
- rsquared : float
+ float
Between coefficient of determination
Notes
@@ -161,7 +161,7 @@ def rsquared_within(self):
Returns
-------
- rsquared : float
+ float
Within coefficient of determination
Notes
@@ -177,7 +177,7 @@ def rsquared_overall(self):
Returns
-------
- rsquared : float
+ float
Overall coefficient of determination
Notes
@@ -214,7 +214,7 @@ def conf_int(self, level=0.95):
Returns
-------
- ci : DataFrame
+ DataFrame
Confidence interval of the form [lower, upper] for each parameter
Notes
@@ -228,7 +228,7 @@ def conf_int(self, level=0.95):
q = stats.norm.ppf(ci_quantiles)
q = q[None, :]
ci = self.params[:, None] + self.std_errors[:, None] * q
- return DataFrame(ci, index=self._var_names, columns=['lower', 'upper'])
+ return DataFrame(ci, index=self._var_names, columns=["lower", "upper"])
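
conf_int builds symmetric normal-quantile intervals around params, e.g. with the Fama-MacBeth result from the sketch above:

ci = fm_res.conf_int(level=0.99)  # DataFrame with "lower"/"upper" columns
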
@property
def summary(self):
@@ -238,54 +238,59 @@ def summary(self):
``summary.as_html()`` and ``summary.as_latex()``.
"""
- title = self.name + ' Estimation Summary'
+ title = self.name + " Estimation Summary"
mod = self.model
- top_left = [('Dep. Variable:', mod.dependent.vars[0]),
- ('Estimator:', self.name),
- ('No. Observations:', self.nobs),
- ('Date:', self._datetime.strftime('%a, %b %d %Y')),
- ('Time:', self._datetime.strftime('%H:%M:%S')),
- ('Cov. Estimator:', self._cov_type),
- ('', ''),
- ('Entities:', str(int(self.entity_info['total']))),
- ('Avg Obs:', _str(self.entity_info['mean'])),
- ('Min Obs:', _str(self.entity_info['min'])),
- ('Max Obs:', _str(self.entity_info['max'])),
- ('', ''),
- ('Time periods:', str(int(self.time_info['total']))),
- ('Avg Obs:', _str(self.time_info['mean'])),
- ('Min Obs:', _str(self.time_info['min'])),
- ('Max Obs:', _str(self.time_info['max'])),
- ('', '')]
+ top_left = [
+ ("Dep. Variable:", mod.dependent.vars[0]),
+ ("Estimator:", self.name),
+ ("No. Observations:", self.nobs),
+ ("Date:", self._datetime.strftime("%a, %b %d %Y")),
+ ("Time:", self._datetime.strftime("%H:%M:%S")),
+ ("Cov. Estimator:", self._cov_type),
+ ("", ""),
+ ("Entities:", str(int(self.entity_info["total"]))),
+ ("Avg Obs:", _str(self.entity_info["mean"])),
+ ("Min Obs:", _str(self.entity_info["min"])),
+ ("Max Obs:", _str(self.entity_info["max"])),
+ ("", ""),
+ ("Time periods:", str(int(self.time_info["total"]))),
+ ("Avg Obs:", _str(self.time_info["mean"])),
+ ("Min Obs:", _str(self.time_info["min"])),
+ ("Max Obs:", _str(self.time_info["max"])),
+ ("", ""),
+ ]
is_invalid = np.isfinite(self.f_statistic.stat)
- f_stat = _str(self.f_statistic.stat) if is_invalid else '--'
- f_pval = pval_format(self.f_statistic.pval) if is_invalid else '--'
- f_dist = self.f_statistic.dist_name if is_invalid else '--'
-
- f_robust = _str(self.f_statistic_robust.stat) if is_invalid else '--'
- f_robust_pval = pval_format(self.f_statistic_robust.pval) if is_invalid else '--'
- f_robust_name = self.f_statistic_robust.dist_name if is_invalid else '--'
-
- top_right = [('R-squared:', _str(self.rsquared)),
- ('R-squared (Between):', _str(self.rsquared_between)),
- ('R-squared (Within):', _str(self.rsquared_within)),
- ('R-squared (Overall):', _str(self.rsquared_overall)),
- ('Log-likelihood', _str(self._loglik)),
- ('', ''),
- ('F-statistic:', f_stat),
- ('P-value', f_pval),
- ('Distribution:', f_dist),
- ('', ''),
- ('F-statistic (robust):', f_robust),
- ('P-value', f_robust_pval),
- ('Distribution:', f_robust_name),
- ('', ''),
- ('', ''),
- ('', ''),
- ('', ''),
- ]
+ f_stat = _str(self.f_statistic.stat) if is_invalid else "--"
+ f_pval = pval_format(self.f_statistic.pval) if is_invalid else "--"
+ f_dist = self.f_statistic.dist_name if is_invalid else "--"
+
+ f_robust = _str(self.f_statistic_robust.stat) if is_invalid else "--"
+ f_robust_pval = (
+ pval_format(self.f_statistic_robust.pval) if is_invalid else "--"
+ )
+ f_robust_name = self.f_statistic_robust.dist_name if is_invalid else "--"
+
+ top_right = [
+ ("R-squared:", _str(self.rsquared)),
+ ("R-squared (Between):", _str(self.rsquared_between)),
+ ("R-squared (Within):", _str(self.rsquared_within)),
+ ("R-squared (Overall):", _str(self.rsquared_overall)),
+ ("Log-likelihood", _str(self._loglik)),
+ ("", ""),
+ ("F-statistic:", f_stat),
+ ("P-value", f_pval),
+ ("Distribution:", f_dist),
+ ("", ""),
+ ("F-statistic (robust):", f_robust),
+ ("P-value", f_robust_pval),
+ ("Distribution:", f_robust_name),
+ ("", ""),
+ ("", ""),
+ ("", ""),
+ ("", ""),
+ ]
stubs = []
vals = []
@@ -299,9 +304,9 @@ def summary(self):
# Top Table
# Parameter table
fmt = fmt_2cols
- fmt['data_fmts'][1] = '%18s'
+ fmt["data_fmts"][1] = "%18s"
- top_right = [('%-21s' % (' ' + k), v) for k, v in top_right]
+ top_right = [("%-21s" % (" " + k), v) for k, v in top_right]
stubs = []
vals = []
for stub, val in top_right:
@@ -310,11 +315,13 @@ def summary(self):
table.extend_right(SimpleTable(vals, stubs=stubs))
smry.tables.append(table)
- param_data = np.c_[self.params.values[:, None],
- self.std_errors.values[:, None],
- self.tstats.values[:, None],
- self.pvalues.values[:, None],
- self.conf_int()]
+ param_data = np.c_[
+ self.params.values[:, None],
+ self.std_errors.values[:, None],
+ self.tstats.values[:, None],
+ self.pvalues.values[:, None],
+ self.conf_int(),
+ ]
data = []
for row in param_data:
txt_row = []
@@ -324,14 +331,12 @@ def summary(self):
f = pval_format
txt_row.append(f(v))
data.append(txt_row)
- title = 'Parameter Estimates'
+ title = "Parameter Estimates"
table_stubs = list(self.params.index)
- header = ['Parameter', 'Std. Err.', 'T-stat', 'P-value', 'Lower CI', 'Upper CI']
- table = SimpleTable(data,
- stubs=table_stubs,
- txt_fmt=fmt_params,
- headers=header,
- title=title)
+ header = ["Parameter", "Std. Err.", "T-stat", "P-value", "Lower CI", "Upper CI"]
+ table = SimpleTable(
+ data, stubs=table_stubs, txt_fmt=fmt_params, headers=header, title=title
+ )
smry.tables.append(table)
return smry
@@ -345,26 +350,36 @@ def resids(self):
These residuals are from the estimated model. They will not have the
same shape as the original data whenever the model is estimated on
transformed data which has a different shape."""
- return Series(self._resids.squeeze(), index=self._index, name='residual')
+ return Series(self._resids.squeeze(), index=self._index, name="residual")
def _out_of_sample(self, exog, data, fitted, missing):
"""Interface between model predict and predict for OOS fits"""
if exog is not None and data is not None:
- raise ValueError('Predictions can only be constructed using one '
- 'of exog or data, but not both.')
+ raise ValueError(
+ "Predictions can only be constructed using one "
+ "of exog or data, but not both."
+ )
pred = self.model.predict(self.params, exog=exog, data=data)
if not missing:
pred = pred.loc[pred.notnull().all(1)]
return pred
- def predict(self, exog=None, *, data=None, fitted=True, effects=False, idiosyncratic=False,
- missing=False):
+ def predict(
+ self,
+ exog=None,
+ *,
+ data=None,
+ fitted=True,
+ effects=False,
+ idiosyncratic=False,
+ missing=False
+ ):
"""
In- and out-of-sample predictions
Parameters
----------
- exog : array-like
+ exog : array_like
Exogenous values to use in out-of-sample prediction (nobs by nexog)
data : DataFrame, optional
DataFrame to use for out-of-sample predictions when model was
@@ -382,7 +397,7 @@ def predict(self, exog=None, *, data=None, fitted=True, effects=False, idiosyncr
Returns
-------
- predictions : DataFrame
+ DataFrame
DataFrame containing columns for all selected outputs
Notes
@@ -407,7 +422,7 @@ def predict(self, exog=None, *, data=None, fitted=True, effects=False, idiosyncr
if idiosyncratic:
out.append(self.idiosyncratic)
if len(out) == 0:
- raise ValueError('At least one output must be selected')
+ raise ValueError("At least one output must be selected")
out = concat(out, 1) # type: DataFrame
if missing:
index = self._original_index
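
The rewritten signature above makes every selection flag on `predict` keyword-only, and each selected flag contributes one column to the returned DataFrame. A minimal usage sketch on simulated data (the frame, formula, and flag combination are illustrative, not part of this patch):

```python
import numpy as np
import pandas as pd
from linearmodels import PanelOLS

# Simulated balanced panel: 50 entities x 10 periods, one regressor
idx = pd.MultiIndex.from_product([range(50), range(10)], names=["entity", "time"])
df = pd.DataFrame({"x": np.random.randn(500)}, index=idx)
df["y"] = 2.0 * df["x"] + np.random.randn(500)

res = PanelOLS.from_formula("y ~ x + EntityEffects", data=df).fit()

# One column per selected output: fitted values, estimated effects, and
# idiosyncratic residuals
pred = res.predict(fitted=True, effects=True, idiosyncratic=True)
print(pred.head())
```
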
@@ -448,7 +463,9 @@ def idiosyncratic(self):
@property
def wresids(self):
"""Weighted model residuals"""
- return Series(self._wresids.squeeze(), index=self._index, name='weighted residual')
+ return Series(
+ self._wresids.squeeze(), index=self._index, name="weighted residual"
+ )
@property
def f_statistic_robust(self):
@@ -457,7 +474,7 @@ def f_statistic_robust(self):
Returns
-------
- f_stat : WaldTestStatistic
+ WaldTestStatistic
Statistic value, distribution and p-value
Notes
@@ -489,7 +506,7 @@ def f_statistic(self):
Returns
-------
- f_stat : WaldTestStatistic
+ WaldTestStatistic
Statistic value, distribution and p-value
Notes
@@ -540,7 +557,7 @@ def wald_test(self, restriction=None, value=None, *, formula=None):
Returns
-------
- t: WaldTestStatistic
+ WaldTestStatistic
Test statistic for null that restrictions are valid.
Notes
@@ -579,8 +596,9 @@ def wald_test(self, restriction=None, value=None, *, formula=None):
>>> formula = 'union = married = 0'
>>> fe_res.wald_test(formula=formula)
"""
- return quadratic_form_test(self.params, self.cov, restriction=restriction,
- value=value, formula=formula)
+ return quadratic_form_test(
+ self.params, self.cov, restriction=restriction, value=value, formula=formula
+ )
class PanelEffectsResults(PanelResults):
@@ -608,7 +626,7 @@ def f_pooled(self):
Returns
-------
- f_pooled : WaldTestStatistic
+ WaldTestStatistic
Statistic value, distribution and p-value
Notes
@@ -641,13 +659,13 @@ def included_effects(self):
if entity_effect or time_effect or other_effect:
effects = []
if entity_effect:
- effects.append('Entity')
+ effects.append("Entity")
if time_effect:
- effects.append('Time')
+ effects.append("Time")
if other_effect:
oe = self.model._other_effect_cats.dataframe
for c in oe:
- effects.append('Other Effect (' + str(c) + ')')
+ effects.append("Other Effect (" + str(c) + ")")
else:
effects = []
return effects
@@ -673,30 +691,34 @@ def summary(self):
smry = super(PanelEffectsResults, self).summary
is_invalid = np.isfinite(self.f_pooled.stat)
- f_pool = _str(self.f_pooled.stat) if is_invalid else '--'
- f_pool_pval = pval_format(self.f_pooled.pval) if is_invalid else '--'
- f_pool_name = self.f_pooled.dist_name if is_invalid else '--'
+ f_pool = _str(self.f_pooled.stat) if is_invalid else "--"
+ f_pool_pval = pval_format(self.f_pooled.pval) if is_invalid else "--"
+ f_pool_name = self.f_pooled.dist_name if is_invalid else "--"
extra_text = []
if is_invalid:
- extra_text.append('F-test for Poolability: {0}'.format(f_pool))
- extra_text.append('P-value: {0}'.format(f_pool_pval))
- extra_text.append('Distribution: {0}'.format(f_pool_name))
- extra_text.append('')
+ extra_text.append("F-test for Poolability: {0}".format(f_pool))
+ extra_text.append("P-value: {0}".format(f_pool_pval))
+ extra_text.append("Distribution: {0}".format(f_pool_name))
+ extra_text.append("")
if self.included_effects:
- effects = ', '.join(self.included_effects)
- extra_text.append('Included effects: ' + effects)
+ effects = ", ".join(self.included_effects)
+ extra_text.append("Included effects: " + effects)
if self.other_info is not None:
ncol = self.other_info.shape[1]
- extra_text.append('Model includes {0} other effects'.format(ncol))
+ extra_text.append("Model includes {0} other effects".format(ncol))
for c in self.other_info.T:
col = self.other_info.T[c]
- extra_text.append('Other Effect {0}:'.format(c))
- stats = 'Avg Obs: {0}, Min Obs: {1}, Max Obs: {2}, Groups: {3}'
- stats = stats.format(_str(col['mean']), _str(col['min']), _str(col['max']),
- int(col['total']))
+ extra_text.append("Other Effect {0}:".format(c))
+ stats = "Avg Obs: {0}, Min Obs: {1}, Max Obs: {2}, Groups: {3}"
+ stats = stats.format(
+ _str(col["mean"]),
+ _str(col["min"]),
+ _str(col["max"]),
+ int(col["total"]),
+ )
extra_text.append(stats)
smry.add_extra_txt(extra_text)
@@ -707,8 +729,8 @@ def summary(self):
def variance_decomposition(self):
"""Decomposition of total variance into effects and residuals"""
vals = [self._sigma2_effects, self._sigma2_eps, self._rho]
- index = ['Effects', 'Residual', 'Percent due to Effects']
- return Series(vals, index=index, name='Variance Decomposition')
+ index = ["Effects", "Residual", "Percent due to Effects"]
+ return Series(vals, index=index, name="Variance Decomposition")
class RandomEffectsResults(PanelResults):
@@ -727,8 +749,8 @@ def __init__(self, res):
def variance_decomposition(self):
"""Decomposition of total variance into effects and residuals"""
vals = [self._sigma2_effects, self._sigma2_eps, self._rho]
- index = ['Effects', 'Residual', 'Percent due to Effects']
- return Series(vals, index=index, name='Variance Decomposition')
+ index = ["Effects", "Residual", "Percent due to Effects"]
+ return Series(vals, index=index, name="Variance Decomposition")
@property
def theta(self):
@@ -736,7 +758,7 @@ def theta(self):
return self._theta
-def compare(results, precision='tstats'):
+def compare(results, precision="tstats"):
"""
Compare the results of multiple models
@@ -751,7 +773,8 @@ def compare(results, precision='tstats'):
Returns
-------
- comparison : PanelModelComparison
+ PanelModelComparison
+ The model comparison object.
"""
return PanelModelComparison(results, precision=precision)
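
A usage sketch for compare on two toy panel fits; the precision strings follow the class docstring below, so treat the exact option names as assumptions:

```python
import numpy as np
import pandas as pd
from linearmodels import PanelOLS, RandomEffects
from linearmodels.panel.results import compare

idx = pd.MultiIndex.from_product([range(30), range(8)], names=["entity", "time"])
df = pd.DataFrame({"x": np.random.randn(240)}, index=idx)
df["y"] = 0.5 + 1.5 * df["x"] + np.random.randn(240)

fe = PanelOLS.from_formula("y ~ x + EntityEffects", data=df).fit()
re = RandomEffects.from_formula("y ~ 1 + x", data=df).fit()

# precision selects what is shown in parentheses under each coefficient
print(compare({"FE": fe, "RE": re}, precision="std_errors").summary)
```
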
@@ -769,35 +792,36 @@ class PanelModelComparison(_ModelComparison):
Estimator precision to include in the comparison output.
Default is 'tstats'.
"""
+
_supported = (PanelEffectsResults, PanelResults, RandomEffectsResults)
- def __init__(self, results, *, precision='tstats'):
+ def __init__(self, results, *, precision="tstats"):
super(PanelModelComparison, self).__init__(results, precision=precision)
@property
def rsquared_between(self):
"""Coefficients of determination (R**2)"""
- return self._get_property('rsquared_between')
+ return self._get_property("rsquared_between")
@property
def rsquared_within(self):
"""Coefficients of determination (R**2)"""
- return self._get_property('rsquared_within')
+ return self._get_property("rsquared_within")
@property
def rsquared_overall(self):
"""Coefficients of determination (R**2)"""
- return self._get_property('rsquared_overall')
+ return self._get_property("rsquared_overall")
@property
def estimator_method(self):
"""Estimation methods"""
- return self._get_property('name')
+ return self._get_property("name")
@property
def cov_estimator(self):
"""Covariance estimator descriptions"""
- return self._get_property('_cov_type')
+ return self._get_property("_cov_type")
@property
def summary(self):
@@ -809,18 +833,38 @@ def summary(self):
smry = Summary()
models = list(self._results.keys())
- title = 'Model Comparison'
- stubs = ['Dep. Variable', 'Estimator', 'No. Observations', 'Cov. Est.', 'R-squared',
- 'R-Squared (Within)', 'R-Squared (Between)', 'R-Squared (Overall)',
- 'F-statistic', 'P-value (F-stat)']
+ title = "Model Comparison"
+ stubs = [
+ "Dep. Variable",
+ "Estimator",
+ "No. Observations",
+ "Cov. Est.",
+ "R-squared",
+ "R-Squared (Within)",
+ "R-Squared (Between)",
+ "R-Squared (Overall)",
+ "F-statistic",
+ "P-value (F-stat)",
+ ]
dep_name = {}
for key in self._results:
dep_name[key] = self._results[key].model.dependent.vars[0]
dep_name = Series(dep_name)
- vals = concat([dep_name, self.estimator_method, self.nobs, self.cov_estimator,
- self.rsquared, self.rsquared_within, self.rsquared_between,
- self.rsquared_overall, self.f_statistic], 1)
+ vals = concat(
+ [
+ dep_name,
+ self.estimator_method,
+ self.nobs,
+ self.cov_estimator,
+ self.rsquared,
+ self.rsquared_within,
+ self.rsquared_between,
+ self.rsquared_overall,
+ self.f_statistic,
+ ],
+ 1,
+ )
vals = [[i for i in v] for v in vals.T.values]
vals[2] = [str(v) for v in vals[2]]
for i in range(4, len(vals)):
@@ -838,11 +882,11 @@ def summary(self):
precision_fmt = []
for v in precision.values[i]:
v_str = _str(v)
- v_str = '({0})'.format(v_str) if v_str.strip() else v_str
+ v_str = "({0})".format(v_str) if v_str.strip() else v_str
precision_fmt.append(v_str)
params_fmt.append(precision_fmt)
params_stub.append(params.index[i])
- params_stub.append(' ')
+ params_stub.append(" ")
vals = table_concat((vals, params_fmt))
stubs = stub_concat((stubs, params_stub))
@@ -850,32 +894,34 @@ def summary(self):
all_effects = []
for key in self._results:
res = self._results[key]
- effects = getattr(res, 'included_effects', [])
+ effects = getattr(res, "included_effects", [])
all_effects.append(effects)
neffect = max(map(len, all_effects))
effects = []
- effects_stub = ['Effects']
+ effects_stub = ["Effects"]
for i in range(neffect):
if i > 0:
- effects_stub.append('')
+ effects_stub.append("")
row = []
for j in range(len(self._results)):
effect = all_effects[j]
if len(effect) > i:
row.append(effect[i])
else:
- row.append('')
+ row.append("")
effects.append(row)
if effects:
vals = table_concat((vals, effects))
stubs = stub_concat((stubs, effects_stub))
txt_fmt = default_txt_fmt.copy()
- txt_fmt['data_aligns'] = 'r'
- txt_fmt['header_align'] = 'r'
- table = SimpleTable(vals, headers=models, title=title, stubs=stubs, txt_fmt=txt_fmt)
+ txt_fmt["data_aligns"] = "r"
+ txt_fmt["header_align"] = "r"
+ table = SimpleTable(
+ vals, headers=models, title=title, stubs=stubs, txt_fmt=txt_fmt
+ )
smry.tables.append(table)
prec_type = self._PRECISION_TYPES[self._precision]
- smry.add_extra_txt(['{0} reported in parentheses'.format(prec_type)])
+ smry.add_extra_txt(["{0} reported in parentheses".format(prec_type)])
return smry
diff --git a/linearmodels/panel/utility.py b/linearmodels/panel/utility.py
index aef4621dd1..19cdeacbf0 100644
--- a/linearmodels/panel/utility.py
+++ b/linearmodels/panel/utility.py
@@ -46,7 +46,7 @@ def preconditioner(d, *, copy=False):
"""
Parameters
----------
- d : array-like
+ d : array_like
Array to precondition
copy : bool
Flag indicating whether the operation should be in-place, if possible.
@@ -54,12 +54,12 @@ def preconditioner(d, *, copy=False):
Returns
-------
- d : array-like
- Array with same type as input array. If copy is False, and d is
- an ndarray or a csc_matrix, then the operation is inplace
- cond : ndarray
- Array of conditioning numbers defined as the square root of the column
- 2-norms (nvar,)
+ d : array_like
+ Array with same type as input array. If copy is False, and d is
+ an ndarray or a csc_matrix, then the operation is inplace
+ cond : ndarray
+ Array of conditioning numbers defined as the square root of the column
+ 2-norms (nvar,)
"""
# Dense path
if not sp.issparse(d):
@@ -92,7 +92,9 @@ def preconditioner(d, *, copy=False):
return d, cond
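
In the dense path, preconditioning is plain column scaling: cond holds the column 2-norms (the square root of each column's summed squares) and the returned array has unit-norm columns, which improves the conditioning of d.T @ d in later solves. A standalone sketch of that branch:

```python
import numpy as np

def precondition_dense(d):
    """Dense-path sketch: scale each column of d by its 2-norm."""
    cond = np.sqrt((d ** 2).sum(axis=0))
    return d / cond, cond

x = np.random.randn(100, 3) * np.array([1.0, 10.0, 100.0])
xs, cond = precondition_dense(x)
assert np.allclose((xs ** 2).sum(0), 1.0)  # unit-norm columns
```
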
-def dummy_matrix(cats, *, format='csc', drop='first', drop_all=False, precondition=True):
+def dummy_matrix(
+ cats, *, format="csc", drop="first", drop_all=False, precondition=True
+):
"""
Parameters
----------
@@ -119,7 +121,7 @@ def dummy_matrix(cats, *, format='csc', drop='first', drop_all=False, preconditi
Returns
-------
- dummies : array-like
+ dummies : array_like
Array, either sparse or dense, of size nobs x ncats containing the
dummy variable values
cond : ndarray
@@ -137,37 +139,43 @@ def dummy_matrix(cats, *, format='csc', drop='first', drop_all=False, preconditi
rows = np.arange(nobs)
ucats, inverse = np.unique(codes[:, i], return_inverse=True)
ncategories = len(ucats)
- bits = min([i for i in (8, 16, 32, 64) if i - 1 > np.log2(ncategories + total_dummies)])
- replacements = np.arange(ncategories, dtype='int{:d}'.format(bits))
+ bits = min(
+ [i for i in (8, 16, 32, 64) if i - 1 > np.log2(ncategories + total_dummies)]
+ )
+ replacements = np.arange(ncategories, dtype="int{:d}".format(bits))
cols = replacements[inverse]
if i == 0 and not drop_all:
retain = np.arange(nobs)
- elif drop == 'first':
+ elif drop == "first":
# remove first
retain = cols != 0
else: # drop == 'last'
# remove last
retain = cols != (ncategories - 1)
rows = rows[retain]
- col_adj = -1 if (drop == 'first' and i > 0) else 0
+ col_adj = -1 if (drop == "first" and i > 0) else 0
cols = cols[retain] + total_dummies + col_adj
values = np.ones(rows.shape)
- data['values'].append(values)
- data['rows'].append(rows)
- data['cols'].append(cols)
+ data["values"].append(values)
+ data["rows"].append(rows)
+ data["cols"].append(cols)
total_dummies += ncategories - (i > 0)
- if format in ('csc', 'array'):
+ if format in ("csc", "array"):
fmt = sp.csc_matrix
- elif format == 'csr':
+ elif format == "csr":
fmt = sp.csr_matrix
- elif format == 'coo':
+ elif format == "coo":
fmt = sp.coo_matrix
else:
- raise ValueError('Unknown format: {0}'.format(format))
- out = fmt((np.concatenate(data['values']),
- (np.concatenate(data['rows']), np.concatenate(data['cols']))))
- if format == 'array':
+ raise ValueError("Unknown format: {0}".format(format))
+ out = fmt(
+ (
+ np.concatenate(data["values"]),
+ (np.concatenate(data["rows"]), np.concatenate(data["cols"])),
+ )
+ )
+ if format == "array":
out = out.toarray()
if precondition:
@@ -310,10 +318,12 @@ def in_2core_graph(cats):
def min_dtype(*args):
bits = max([np.log2(max(arg.max(), 1)) for arg in args])
- return 'int{0}'.format(min([i for i in (8, 16, 32, 64) if bits < (i - 1)]))
+ return "int{0}".format(min([i for i in (8, 16, 32, 64) if bits < (i - 1)]))
dtype = min_dtype(offset, node_id, count, orig_dest)
- meta = np.column_stack([node_id.astype(dtype), count.astype(dtype), offset.astype(dtype)])
+ meta = np.column_stack(
+ [node_id.astype(dtype), count.astype(dtype), offset.astype(dtype)]
+ )
orig_dest = orig_dest.astype(dtype)
singletons = np.any(meta[:, 1] == 1)
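
min_dtype selects the narrowest signed integer dtype whose positive range holds the largest value in any input, reserving one bit for the sign (hence the bits < i - 1 test). An equivalent standalone sketch:

```python
import numpy as np

def min_signed_dtype(*arrays):
    """Smallest signed int dtype representing max(arr) for every input."""
    bits = max(np.log2(max(int(a.max()), 1)) for a in arrays)
    return "int{0}".format(min(i for i in (8, 16, 32, 64) if bits < (i - 1)))

assert min_signed_dtype(np.array([100])) == "int8"     # log2(100) < 7
assert min_signed_dtype(np.array([40000])) == "int32"  # 40000 > 2**15 - 1
```
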
@@ -388,8 +398,8 @@ def check_absorbed(x: np.ndarray, variables: List[str]):
rows = []
for i in range(nabsorbed):
vars_idx = np.where(np.abs(absorbed_vecs[:, i]) > tol)[0]
- rows.append(' ' * 10 + ', '.join((variables[vi] for vi in vars_idx)))
- absorbed_variables = '\n'.join(rows)
+ rows.append(" " * 10 + ", ".join((variables[vi] for vi in vars_idx)))
+ absorbed_variables = "\n".join(rows)
msg = absorbing_error_msg.format(absorbed_variables=absorbed_variables)
raise AbsorbingEffectError(msg)
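
Returning to dummy_matrix above: with drop='first', every categorical after the first loses one category so that the stacked dummies remain full rank. A usage sketch assuming the ndarray path, where integer category codes are passed directly (treat the accepted input forms as an assumption):

```python
import numpy as np
from linearmodels.panel.utility import dummy_matrix

# Integer codes for two categoricals: 3 entities and 2 time periods
cats = np.column_stack([[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]])
dummies, cond = dummy_matrix(
    cats, format="array", drop="first", precondition=False
)
# All 3 entity dummies are kept; the first time dummy is dropped: 3 + 1
assert dummies.shape == (6, 4)
```
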
diff --git a/linearmodels/system/__init__.py b/linearmodels/system/__init__.py
index faed894de8..e710ee6df5 100644
--- a/linearmodels/system/__init__.py
+++ b/linearmodels/system/__init__.py
@@ -1,3 +1,3 @@
from linearmodels.system.model import IV3SLS, SUR, IVSystemGMM
-__all__ = ['SUR', 'IV3SLS', 'IVSystemGMM']
+__all__ = ["SUR", "IV3SLS", "IVSystemGMM"]
diff --git a/linearmodels/system/_utility.py b/linearmodels/system/_utility.py
index 52b6c9fea7..67834f95ca 100644
--- a/linearmodels/system/_utility.py
+++ b/linearmodels/system/_utility.py
@@ -14,7 +14,7 @@ def blocked_column_product(x, s):
Returns
-------
- bp : ndarray
+ ndarray
Blocked product. k x nobs rows and the number of columns is the same
as the number of columns of any member of x.
"""
@@ -39,7 +39,7 @@ def blocked_diag_product(x, s):
Returns
-------
- bp : ndarray
+ ndarray
Blocked product. k x nobs rows and the number of columns is the same
as the total number of columns in x.
"""
@@ -67,7 +67,7 @@ def blocked_inner_prod(x, s):
Returns
-------
- ip : ndarray
+ ndarray
Weighted inner product constructed from x and s
Notes
@@ -135,7 +135,7 @@ def blocked_cross_prod(x, z, s):
Returns
-------
- xp : ndarray
+ ndarray
Weighted cross product constructed from x and s
Notes
@@ -182,8 +182,8 @@ def blocked_full_inner_product(x, s):
for i in range(k):
v = s[i, 0] * x[0:t]
for j in range(1, k):
- v += s[i, j] * x[j * t:(j + 1) * t]
- sx[i * t:(i + 1) * t] = v
+ v += s[i, j] * x[j * t : (j + 1) * t]
+ sx[i * t : (i + 1) * t] = v
return x.T @ sx
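
blocked_full_inner_product evaluates x' (S kron I_t) x without materializing the (k t) by (k t) Kronecker product; the loop accumulates s[i, j]-weighted row blocks instead. A numpy sketch verifying the blocked loop against the explicit product:

```python
import numpy as np

k, t, m = 3, 50, 4
x = np.random.randn(k * t, m)
a = np.random.randn(k, k)
s = a @ a.T  # symmetric weight matrix

# Blocked evaluation, mirroring the loop above
sx = np.empty_like(x)
for i in range(k):
    v = s[i, 0] * x[0:t]
    for j in range(1, k):
        v += s[i, j] * x[j * t : (j + 1) * t]
    sx[i * t : (i + 1) * t] = v

# Reference using the explicit Kronecker product
ref = x.T @ np.kron(s, np.eye(t)) @ x
assert np.allclose(x.T @ sx, ref)
```
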
@@ -220,20 +220,20 @@ class LinearConstraint(object):
def __init__(self, r, q=None, num_params=None, require_pandas=True):
if not isinstance(r, (pd.DataFrame, np.ndarray)):
- raise TypeError('r must be an array or DataFrame')
+ raise TypeError("r must be an array or DataFrame")
elif require_pandas and not isinstance(r, pd.DataFrame):
- raise TypeError('r must be a DataFrame')
+ raise TypeError("r must be a DataFrame")
if r.ndim != 2:
- raise ValueError('r must be 2-dimensional')
+ raise ValueError("r must be 2-dimensional")
r_pd = pd.DataFrame(r)
ra = np.asarray(r, dtype=np.float64)
self._r_pd = r_pd
self._ra = ra
if q is not None:
if require_pandas and not isinstance(q, pd.Series):
- raise TypeError('q must be a Series')
+ raise TypeError("q must be a Series")
elif not isinstance(q, (pd.Series, np.ndarray)):
- raise TypeError('q must be a Series')
+ raise TypeError("q must be a Series")
q_pd = pd.Series(q, index=r_pd.index)
else:
q_pd = pd.Series(np.zeros(r_pd.shape[0]), index=r_pd.index)
@@ -244,26 +244,27 @@ def __init__(self, r, q=None, num_params=None, require_pandas=True):
self._verify_constraints()
def __repr__(self):
- return self.__str__() + '\nid: ' + str(hex(id(self)))
+ return self.__str__() + "\nid: " + str(hex(id(self)))
def __str__(self):
- return 'Linear Constraint with {0} constraints'.format(self._ra.shape[0])
+ return "Linear Constraint with {0} constraints".format(self._ra.shape[0])
def _verify_constraints(self):
r = self._ra
q = self._qa
if r.shape[0] != q.shape[0]:
- raise ValueError('Constraint inputs are not shape compatible')
+ raise ValueError("Constraint inputs are not shape compatible")
if self._num_params is not None:
if r.shape[1] != self._num_params:
- raise ValueError('r is incompatible with the number of model '
- 'parameters')
+ raise ValueError(
+ "r is incompatible with the number of model " "parameters"
+ )
rq = np.c_[r, q[:, None]]
if not np.all(np.isfinite(rq)) or matrix_rank(rq) < rq.shape[0]:
- raise ValueError('Constraints must be non-redundant')
+ raise ValueError("Constraints must be non-redundant")
qr = np.linalg.qr(rq)
if matrix_rank(qr[1][:, :-1]) != matrix_rank(qr[1]):
- raise ValueError('One or more constraints are infeasible')
+ raise ValueError("One or more constraints are infeasible")
def _compute_transform(self):
r = self._ra
@@ -274,7 +275,7 @@ def _compute_transform(self):
vecs = np.real(vecs)
idx = np.argsort(vals)[::-1]
vecs = vecs[:, idx]
- t, left = vecs[:, :k - c], vecs[:, k - c:]
+ t, left = vecs[:, : k - c], vecs[:, k - c :]
q = self._qa[:, None]
a = q.T @ inv(left.T @ r.T) @ left.T
self._t, self._l, self._a = t, left, a
@@ -291,7 +292,7 @@ def t(self):
Returns
-------
- t : ndarray
+ ndarray
Constraint transformation matrix
Notes
@@ -309,7 +310,7 @@ def a(self):
Returns
-------
- a : ndarray
+ ndarray
Transformed target
Notes
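
The t/a pair documented above turns the constrained problem R beta = q into an unconstrained one: t spans the null space of R, a is a particular solution, and beta = t @ theta + a satisfies the constraints for every theta. A sketch of that property using an SVD null space (the production code derives t from an eigendecomposition instead):

```python
import numpy as np

R = np.array([[1.0, -1.0, 0.0, 0.0, 0.0],
              [0.0, 0.0, 1.0, 1.0, 0.0]])
q = np.array([0.0, 1.0])

u, s, vt = np.linalg.svd(R)
T = vt[R.shape[0]:].T                      # columns span null(R)
a = np.linalg.lstsq(R, q, rcond=None)[0]   # particular solution

theta = np.random.randn(T.shape[1])
beta = T @ theta + a
assert np.allclose(R @ beta, q)  # feasible for any theta
```
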
diff --git a/linearmodels/system/covariance.py b/linearmodels/system/covariance.py
index 3722ac751f..d5ed4a11d3 100644
--- a/linearmodels/system/covariance.py
+++ b/linearmodels/system/covariance.py
@@ -44,7 +44,9 @@ class HomoskedasticCovariance(object):
(X'X)^{-1}(X'\Omega X)(X'X)^{-1}
"""
- def __init__(self, x, eps, sigma, full_sigma, *, gls=False, debiased=False, constraints=None):
+ def __init__(
+ self, x, eps, sigma, full_sigma, *, gls=False, debiased=False, constraints=None
+ ):
self._eps = eps
self._x = x
self._nobs = eps.shape[0]
@@ -54,7 +56,7 @@ def __init__(self, x, eps, sigma, full_sigma, *, gls=False, debiased=False, cons
self._gls = gls
self._debiased = debiased
self._constraints = constraints
- self._name = 'Homoskedastic (Unadjusted) Covariance'
+ self._name = "Homoskedastic (Unadjusted) Covariance"
self._str_extra = AttrDict(Debiased=self._debiased, GLS=self._gls)
self._cov_config = AttrDict(debiased=self._debiased)
@@ -62,14 +64,14 @@ def __str__(self):
out = self._name
extra = []
for key in self._str_extra:
- extra.append(': '.join([key, str(self._str_extra[key])]))
+ extra.append(": ".join([key, str(self._str_extra[key])]))
if extra:
- out += ' (' + ', '.join(extra) + ')'
+ out += " (" + ", ".join(extra) + ")"
return out
def __repr__(self):
out = self.__str__()
- return out + ', id: {0}'.format(hex(id(self)))
+ return out + ", id: {0}".format(hex(id(self)))
@property
def sigma(self):
@@ -174,12 +176,19 @@ class HeteroskedasticCovariance(HomoskedasticCovariance):
where :math:`\hat{S}` is an estimator of the covariance of the model scores.
"""
- def __init__(self, x, eps, sigma, full_sigma, gls=False, debiased=False, constraints=None):
- super(HeteroskedasticCovariance, self).__init__(x, eps, sigma, full_sigma,
- gls=gls,
- debiased=debiased,
- constraints=constraints)
- self._name = 'Heteroskedastic (Robust) Covariance'
+ def __init__(
+ self, x, eps, sigma, full_sigma, gls=False, debiased=False, constraints=None
+ ):
+ super(HeteroskedasticCovariance, self).__init__(
+ x,
+ eps,
+ sigma,
+ full_sigma,
+ gls=gls,
+ debiased=debiased,
+ constraints=constraints,
+ )
+ self._name = "Heteroskedastic (Robust) Covariance"
k = len(x)
nobs = eps.shape[0]
@@ -200,7 +209,7 @@ def __init__(self, x, eps, sigma, full_sigma, gls=False, debiased=False, constra
loc = 0
for i in range(k):
offset = x[i].shape[1]
- xe[:, loc:loc+offset] = x[i] * eps[:, i:i+1]
+ xe[:, loc : loc + offset] = x[i] * eps[:, i : i + 1]
loc += offset
self._moments = xe
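
The loop above stacks the per-equation scores x_i * eps_i into one (nobs, sum k_i) moment matrix; the robust covariance is then the usual sandwich built around S = xe' xe / nobs. A standalone sketch of the stacking:

```python
import numpy as np

def stack_scores(x, eps):
    """Stack [x_1 * e_1, ..., x_k * e_k] column-wise, as in the loop above."""
    nobs, k_total = eps.shape[0], sum(a.shape[1] for a in x)
    xe = np.empty((nobs, k_total))
    loc = 0
    for i, xi in enumerate(x):
        xe[:, loc : loc + xi.shape[1]] = xi * eps[:, i : i + 1]
        loc += xi.shape[1]
    return xe

x = [np.random.randn(100, 2), np.random.randn(100, 3)]
eps = np.random.randn(100, 2)
xe = stack_scores(x, eps)
s_hat = xe.T @ xe / 100  # central term of the robust sandwich
```
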
@@ -305,17 +314,34 @@ class KernelCovariance(HeteroskedasticCovariance, _HACMixin):
linearmodels.iv.covariance.kernel_weight_quadratic_spectral
"""
- def __init__(self, x, eps, sigma, full_sigma, *, gls=False, debiased=False, constraints=None,
- kernel='bartlett', bandwidth=None):
- super(KernelCovariance, self).__init__(x, eps, sigma, full_sigma, gls=gls,
- debiased=debiased,
- constraints=constraints)
+ def __init__(
+ self,
+ x,
+ eps,
+ sigma,
+ full_sigma,
+ *,
+ gls=False,
+ debiased=False,
+ constraints=None,
+ kernel="bartlett",
+ bandwidth=None
+ ):
+ super(KernelCovariance, self).__init__(
+ x,
+ eps,
+ sigma,
+ full_sigma,
+ gls=gls,
+ debiased=debiased,
+ constraints=constraints,
+ )
self._check_kernel(kernel)
self._check_bandwidth(bandwidth)
- self._name = 'Kernel (HAC) Covariance'
- self._str_extra['Kernel'] = kernel
- self._cov_config['kernel'] = kernel
+ self._name = "Kernel (HAC) Covariance"
+ self._str_extra["Kernel"] = kernel
+ self._cov_config["kernel"] = kernel
def _xeex(self):
return self._kernel_cov(self._moments)
@@ -324,7 +350,7 @@ def _xeex(self):
def cov_config(self):
"""Optional configuration information used in covariance"""
out = AttrDict([(k, v) for k, v in self._cov_config.items()])
- out['bandwidth'] = self.bandwidth
+ out["bandwidth"] = self.bandwidth
return out
@@ -368,7 +394,7 @@ def __init__(self, x, z, eps, w, *, sigma=None, debiased=False, constraints=None
self._w = w
self._debiased = debiased
self._constraints = constraints
- self._name = 'GMM Homoskedastic (Unadjusted) Covariance'
+ self._name = "GMM Homoskedastic (Unadjusted) Covariance"
self._cov_config = AttrDict(debiased=self._debiased)
def __str__(self):
@@ -377,7 +403,7 @@ def __str__(self):
def __repr__(self):
out = self.__str__()
- return out + ', id: {0}'.format(hex(id(self)))
+ return out + ", id: {0}".format(hex(id(self)))
@property
def cov(self):
@@ -401,7 +427,14 @@ def cov(self):
xpz_wi_zpx = cons.t.T @ xpz_wi_zpx @ cons.t
xpz_wi_zpxi = inv(xpz_wi_zpx)
xpz_wi_omega_wi_zpx = cons.t.T @ xpz_wi_omega_wi_zpx @ cons.t
- cov = cons.t @ xpz_wi_zpxi @ xpz_wi_omega_wi_zpx @ xpz_wi_zpxi @ cons.t.T / nobs
+ cov = (
+ cons.t
+ @ xpz_wi_zpxi
+ @ xpz_wi_omega_wi_zpx
+ @ xpz_wi_zpxi
+ @ cons.t.T
+ / nobs
+ )
cov = (cov + cov.T) / 2
return adj * cov
@@ -465,8 +498,10 @@ class GMMHeteroskedasticCovariance(GMMHomoskedasticCovariance):
"""
def __init__(self, x, z, eps, w, *, sigma=None, debiased=False, constraints=None):
- super().__init__(x, z, eps, w, sigma=sigma, debiased=debiased, constraints=constraints)
- self._name = 'GMM Heteroskedastic (Robust) Covariance'
+ super().__init__(
+ x, z, eps, w, sigma=sigma, debiased=debiased, constraints=constraints
+ )
+ self._name = "GMM Heteroskedastic (Robust) Covariance"
k = len(z)
k_total = sum(map(lambda a: a.shape[1], z))
@@ -475,7 +510,7 @@ def __init__(self, x, z, eps, w, *, sigma=None, debiased=False, constraints=None
ze = empty((nobs, k_total))
for i in range(k):
kz = z[i].shape[1]
- ze[:, loc:loc + kz] = z[i] * eps[:, [i]]
+ ze[:, loc : loc + kz] = z[i] * eps[:, [i]]
loc += kz
self._moments = ze
@@ -528,13 +563,26 @@ class GMMKernelCovariance(GMMHeteroskedasticCovariance, _HACMixin):
where :math:`\Omega` is the covariance of the moment conditions.
"""
- def __init__(self, x, z, eps, w, *, sigma=None, debiased=False, constraints=None,
- kernel='bartlett', bandwidth=None):
- super().__init__(x, z, eps, w, sigma=sigma, debiased=debiased, constraints=constraints)
- self._name = 'GMM Kernel (HAC) Covariance'
+ def __init__(
+ self,
+ x,
+ z,
+ eps,
+ w,
+ *,
+ sigma=None,
+ debiased=False,
+ constraints=None,
+ kernel="bartlett",
+ bandwidth=None
+ ):
+ super().__init__(
+ x, z, eps, w, sigma=sigma, debiased=debiased, constraints=constraints
+ )
+ self._name = "GMM Kernel (HAC) Covariance"
self._check_bandwidth(bandwidth)
self._check_kernel(kernel)
- self._cov_config['kernel'] = kernel
+ self._cov_config["kernel"] = kernel
def _omega(self):
return self._kernel_cov(self._moments)
@@ -543,5 +591,5 @@ def _omega(self):
def cov_config(self):
"""Optional configuration information used in covariance"""
out = AttrDict([(k, v) for k, v in self._cov_config.items()])
- out['bandwidth'] = self.bandwidth
+ out["bandwidth"] = self.bandwidth
return out
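
Both kernel covariances delegate to _HACMixin._kernel_cov, which down-weights autocovariances of the stacked moments by kernel weights. A Bartlett (Newey-West style) sketch; the exact weighting and any demeaning inside _kernel_cov are assumptions here:

```python
import numpy as np

def bartlett_hac(moments, bandwidth):
    """HAC covariance sketch: S = G0 + sum_j w_j (G_j + G_j')."""
    nobs = moments.shape[0]
    m = moments - moments.mean(0)
    s = m.T @ m / nobs
    for j in range(1, int(bandwidth) + 1):
        w = 1.0 - j / (bandwidth + 1.0)  # Bartlett weight
        g = m[j:].T @ m[:-j] / nobs      # lag-j autocovariance
        s += w * (g + g.T)
    return s

s_hac = bartlett_hac(np.random.randn(500, 4), bandwidth=4)
```
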
diff --git a/linearmodels/system/gmm.py b/linearmodels/system/gmm.py
index 13e54ed768..4449a3beae 100644
--- a/linearmodels/system/gmm.py
+++ b/linearmodels/system/gmm.py
@@ -40,20 +40,20 @@ def __init__(self, center=False, debiased=False):
self._center = center
self._debiased = debiased
self._bandwidth = 0
- self._name = 'Homoskedastic (Unadjusted) Weighting'
+ self._name = "Homoskedastic (Unadjusted) Weighting"
self._config = AttrDict(center=center, debiased=debiased)
def __str__(self):
out = self._name
extra = []
for key in self._str_extra:
- extra.append(': '.join([key, str(self._str_extra[key])]))
+ extra.append(": ".join([key, str(self._str_extra[key])]))
if extra:
- out += ' (' + ', '.join(extra) + ')'
+ out += " (" + ", ".join(extra) + ")"
return out
def __repr__(self):
- return self.__str__() + ', id: {0}'.format(hex(id(self)))
+ return self.__str__() + ", id: {0}".format(hex(id(self)))
@property
def _str_extra(self):
@@ -104,7 +104,7 @@ def weight_matrix(self, x, z, eps, *, sigma=None):
Returns
-------
- weight : ndarray
+ ndarray
Covariance of GMM moment conditions.
"""
nobs = z[0].shape[0]
@@ -118,7 +118,7 @@ def config(self):
Returns
-------
- config : AttrDict
+ AttrDict
Dictionary containing weight estimator configuration information
"""
return self._config
@@ -154,7 +154,7 @@ class HeteroskedasticWeightMatrix(HomoskedasticWeightMatrix):
def __init__(self, center=False, debiased=False):
super(HeteroskedasticWeightMatrix, self).__init__(center, debiased)
- self._name = 'Heteroskedastic (Robust) Weighting'
+ self._name = "Heteroskedastic (Robust) Weighting"
def weight_matrix(self, x, z, eps, *, sigma=None):
"""
@@ -171,7 +171,7 @@ def weight_matrix(self, x, z, eps, *, sigma=None):
Returns
-------
- weight : ndarray
+ ndarray
Covariance of GMM moment conditions.
"""
nobs = x[0].shape[0]
@@ -182,7 +182,7 @@ def weight_matrix(self, x, z, eps, *, sigma=None):
for i in range(k):
e = eps[:, [i]]
zk = z[i].shape[1]
- ze[:, loc:loc + zk] = z[i] * e
+ ze[:, loc : loc + zk] = z[i] * e
loc += zk
mu = ze.mean(axis=0) if self._center else 0
ze -= mu
@@ -247,10 +247,16 @@ class KernelWeightMatrix(HeteroskedasticWeightMatrix, _HACMixin):
between the moment conditions.
"""
- def __init__(self, center=False, debiased=False, kernel='bartlett', bandwidth=None,
- optimal_bw=False):
+ def __init__(
+ self,
+ center=False,
+ debiased=False,
+ kernel="bartlett",
+ bandwidth=None,
+ optimal_bw=False,
+ ):
super(KernelWeightMatrix, self).__init__(center, debiased)
- self._name = 'Kernel (HAC) Weighting'
+ self._name = "Kernel (HAC) Weighting"
self._check_kernel(kernel)
self._check_bandwidth(bandwidth)
self._predefined_bw = self._bandwidth
@@ -271,7 +277,7 @@ def weight_matrix(self, x, z, eps, *, sigma=None):
Returns
-------
- weight : ndarray
+ ndarray
Covariance of GMM moment conditions.
"""
nobs = x[0].shape[0]
@@ -282,7 +288,7 @@ def weight_matrix(self, x, z, eps, *, sigma=None):
for i in range(k):
e = eps[:, [i]]
zk = z[i].shape[1]
- ze[:, loc:loc + zk] = z[i] * e
+ ze[:, loc : loc + zk] = z[i] * e
loc += zk
mu = ze.mean(axis=0) if self._center else 0
ze -= mu
@@ -317,9 +323,9 @@ def config(self):
Returns
-------
- config : AttrDict
+ AttrDict
Dictionary containing weight estimator configuration information
"""
out = AttrDict([(k, v) for k, v in self._config.items()])
- out['bandwidth'] = self.bandwidth
+ out["bandwidth"] = self.bandwidth
return out
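
As in the covariance estimators, the weight matrices stack instrument-by-residual products; the heteroskedastic weighting is then ze' ze / nobs with optional centering. A standalone sketch:

```python
import numpy as np

def hetero_weight_matrix(z, eps, center=False):
    """Sketch of the heteroskedastic GMM weighting: (ze)'(ze) / nobs."""
    nobs = eps.shape[0]
    ze = np.hstack([z[i] * eps[:, [i]] for i in range(len(z))])
    if center:
        ze = ze - ze.mean(0)
    return ze.T @ ze / nobs

z = [np.random.randn(200, 3), np.random.randn(200, 2)]
eps = np.random.randn(200, 2)
w = hetero_weight_matrix(z, eps, center=True)
```
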
diff --git a/linearmodels/system/model.py b/linearmodels/system/model.py
index 660d979950..1c6aa9e0a0 100644
--- a/linearmodels/system/model.py
+++ b/linearmodels/system/model.py
@@ -42,7 +42,7 @@
WaldTestStatistic, has_constant,
missing_warning)
-__all__ = ['SUR', 'IV3SLS', 'IVSystemGMM']
+__all__ = ["SUR", "IV3SLS", "IVSystemGMM"]
UNKNOWN_EQ_TYPE = """
Contents of each equation must be either a dictionary with keys 'dependent'
@@ -50,24 +50,32 @@
equations[{key}] was {type}
"""
-COV_TYPES = {'unadjusted': 'unadjusted',
- 'homoskedastic': 'unadjusted',
- 'robust': 'robust',
- 'heteroskedastic': 'robust',
- 'kernel': 'kernel',
- 'hac': 'kernel'}
-
-COV_EST = {'unadjusted': HomoskedasticCovariance,
- 'robust': HeteroskedasticCovariance,
- 'kernel': KernelCovariance}
-
-GMM_W_EST = {'unadjusted': HomoskedasticWeightMatrix,
- 'robust': HeteroskedasticWeightMatrix,
- 'kernel': KernelWeightMatrix}
-
-GMM_COV_EST = {'unadjusted': GMMHomoskedasticCovariance,
- 'robust': GMMHeteroskedasticCovariance,
- 'kernel': GMMKernelCovariance}
+COV_TYPES = {
+ "unadjusted": "unadjusted",
+ "homoskedastic": "unadjusted",
+ "robust": "robust",
+ "heteroskedastic": "robust",
+ "kernel": "kernel",
+ "hac": "kernel",
+}
+
+COV_EST = {
+ "unadjusted": HomoskedasticCovariance,
+ "robust": HeteroskedasticCovariance,
+ "kernel": KernelCovariance,
+}
+
+GMM_W_EST = {
+ "unadjusted": HomoskedasticWeightMatrix,
+ "robust": HeteroskedasticWeightMatrix,
+ "kernel": KernelWeightMatrix,
+}
+
+GMM_COV_EST = {
+ "unadjusted": GMMHomoskedasticCovariance,
+ "robust": GMMHeteroskedasticCovariance,
+ "kernel": GMMKernelCovariance,
+}
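
These mappings let fit(cov_type=...) accept several spellings per estimator; resolution is a two-step lookup, sketched here with the dictionaries just defined:

```python
# Alias resolution as performed inside fit()
user_cov_type = "HAC"
cov_type = user_cov_type.lower()
if cov_type not in COV_TYPES:
    raise ValueError("Unknown cov_type: {0}".format(cov_type))
estimator = COV_EST[COV_TYPES[cov_type]]  # KernelCovariance for 'hac'/'kernel'
```
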
def _to_ordered_dict(equations):
@@ -86,7 +94,8 @@ def _missing_weights(weights):
missing = [key for key in weights if weights[key] is None]
if missing:
import warnings
- msg = 'Weights not found for equation labels:\n{0}'.format(', '.join(missing))
+
+ msg = "Weights not found for equation labels:\n{0}".format(", ".join(missing))
warnings.warn(msg, UserWarning)
return None
@@ -135,7 +144,7 @@ def _parameters_from_xprod(xpx, xpy, constraints=None):
class SystemFormulaParser(object):
def __init__(self, formula, data, weights=None, eval_env=6):
if not isinstance(formula, (Mapping, str)):
- raise TypeError('formula must be a string or dictionary-like')
+ raise TypeError("formula must be a string or dictionary-like")
self._formula = formula
self._data = data
self._weights = weights
@@ -147,8 +156,8 @@ def __init__(self, formula, data, weights=None, eval_env=6):
@staticmethod
def _prevent_autoconst(formula):
- if not (' 0+' in formula or ' 0 +' in formula):
- formula = '~ 0 +'.join(formula.split('~'))
+ if not (" 0+" in formula or " 0 +" in formula):
+ formula = "~ 0 +".join(formula.split("~"))
return formula
def _parse(self):
@@ -172,24 +181,24 @@ def _parse(self):
weight_dict[key] = None
cln_formula[key] = f
else:
- formula = formula.replace('\n', ' ').strip()
- parts = formula.split('}')
+ formula = formula.replace("\n", " ").strip()
+ parts = formula.split("}")
for part in parts:
key = base_key = None
part = part.strip()
- if part == '':
+ if part == "":
continue
- part = part.replace('{', '')
- if ':' in part.split('~')[0]:
- base_key, part = part.split(':')
+ part = part.replace("{", "")
+ if ":" in part.split("~")[0]:
+ base_key, part = part.split(":")
key = base_key = base_key.strip()
part = part.strip()
f = self._prevent_autoconst(part)
if base_key is None:
- base_key = key = f.split('~')[0].strip()
+ base_key = key = f.split("~")[0].strip()
count = 0
while key in parsers:
- key = base_key + '.{0}'.format(count)
+ key = base_key + ".{0}".format(count)
count += 1
parsers[key] = IVFormulaParser(f, data, eval_env=self._eval_env)
cln_formula[key] = f
@@ -202,7 +211,9 @@ def _parse(self):
self._weight_dict = weight_dict
def _get_variable(self, variable):
- return OrderedDict([(key, getattr(self._parsers[key], variable)) for key in self._parsers])
+ return OrderedDict(
+ [(key, getattr(self._parsers[key], variable)) for key in self._parsers]
+ )
@property
def formula(self):
@@ -222,7 +233,9 @@ def eval_env(self, value):
new_parsers = OrderedDict()
for key in parsers:
parser = parsers[key]
- new_parsers[key] = IVFormulaParser(parser._formula, parser._data, self._eval_env)
+ new_parsers[key] = IVFormulaParser(
+ parser._formula, parser._data, self._eval_env
+ )
self._parsers = new_parsers
@property
@@ -234,36 +247,36 @@ def data(self):
out = OrderedDict()
dep = self.dependent
for key in dep:
- out[key] = {'dependent': dep[key]}
+ out[key] = {"dependent": dep[key]}
exog = self.exog
for key in exog:
- out[key]['exog'] = exog[key]
+ out[key]["exog"] = exog[key]
endog = self.endog
for key in endog:
- out[key]['endog'] = endog[key]
+ out[key]["endog"] = endog[key]
instr = self.instruments
for key in instr:
- out[key]['instruments'] = instr[key]
+ out[key]["instruments"] = instr[key]
for key in self._weight_dict:
if self._weight_dict[key] is not None:
- out[key]['weights'] = self._weight_dict[key]
+ out[key]["weights"] = self._weight_dict[key]
return out
@property
def dependent(self):
- return self._get_variable('dependent')
+ return self._get_variable("dependent")
@property
def exog(self):
- return self._get_variable('exog')
+ return self._get_variable("exog")
@property
def endog(self):
- return self._get_variable('endog')
+ return self._get_variable("endog")
@property
def instruments(self):
- return self._get_variable('instruments')
+ return self._get_variable("instruments")
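
For orientation, SystemFormulaParser accepts either a mapping of label to formula or one brace-delimited string; _prevent_autoconst injects '~ 0 +' so patsy adds a constant only where one is written explicitly. A usage sketch on simulated data (labels and variables illustrative):

```python
import numpy as np
import pandas as pd
from linearmodels.system import SUR

df = pd.DataFrame(np.random.randn(500, 4), columns=["y1", "y2", "x1", "x2"])

# One string; the parser splits blocks on '}' and reads an optional
# 'label:' prefix ahead of each equation
formula = "{first: y1 ~ 1 + x1} {second: y2 ~ 1 + x2}"
res = SUR.from_formula(formula, data=df).fit()
print(res.params)
```
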
class IV3SLS(object):
@@ -287,7 +300,7 @@ class IV3SLS(object):
equation contains no exogenous regressors. Similarly 'endog' and
'instruments' can either be omitted or may contain an empty array (or
`None`) if all variables in an equation are exogenous.
- sigma : array-like
+ sigma : array_like
Prespecified residual covariance to use in GLS estimation. If not
provided, FGLS is implemented based on an estimate of sigma.
@@ -352,10 +365,10 @@ class IV3SLS(object):
def __init__(self, equations, *, sigma=None):
if not isinstance(equations, Mapping):
- raise TypeError('equations must be a dictionary-like')
+ raise TypeError("equations must be a dictionary-like")
for key in equations:
if not isinstance(key, str):
- raise ValueError('Equation labels (keys) must be strings')
+ raise ValueError("Equation labels (keys) must be strings")
# Ensure nearly deterministic equation ordering
equations = _to_ordered_dict(equations)
@@ -366,8 +379,10 @@ def __init__(self, equations, *, sigma=None):
self._sigma = np.asarray(sigma)
k = len(self._equations)
if self._sigma.shape != (k, k):
- raise ValueError('sigma must be a square matrix with dimensions '
- 'equal to the number of equations')
+ raise ValueError(
+ "sigma must be a square matrix with dimensions "
+ "equal to the number of equations"
+ )
self._param_names = []
self._eq_labels = []
self._dependent = []
@@ -390,7 +405,7 @@ def __init__(self, equations, *, sigma=None):
self._has_constant = None
self._common_exog = False
self._original_index = None
- self._model_name = 'Three Stage Least Squares (3SLS)'
+ self._model_name = "Three Stage Least Squares (3SLS)"
self._validate_data()
@@ -408,22 +423,26 @@ def _validate_data(self):
for i, key in enumerate(self._equations):
self._eq_labels.append(key)
eq_data = self._equations[key]
- dep_name = 'dependent_' + str(i)
- exog_name = 'exog_' + str(i)
- endog_name = 'endog_' + str(i)
- instr_name = 'instr_' + str(i)
+ dep_name = "dependent_" + str(i)
+ exog_name = "exog_" + str(i)
+ endog_name = "endog_" + str(i)
+ instr_name = "instr_" + str(i)
if isinstance(eq_data, (tuple, list)):
dep = IVData(eq_data[0], var_name=dep_name)
self._dependent.append(dep)
current_id = id(eq_data[1])
- self._exog.append(IVData(eq_data[1], var_name=exog_name, nobs=dep.shape[0]))
+ self._exog.append(
+ IVData(eq_data[1], var_name=exog_name, nobs=dep.shape[0])
+ )
endog = IVData(eq_data[2], var_name=endog_name, nobs=dep.shape[0])
if endog.shape[1] > 0:
current_id = (current_id, id(eq_data[2]))
ids.append(current_id)
self._endog.append(endog)
- self._instr.append(IVData(eq_data[3], var_name=instr_name, nobs=dep.shape[0]))
+ self._instr.append(
+ IVData(eq_data[3], var_name=instr_name, nobs=dep.shape[0])
+ )
if len(eq_data) == 5:
self._weights.append(IVData(eq_data[4]))
else:
@@ -431,26 +450,26 @@ def _validate_data(self):
self._weights.append(IVData(np.ones_like(dep)))
elif isinstance(eq_data, (dict, Mapping)):
- dep = IVData(eq_data['dependent'], var_name=dep_name)
+ dep = IVData(eq_data["dependent"], var_name=dep_name)
self._dependent.append(dep)
- exog = eq_data.get('exog', None)
+ exog = eq_data.get("exog", None)
self._exog.append(IVData(exog, var_name=exog_name, nobs=dep.shape[0]))
current_id = id(exog)
- endog = eq_data.get('endog', None)
+ endog = eq_data.get("endog", None)
endog = IVData(endog, var_name=endog_name, nobs=dep.shape[0])
self._endog.append(endog)
- if 'endog' in eq_data:
- current_id = (current_id, id(eq_data['endog']))
+ if "endog" in eq_data:
+ current_id = (current_id, id(eq_data["endog"]))
ids.append(current_id)
- instr = eq_data.get('instruments', None)
+ instr = eq_data.get("instruments", None)
instr = IVData(instr, var_name=instr_name, nobs=dep.shape[0])
self._instr.append(instr)
- if 'weights' in eq_data:
- self._weights.append(IVData(eq_data['weights']))
+ if "weights" in eq_data:
+ self._weights.append(IVData(eq_data["weights"]))
else:
self._weights.append(IVData(np.ones(dep.shape)))
else:
@@ -460,14 +479,17 @@ def _validate_data(self):
for instr in self._instr:
self._has_instruments = self._has_instruments or (instr.shape[1] > 1)
- for i, comps in enumerate(zip(self._dependent, self._exog, self._endog, self._instr,
- self._weights)):
+ for i, comps in enumerate(
+ zip(self._dependent, self._exog, self._endog, self._instr, self._weights)
+ ):
shapes = list(map(lambda a: a.shape[0], comps))
if min(shapes) != max(shapes):
- raise ValueError('Dependent, exogenous, endogenous and '
- 'instruments, and weights, if provided, do '
- 'not have the same number of observations in '
- '{eq}'.format(eq=self._eq_labels[i]))
+ raise ValueError(
+ "Dependent, exogenous, endogenous and "
+ "instruments, and weights, if provided, do "
+ "not have the same number of observations in "
+ "{eq}".format(eq=self._eq_labels[i])
+ )
self._drop_missing()
self._common_exog = len(set(ids)) == 1
@@ -479,9 +501,14 @@ def _validate_data(self):
constant = []
constant_loc = []
- for dep, exog, endog, instr, w, label in zip(self._dependent, self._exog, self._endog,
- self._instr, self._weights,
- self._eq_labels):
+ for dep, exog, endog, instr, w, label in zip(
+ self._dependent,
+ self._exog,
+ self._endog,
+ self._instr,
+ self._weights,
+ self._eq_labels,
+ ):
y = dep.ndarray
x = np.concatenate([exog.ndarray, endog.ndarray], 1)
z = np.concatenate([exog.ndarray, instr.ndarray], 1)
@@ -496,29 +523,38 @@ def _validate_data(self):
self._wx.append(x * w_sqrt)
self._wz.append(z * w_sqrt)
cols = list(exog.cols) + list(endog.cols)
- self._param_names.extend([label + '_' + col for col in cols])
+ self._param_names.extend([label + "_" + col for col in cols])
if y.shape[0] <= x.shape[1]:
- raise ValueError('Fewer observations than variables in '
- 'equation {eq}'.format(eq=label))
+ raise ValueError(
+ "Fewer observations than variables in "
+ "equation {eq}".format(eq=label)
+ )
if matrix_rank(x) < x.shape[1]:
- raise ValueError('Equation {eq} regressor array is not full '
- 'rank'.format(eq=label))
+ raise ValueError(
+ "Equation {eq} regressor array is not full " "rank".format(eq=label)
+ )
if x.shape[1] > z.shape[1]:
- raise ValueError('Equation {eq} has fewer instruments than '
- 'endogenous variables.'.format(eq=label))
+ raise ValueError(
+ "Equation {eq} has fewer instruments than "
+ "endogenous variables.".format(eq=label)
+ )
if z.shape[1] > z.shape[0]:
- raise ValueError('Fewer observations than instruments in '
- 'equation {eq}'.format(eq=label))
+ raise ValueError(
+ "Fewer observations than instruments in "
+ "equation {eq}".format(eq=label)
+ )
if matrix_rank(z) < z.shape[1]:
- raise ValueError('Equation {eq} instrument array is full '
- 'rank'.format(eq=label))
+ raise ValueError(
+ "Equation {eq} instrument array is not full rank".format(eq=label)
+ )
for rhs in self._x:
const, const_loc = has_constant(rhs)
constant.append(const)
constant_loc.append(const_loc)
- self._has_constant = Series(constant,
- index=[d.cols[0] for d in self._dependent])
+ self._has_constant = Series(
+ constant, index=[d.cols[0] for d in self._dependent]
+ )
self._constant_loc = constant_loc
def _drop_missing(self):
@@ -544,15 +580,15 @@ def _drop_missing(self):
self._weights[i].drop(missing)
def __repr__(self):
- return self.__str__() + '\nid: {0}'.format(hex(id(self)))
+ return self.__str__() + "\nid: {0}".format(hex(id(self)))
def __str__(self):
- out = self._model_name + ', '
- out += '{0} Equations:\n'.format(len(self._y))
- eqns = ', '.join(self._equations.keys())
- out += '\n'.join(textwrap.wrap(eqns, 70))
+ out = self._model_name + ", "
+ out += "{0} Equations:\n".format(len(self._y))
+ eqns = ", ".join(self._equations.keys())
+ out += "\n".join(textwrap.wrap(eqns, 70))
if self._common_exog:
- out += '\nCommon Exogenous Variables'
+ out += "\nCommon Exogenous Variables"
return out
def predict(self, params, *, equations=None, data=None, eval_env=8):
@@ -561,7 +597,7 @@ def predict(self, params, *, equations=None, data=None, eval_env=8):
Parameters
----------
- params : array-like
+ params : array_like
Model parameters (nvar by 1)
equations : dict
Dictionary-like structure containing exogenous and endogenous
@@ -608,10 +644,10 @@ def predict(self, params, *, equations=None, data=None, eval_env=8):
for i, label in enumerate(self._eq_labels):
kx = self._x[i].shape[1]
if label in equations:
- b = params[loc:loc + kx]
+ b = params[loc : loc + kx]
eqn = equations[label] # type: dict
- exog = eqn.get('exog', None)
- endog = eqn.get('endog', None)
+ exog = eqn.get("exog", None)
+ endog = eqn.get("endog", None)
if exog is None and endog is None:
loc += kx
continue
@@ -628,13 +664,25 @@ def predict(self, params, *, equations=None, data=None, eval_env=8):
fitted = DataFrame(fitted, index=exog_endog.index, columns=[label])
out[label] = fitted
loc += kx
- out = reduce(lambda left, right: left.merge(right, how='outer',
- left_index=True, right_index=True),
- [out[key] for key in out])
+ out = reduce(
+ lambda left, right: left.merge(
+ right, how="outer", left_index=True, right_index=True
+ ),
+ [out[key] for key in out],
+ )
return out
- def fit(self, *, method=None, full_cov=True, iterate=False, iter_limit=100, tol=1e-6,
- cov_type='robust', **cov_config):
+ def fit(
+ self,
+ *,
+ method=None,
+ full_cov=True,
+ iterate=False,
+ iter_limit=100,
+ tol=1e-6,
+ cov_type="robust",
+ **cov_config
+ ):
"""
Estimate model parameters
@@ -677,9 +725,14 @@ def fit(self, *, method=None, full_cov=True, iterate=False, iter_limit=100, tol=
linearmodels.system.covariance.HeteroskedasticCovariance
linearmodels.system.covariance.KernelCovariance
"""
+ if method is None:
+ method = (
+ "ols" if (self._common_exog and self._constraints is None) else "gls"
+ )
+
cov_type = cov_type.lower()
if cov_type not in COV_TYPES:
- raise ValueError('Unknown cov_type: {0}'.format(cov_type))
+ raise ValueError("Unknown cov_type: {0}".format(cov_type))
cov_type = COV_TYPES[cov_type]
k = len(self._dependent)
col_sizes = [0] + list(map(lambda v: v.shape[1], self._x))
@@ -688,18 +741,24 @@ def fit(self, *, method=None, full_cov=True, iterate=False, iter_limit=100, tol=
self._construct_xhat()
beta, eps = self._multivariate_ls_fit()
nobs = eps.shape[0]
- debiased = cov_config.get('debiased', False)
+ debiased = cov_config.get("debiased", False)
full_sigma = sigma = (eps.T @ eps / nobs) * self._sigma_scale(debiased)
- if (self._common_exog and method is None and self._constraints is None) or method == 'ols':
- return self._multivariate_ls_finalize(beta, eps, sigma, cov_type, **cov_config)
+
+ if method == "ols":
+ return self._multivariate_ls_finalize(
+ beta, eps, sigma, col_idx, total_cols, cov_type, **cov_config
+ )
beta_hist = [beta]
nobs = eps.shape[0]
iter_count = 0
delta = np.inf
- while ((iter_count < iter_limit and iterate) or iter_count == 0) and delta >= tol:
- beta, eps, sigma = self._gls_estimate(eps, nobs, total_cols, col_idx,
- full_cov, debiased)
+ while (
+ (iter_count < iter_limit and iterate) or iter_count == 0
+ ) and delta >= tol:
+ beta, eps, sigma, est_sigma = self._gls_estimate(
+ eps, nobs, total_cols, col_idx, full_cov, debiased
+ )
beta_hist.append(beta)
delta = beta_hist[-1] - beta_hist[-2]
delta = np.sqrt(np.mean(delta ** 2))
@@ -714,8 +773,18 @@ def fit(self, *, method=None, full_cov=True, iterate=False, iter_limit=100, tol=
x = blocked_diag_product(self._x, np.eye(k))
eps = y - x @ beta
- return self._gls_finalize(beta, sigma, full_sigma, gls_eps,
- eps, cov_type, iter_count, **cov_config)
+ return self._gls_finalize(
+ beta,
+ sigma,
+ full_sigma,
+ est_sigma,
+ gls_eps,
+ eps,
+ full_cov,
+ cov_type,
+ iter_count,
+ **cov_config
+ )
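
The loop above iterates FGLS until the root-mean-square change in the stacked parameter vector falls below tol (or iter_limit is reached when iterate=True). A toy sketch of the same stopping rule with a stand-in update:

```python
import numpy as np

def rms_delta(beta_new, beta_old):
    delta = beta_new - beta_old
    return float(np.sqrt(np.mean(delta ** 2)))

tol, iter_limit, iterate = 1e-6, 100, True
beta_hist = [np.zeros((3, 1))]
iter_count, delta = 0, np.inf
while ((iter_count < iter_limit and iterate) or iter_count == 0) and delta >= tol:
    beta = beta_hist[-1] + 0.5 ** (iter_count + 1)  # stand-in for a GLS step
    beta_hist.append(beta)
    delta = rms_delta(beta_hist[-1], beta_hist[-2])
    iter_count += 1
```
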
def _multivariate_ls_fit(self):
wy, wx, wxhat = self._wy, self._wx, self._wxhat
@@ -732,7 +801,7 @@ def _multivariate_ls_fit(self):
eps = []
for i in range(k):
nb = wx[i].shape[1]
- b = beta[loc:loc + nb]
+ b = beta[loc : loc + nb]
eps.append(wy[i] - wx[i] @ b)
loc += nb
eps = np.hstack(eps)
@@ -763,6 +832,7 @@ def _gls_estimate(self, eps, nobs, total_cols, ci, full_cov, debiased):
if sigma is None:
sigma = eps.T @ eps / nobs
sigma *= self._sigma_scale(debiased)
+ est_sigma = sigma
if not full_cov:
sigma = np.diag(np.diag(sigma))
@@ -776,7 +846,7 @@ def _gls_estimate(self, eps, nobs, total_cols, ci, full_cov, debiased):
sy = np.zeros((nobs, 1))
for j in range(k):
sy += sigma_inv[i, j] * wy[j]
- xpy[ci[i]:ci[i + 1]] = wxhat[i].T @ sy
+ xpy[ci[i] : ci[i + 1]] = wxhat[i].T @ sy
beta = _parameters_from_xprod(xpx, xpy, constraints=self.constraints)
@@ -785,22 +855,31 @@ def _gls_estimate(self, eps, nobs, total_cols, ci, full_cov, debiased):
_wx = wx[j]
_wy = wy[j]
kx = _wx.shape[1]
- eps[:, [j]] = _wy - _wx @ beta[loc:loc + kx]
+ eps[:, [j]] = _wy - _wx @ beta[loc : loc + kx]
loc += kx
- return beta, eps, sigma
+ return beta, eps, sigma, est_sigma
- def _multivariate_ls_finalize(self, beta, eps, sigma, cov_type, **cov_config):
+ def _multivariate_ls_finalize(
+ self, beta, eps, sigma, col_idx, total_cols, cov_type, **cov_config
+ ):
k = len(self._wx)
# Covariance estimation
cov_est = COV_EST[cov_type]
- cov_est = cov_est(self._wxhat, eps, sigma, sigma, gls=False,
- constraints=self._constraints, **cov_config)
+ cov_est = cov_est(
+ self._wxhat,
+ eps,
+ sigma,
+ sigma,
+ gls=False,
+ constraints=self._constraints,
+ **cov_config
+ )
cov = cov_est.cov
individual = AttrDict()
- debiased = cov_config.get('debiased', False)
+ debiased = cov_config.get("debiased", False)
for i in range(k):
wy = wye = self._wy[i]
w = self._w[i]
@@ -809,17 +888,33 @@ def _multivariate_ls_finalize(self, beta, eps, sigma, cov_type, **cov_config):
wc = np.ones_like(wy) * np.sqrt(w)
wye = wy - wc @ lstsq(wc, wy)[0]
total_ss = float(wye.T @ wye)
- stats = self._common_indiv_results(i, beta, cov, eps, eps, 'OLS',
- cov_type, cov_est, 0, debiased, cons, total_ss)
+ stats = self._common_indiv_results(
+ i,
+ beta,
+ cov,
+ eps,
+ eps,
+ "OLS",
+ cov_type,
+ cov_est,
+ 0,
+ debiased,
+ cons,
+ total_ss,
+ )
key = self._eq_labels[i]
individual[key] = stats
nobs = eps.size
- results = self._common_results(beta, cov, 'OLS', 0, nobs, cov_type,
- sigma, individual, debiased)
- results['wresid'] = results.resid
- results['cov_estimator'] = cov_est
- results['cov_config'] = cov_est.cov_config
+ results = self._common_results(
+ beta, cov, "OLS", 0, nobs, cov_type, sigma, individual, debiased
+ )
+ results["wresid"] = results.resid
+ results["cov_estimator"] = cov_est
+ results["cov_config"] = cov_est.cov_config
+ individual = results["individual"]
+ r2s = [individual[eq].r2 for eq in individual]
+ results["system_r2"] = self._system_r2(eps, sigma, "ols", False, debiased, r2s)
return SystemResults(results)
@@ -835,13 +930,13 @@ def multivariate_ls(cls, dependent, exog=None, endog=None, instruments=None):
Parameters
----------
- dependent : array-like
+ dependent : array_like
nobs by ndep array of dependent variables
- exog : array-like, optional
+ exog : array_like, optional
nobs by nexog array of exogenous regressors common to all models
- endog : array-like, optional
+ endog : array_like, optional
nobs by nendog array of endogenous regressors common to all models
- instruments : array-like, optional
+ instruments : array_like, optional
nobs by ninstr array of instruments to use in all equations
Returns
@@ -859,14 +954,19 @@ def multivariate_ls(cls, dependent, exog=None, endog=None, instruments=None):
exogenous, endogenous and instrumental variables.
"""
equations = OrderedDict()
- dependent = IVData(dependent, var_name='dependent')
+ dependent = IVData(dependent, var_name="dependent")
if exog is None and endog is None:
- raise ValueError('At least one of exog or endog must be provided')
- exog = IVData(exog, var_name='exog')
- endog = IVData(endog, var_name='endog', nobs=dependent.shape[0])
- instr = IVData(instruments, var_name='instruments', nobs=dependent.shape[0])
+ raise ValueError("At least one of exog or endog must be provided")
+ exog = IVData(exog, var_name="exog")
+ endog = IVData(endog, var_name="endog", nobs=dependent.shape[0])
+ instr = IVData(instruments, var_name="instruments", nobs=dependent.shape[0])
for col in dependent.pandas:
- equations[col] = (dependent.pandas[[col]], exog.pandas, endog.pandas, instr.pandas)
+ equations[col] = (
+ dependent.pandas[[col]],
+ exog.pandas,
+ endog.pandas,
+ instr.pandas,
+ )
return cls(equations)
@classmethod
@@ -882,7 +982,7 @@ def from_formula(cls, formula, data, *, sigma=None, weights=None):
description of the accepted syntax
data : DataFrame
Frame containing named variables
- sigma : array-like
+ sigma : array_like
Prespecified residual covariance to use in GLS estimation. If
not provided, FGLS is implemented based on an estimate of sigma.
weights : dict-like
@@ -942,96 +1042,114 @@ def _f_stat(self, stats, debiased):
params = stats.params[sel]
df = params.shape[0]
nobs = stats.nobs
- null = 'All parameters ex. constant are zero'
- name = 'Equation F-statistic'
+ null = "All parameters ex. constant are zero"
+ name = "Equation F-statistic"
try:
stat = float(params.T @ inv(cov) @ params)
except np.linalg.LinAlgError:
- return InvalidTestStatistic('Covariance is singular, possibly due '
- 'to constraints.', name=name)
+ return InvalidTestStatistic(
+ "Covariance is singular, possibly due " "to constraints.", name=name
+ )
if debiased:
total_reg = np.sum(list(map(lambda s: s.shape[1], self._wx)))
df_denom = len(self._wx) * nobs - total_reg
- wald = WaldTestStatistic(stat / df, null, df, df_denom=df_denom,
- name=name)
+ wald = WaldTestStatistic(stat / df, null, df, df_denom=df_denom, name=name)
else:
return WaldTestStatistic(stat, null=null, df=df, name=name)
return wald
- def _common_indiv_results(self, index, beta, cov, wresid, resid, method,
- cov_type, cov_est, iter_count, debiased, constant, total_ss,
- *, weight_est=None):
+ def _common_indiv_results(
+ self,
+ index,
+ beta,
+ cov,
+ wresid,
+ resid,
+ method,
+ cov_type,
+ cov_est,
+ iter_count,
+ debiased,
+ constant,
+ total_ss,
+ *,
+ weight_est=None
+ ):
loc = 0
for i in range(index):
loc += self._wx[i].shape[1]
i = index
stats = AttrDict()
# Static properties
- stats['eq_label'] = self._eq_labels[i]
- stats['dependent'] = self._dependent[i].cols[0]
- stats['instruments'] = self._instr[i].cols if self._instr[i].shape[1] > 0 else None
- stats['endog'] = self._endog[i].cols if self._endog[i].shape[1] > 0 else None
- stats['method'] = method
- stats['cov_type'] = cov_type
- stats['cov_estimator'] = cov_est
- stats['cov_config'] = cov_est.cov_config
- stats['weight_estimator'] = weight_est
- stats['index'] = self._dependent[i].rows
- stats['original_index'] = self._original_index
- stats['iter'] = iter_count
- stats['debiased'] = debiased
- stats['has_constant'] = bool(constant)
- stats['constant_loc'] = self._constant_loc[i]
+ stats["eq_label"] = self._eq_labels[i]
+ stats["dependent"] = self._dependent[i].cols[0]
+ stats["instruments"] = (
+ self._instr[i].cols if self._instr[i].shape[1] > 0 else None
+ )
+ stats["endog"] = self._endog[i].cols if self._endog[i].shape[1] > 0 else None
+ stats["method"] = method
+ stats["cov_type"] = cov_type
+ stats["cov_estimator"] = cov_est
+ stats["cov_config"] = cov_est.cov_config
+ stats["weight_estimator"] = weight_est
+ stats["index"] = self._dependent[i].rows
+ stats["original_index"] = self._original_index
+ stats["iter"] = iter_count
+ stats["debiased"] = debiased
+ stats["has_constant"] = bool(constant)
+ stats["constant_loc"] = self._constant_loc[i]
# Parameters, errors and measures of fit
wxi = self._wx[i]
nobs, df = wxi.shape
- b = beta[loc:loc + df]
+ b = beta[loc : loc + df]
e = wresid[:, [i]]
nobs = e.shape[0]
- df_c = (nobs - constant)
- df_r = (nobs - df)
-
- stats['params'] = b
- stats['cov'] = cov[loc:loc + df, loc:loc + df]
- stats['wresid'] = e
- stats['nobs'] = nobs
- stats['df_model'] = df
- stats['resid'] = resid[:, [i]]
- stats['fitted'] = self._x[i] @ b
- stats['resid_ss'] = float(resid[:, [i]].T @ resid[:, [i]])
- stats['total_ss'] = total_ss
- stats['r2'] = 1.0 - stats.resid_ss / stats.total_ss
- stats['r2a'] = 1.0 - (stats.resid_ss / df_r) / (stats.total_ss / df_c)
-
- names = self._param_names[loc:loc + df]
+ df_c = nobs - constant
+ df_r = nobs - df
+
+ stats["params"] = b
+ stats["cov"] = cov[loc : loc + df, loc : loc + df]
+ stats["wresid"] = e
+ stats["nobs"] = nobs
+ stats["df_model"] = df
+ stats["resid"] = resid[:, [i]]
+ stats["fitted"] = self._x[i] @ b
+ stats["resid_ss"] = float(resid[:, [i]].T @ resid[:, [i]])
+ stats["total_ss"] = total_ss
+ stats["r2"] = 1.0 - stats.resid_ss / stats.total_ss
+ stats["r2a"] = 1.0 - (stats.resid_ss / df_r) / (stats.total_ss / df_c)
+
+ names = self._param_names[loc : loc + df]
offset = len(stats.eq_label) + 1
- stats['param_names'] = [n[offset:] for n in names]
+ stats["param_names"] = [n[offset:] for n in names]
# F-statistic
- stats['f_stat'] = self._f_stat(stats, debiased)
+ stats["f_stat"] = self._f_stat(stats, debiased)
return stats
- def _common_results(self, beta, cov, method, iter_count, nobs, cov_type,
- sigma, individual, debiased):
+ def _common_results(
+ self, beta, cov, method, iter_count, nobs, cov_type, sigma, individual, debiased
+ ):
results = AttrDict()
- results['method'] = method
- results['iter'] = iter_count
- results['nobs'] = nobs
- results['cov_type'] = cov_type
- results['index'] = self._dependent[0].rows
- results['original_index'] = self._original_index
- results['sigma'] = sigma
- results['individual'] = individual
- results['params'] = beta
- results['df_model'] = beta.shape[0]
- results['param_names'] = self._param_names
- results['cov'] = cov
- results['debiased'] = debiased
+ results["method"] = method
+ results["iter"] = iter_count
+ results["nobs"] = nobs
+ results["cov_type"] = cov_type
+ results["index"] = self._dependent[0].rows
+ results["original_index"] = self._original_index
+ names = list(individual.keys())
+ results["sigma"] = DataFrame(sigma, columns=names, index=names)
+ results["individual"] = individual
+ results["params"] = beta
+ results["df_model"] = beta.shape[0]
+ results["param_names"] = self._param_names
+ results["cov"] = cov
+ results["debiased"] = debiased
total_ss = resid_ss = 0.0
resid = []
@@ -1041,12 +1159,12 @@ def _common_results(self, beta, cov, method, iter_count, nobs, cov_type,
resid.append(individual[key].resid)
resid = np.hstack(resid)
- results['resid_ss'] = resid_ss
- results['total_ss'] = total_ss
- results['r2'] = 1.0 - results.resid_ss / results.total_ss
- results['resid'] = resid
- results['constraints'] = self._constraints
- results['model'] = self
+ results["resid_ss"] = resid_ss
+ results["total_ss"] = total_ss
+ results["r2"] = 1.0 - results.resid_ss / results.total_ss
+ results["resid"] = resid
+ results["constraints"] = self._constraints
+ results["model"] = self
x = self._x
k = len(x)
@@ -1054,17 +1172,80 @@ def _common_results(self, beta, cov, method, iter_count, nobs, cov_type,
fitted = []
for i in range(k):
nb = x[i].shape[1]
- b = beta[loc:loc + nb]
+ b = beta[loc : loc + nb]
fitted.append(x[i] @ b)
loc += nb
fitted = np.hstack(fitted)
- results['fitted'] = fitted
+ results["fitted"] = fitted
return results
- def _gls_finalize(self, beta, sigma, full_sigma, gls_eps, eps,
- cov_type, iter_count, **cov_config):
+ def _system_r2(self, eps, sigma, method, full_cov, debiased, r2s):
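+ """Compute McElroy, Berndt, Judge and Dhrymes measures of system fit"""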
+ sigma_resid = sigma
+
+ # System regression on a constant using weights if provided
+ wy, w = self._wy, self._w
+ wi = [np.sqrt(weights) for weights in w]
+ if method == "ols":
+ est_sigma = np.eye(len(wy))
+ else: # gls
+ est_sigma = sigma
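+ # full_cov=False restricts the GLS benchmark to per-equation variances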
+ if not full_cov:
+ est_sigma = np.diag(np.diag(est_sigma))
+ est_sigma_inv = inv(est_sigma)
+ nobs = wy[0].shape[0]
+ k = len(wy)
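+ # Normal equations for the constant-only system regression; each
+ # equation's regressor is its (weighted) constant, so mu contains one
+ # intercept per equation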
+ xpx = blocked_inner_prod(wi, est_sigma_inv)
+ xpy = np.zeros((k, 1))
+ for i in range(k):
+ sy = np.zeros((nobs, 1))
+ for j in range(k):
+ sy += est_sigma_inv[i, j] * wy[j]
+ xpy[i : (i + 1)] = wi[i].T @ sy
+
+ mu = _parameters_from_xprod(xpx, xpy)
+ eps_const = np.hstack([self._y[j] - mu[j] for j in range(k)])
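+ # eps_const holds the residuals from the constant-only fit and serves
+ # as the total-variation benchmark in each measure below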
+ # Judge
+ judge = 1 - (eps ** 2).sum() / (eps_const ** 2).sum()
+ # Dhrymes
+ tot_eps_const_sq = (eps_const ** 2).sum(0)
+ r2s = np.asarray(r2s)
+ dhrymes = (r2s * tot_eps_const_sq).sum() / tot_eps_const_sq.sum()
+
+ # Berndt
+ sigma_y = (eps_const.T @ eps_const / nobs) * self._sigma_scale(debiased)
+ berndt = np.nan
+ # Avoid division by 0
+ if np.linalg.det(sigma_y) > 0:
+ berndt = 1 - np.linalg.det(sigma_resid) / np.linalg.det(sigma_y)
+
+ mcelroy = np.nan
+ # Check that the matrix is invertible
+ if np.linalg.matrix_rank(sigma) == sigma.shape[0]:
+ # McElroy
+ sigma_m12 = inv_matrix_sqrt(sigma)
+ std_eps = eps @ sigma_m12
+ numerator = (std_eps ** 2).sum()
+ std_eps_const = eps_const @ sigma_m12
+ denom = (std_eps_const ** 2).sum()
+ mcelroy = 1.0 - numerator / denom
+ r2 = dict(mcelroy=mcelroy, berndt=berndt, judge=judge, dhrymes=dhrymes)
+ return Series(r2)
+
+ def _gls_finalize(
+ self,
+ beta,
+ sigma,
+ full_sigma,
+ est_sigma,
+ gls_eps,
+ eps,
+ full_cov,
+ cov_type,
+ iter_count,
+ **cov_config
+ ):
"""Collect results to return after GLS estimation"""
k = len(self._wy)
@@ -1072,14 +1253,21 @@ def _gls_finalize(self, beta, sigma, full_sigma, gls_eps, eps,
cov_est = COV_EST[cov_type]
gls_eps = np.reshape(gls_eps, (k, gls_eps.shape[0] // k)).T
eps = np.reshape(eps, (k, eps.shape[0] // k)).T
- cov_est = cov_est(self._wxhat, gls_eps, sigma, full_sigma, gls=True,
- constraints=self._constraints, **cov_config)
+ cov_est = cov_est(
+ self._wxhat,
+ gls_eps,
+ sigma,
+ full_sigma,
+ gls=True,
+ constraints=self._constraints,
+ **cov_config
+ )
cov = cov_est.cov
# Repackage results for individual equations
individual = AttrDict()
- debiased = cov_config.get('debiased', False)
- method = 'Iterative GLS' if iter_count > 1 else 'GLS'
+ debiased = cov_config.get("debiased", False)
+ method = "Iterative GLS" if iter_count > 1 else "GLS"
for i in range(k):
cons = int(self.has_constant.iloc[i])
@@ -1089,25 +1277,50 @@ def _gls_finalize(self, beta, sigma, full_sigma, gls_eps, eps,
else:
ye = self._wy[i]
total_ss = float(ye.T @ ye)
- stats = self._common_indiv_results(i, beta, cov, gls_eps, eps,
- method, cov_type, cov_est, iter_count,
- debiased, cons, total_ss)
+ stats = self._common_indiv_results(
+ i,
+ beta,
+ cov,
+ gls_eps,
+ eps,
+ method,
+ cov_type,
+ cov_est,
+ iter_count,
+ debiased,
+ cons,
+ total_ss,
+ )
key = self._eq_labels[i]
individual[key] = stats
# Populate results dictionary
nobs = eps.size
- results = self._common_results(beta, cov, method, iter_count, nobs,
- cov_type, sigma, individual, debiased)
+ results = self._common_results(
+ beta,
+ cov,
+ method,
+ iter_count,
+ nobs,
+ cov_type,
+ est_sigma,
+ individual,
+ debiased,
+ )
# wresid is different between GLS and OLS
wresid = []
for key in individual:
wresid.append(individual[key].wresid)
wresid = np.hstack(wresid)
- results['wresid'] = wresid
- results['cov_estimator'] = cov_est
- results['cov_config'] = cov_est.cov_config
+ results["wresid"] = wresid
+ results["cov_estimator"] = cov_est
+ results["cov_config"] = cov_est.cov_config
+ individual = results["individual"]
+ r2s = [individual[eq].r2 for eq in individual]
+ results["system_r2"] = self._system_r2(
+ eps, sigma, "gls", full_cov, debiased, r2s
+ )
return SystemResults(results)
@@ -1153,8 +1366,9 @@ def add_constraints(self, r, q=None):
The property `param_names` can be used to determine the order of
parameters.
"""
- self._constraints = LinearConstraint(r, q=q, num_params=len(self._param_names),
- require_pandas=True)
+ self._constraints = LinearConstraint(
+ r, q=q, num_params=len(self._param_names), require_pandas=True
+ )
def reset_constraints(self):
"""Remove all model constraints"""
@@ -1185,7 +1399,7 @@ class SUR(IV3SLS):
value must be either a tuple of the form (dependent,
exog, [weights]) or a dictionary with keys 'dependent' and 'exog' and
the optional key 'weights'.
- sigma : array-like
+ sigma : array_like
Prespecified residual covariance to use in GLS estimation. If not
provided, FGLS is implemented based on an estimate of sigma.
@@ -1251,10 +1465,10 @@ class SUR(IV3SLS):
def __init__(self, equations, *, sigma=None):
if not isinstance(equations, Mapping):
- raise TypeError('equations must be a dictionary-like')
+ raise TypeError("equations must be a dictionary-like")
for key in equations:
if not isinstance(key, str):
- raise ValueError('Equation labels (keys) must be strings')
+ raise ValueError("Equation labels (keys) must be strings")
reformatted = equations.__class__()
for key in equations:
eqn = equations[key]
@@ -1267,7 +1481,7 @@ def __init__(self, equations, *, sigma=None):
eqn = eqn + (None, None)
reformatted[key] = eqn
super(SUR, self).__init__(reformatted, sigma=sigma)
- self._model_name = 'Seemingly Unrelated Regression (SUR)'
+ self._model_name = "Seemingly Unrelated Regression (SUR)"
@classmethod
def multivariate_ls(cls, dependent, exog):
@@ -1276,9 +1490,9 @@ def multivariate_ls(cls, dependent, exog):
Parameters
----------
- dependent : array-like
+ dependent : array_like
nobs by ndep array of dependent variables
- exog : array-like
+ exog : array_like
nobs by nvar array of exogenous regressors common to all models
Returns
@@ -1306,8 +1520,8 @@ def multivariate_ls(cls, dependent, exog):
>>> mod = SUR.multivariate_ls(portfolios, factors)
"""
equations = OrderedDict()
- dependent = IVData(dependent, var_name='dependent')
- exog = IVData(exog, var_name='exog')
+ dependent = IVData(dependent, var_name="dependent")
+ exog = IVData(exog, var_name="exog")
for col in dependent.pandas:
equations[col] = (dependent.pandas[[col]], exog.pandas)
return cls(equations)
@@ -1325,7 +1539,7 @@ def from_formula(cls, formula, data, *, sigma=None, weights=None):
description of the accepted syntax
data : DataFrame
Frame containing named variables
- sigma : array-like
+ sigma : array_like
Prespecified residual covariance to use in GLS estimation. If
not provided, FGLS is implemented based on an estimate of sigma.
weights : dict-like
@@ -1388,7 +1602,7 @@ class IVSystemGMM(IV3SLS):
'exog'. The dictionary may contain optional keys for 'endog',
'instruments', and 'weights'. Endogenous and/or Instrument can be empty
if all variables in an equation are exogenous.
- sigma : array-like
+ sigma : array_like
Prespecified residual covariance to use in GLS estimation. If not
provided, FGLS is implemented based on an estimate of sigma. Only used
if weight_type is 'unadjusted'
@@ -1448,24 +1662,35 @@ class IVSystemGMM(IV3SLS):
where :math:`W` is a positive definite weighting matrix.
"""
- def __init__(self, equations, *, sigma=None, weight_type='robust', **weight_config):
+ def __init__(self, equations, *, sigma=None, weight_type="robust", **weight_config):
super().__init__(equations, sigma=sigma)
self._weight_type = weight_type
self._weight_config = weight_config
if weight_type not in COV_TYPES:
- raise ValueError('Unknown estimator for weight_type')
+ raise ValueError("Unknown estimator for weight_type")
- if weight_type not in ('unadjusted', 'homoskedastic') and sigma is not None:
+ if weight_type not in ("unadjusted", "homoskedastic") and sigma is not None:
import warnings
- warnings.warn('sigma has been provided but the estimated weight '
- 'matrix not unadjusted (homoskedastic). sigma will '
- 'be ignored.', UserWarning)
+
+ warnings.warn(
+ "sigma has been provided but the estimated weight "
+ "matrix not unadjusted (homoskedastic). sigma will "
+ "be ignored.",
+ UserWarning,
+ )
weight_type = COV_TYPES[weight_type]
self._weight_est = GMM_W_EST[weight_type](**weight_config)
- def fit(self, *, iter_limit=2, tol=1e-6, initial_weight=None,
- cov_type='robust', **cov_config):
+ def fit(
+ self,
+ *,
+ iter_limit=2,
+ tol=1e-6,
+ initial_weight=None,
+ cov_type="robust",
+ **cov_config
+ ):
"""
Estimate model parameters
@@ -1496,7 +1721,7 @@ def fit(self, *, iter_limit=2, tol=1e-6, initial_weight=None,
Estimation results
"""
if cov_type not in COV_TYPES:
- raise ValueError('Unknown cov_type: {0}'.format(cov_type))
+ raise ValueError("Unknown cov_type: {0}".format(cov_type))
# Parameter estimation
wx, wy, wz = self._wx, self._wy, self._wz
k = len(wx)
@@ -1506,12 +1731,14 @@ def fit(self, *, iter_limit=2, tol=1e-6, initial_weight=None,
w = blocked_inner_prod(wz, np.eye(k_total)) / nobs
else:
w = initial_weight
- beta_last = beta = self._blocked_gmm(wx, wy, wz, w=w, constraints=self.constraints)
+ beta_last = beta = self._blocked_gmm(
+ wx, wy, wz, w=w, constraints=self.constraints
+ )
eps = []
loc = 0
for i in range(k):
nb = wx[i].shape[1]
- b = beta[loc:loc + nb]
+ b = beta[loc : loc + nb]
eps.append(wy[i] - wx[i] @ b)
loc += nb
eps = np.hstack(eps)
@@ -1520,7 +1747,9 @@ def fit(self, *, iter_limit=2, tol=1e-6, initial_weight=None,
iters = 1
norm = 10 * tol + 1
while iters < iter_limit and norm > tol:
- sigma = self._weight_est.sigma(eps, wx) if self._sigma is None else self._sigma
+ sigma = (
+ self._weight_est.sigma(eps, wx) if self._sigma is None else self._sigma
+ )
w = self._weight_est.weight_matrix(wx, wz, eps, sigma=sigma)
beta = self._blocked_gmm(wx, wy, wz, w=w, constraints=self.constraints)
delta = beta_last - beta
@@ -1537,7 +1766,7 @@ def fit(self, *, iter_limit=2, tol=1e-6, initial_weight=None,
loc = 0
for i in range(k):
nb = wx[i].shape[1]
- b = beta[loc:loc + nb]
+ b = beta[loc : loc + nb]
eps.append(wy[i] - wx[i] @ b)
loc += nb
eps = np.hstack(eps)
@@ -1545,7 +1774,9 @@ def fit(self, *, iter_limit=2, tol=1e-6, initial_weight=None,
cov_type = COV_TYPES[cov_type]
cov_est = GMM_COV_EST[cov_type]
- cov = cov_est(wx, wz, eps, w, sigma=sigma, constraints=self._constraints, **cov_config)
+ cov = cov_est(
+ wx, wz, eps, w, sigma=sigma, constraints=self._constraints, **cov_config
+ )
weps = eps
eps = []
@@ -1553,13 +1784,14 @@ def fit(self, *, iter_limit=2, tol=1e-6, initial_weight=None,
x, y = self._x, self._y
for i in range(k):
nb = x[i].shape[1]
- b = beta[loc:loc + nb]
+ b = beta[loc : loc + nb]
eps.append(y[i] - x[i] @ b)
loc += nb
eps = np.hstack(eps)
iters += 1
- return self._finalize_results(beta, cov.cov, weps, eps, w, sigma,
- iters - 1, cov_type, cov_config, cov)
+ return self._finalize_results(
+ beta, cov.cov, weps, eps, w, sigma, iters - 1, cov_type, cov_config, cov
+ )
@staticmethod
def _blocked_gmm(x, y, z, *, w=None, constraints=None):
@@ -1576,16 +1808,27 @@ def _blocked_gmm(x, y, z, *, w=None, constraints=None):
return params
- def _finalize_results(self, beta, cov, weps, eps, wmat, sigma,
- iter_count, cov_type, cov_config, cov_est):
+ def _finalize_results(
+ self,
+ beta,
+ cov,
+ weps,
+ eps,
+ wmat,
+ sigma,
+ iter_count,
+ cov_type,
+ cov_config,
+ cov_est,
+ ):
"""Collect results to return after GLS estimation"""
k = len(self._wy)
# Repackage results for individual equations
individual = AttrDict()
- debiased = cov_config.get('debiased', False)
- method = '{0}-Step System GMM'.format(iter_count)
+ debiased = cov_config.get("debiased", False)
+ method = "{0}-Step System GMM".format(iter_count)
if iter_count > 2:
- method = 'Iterative System GMM'
+ method = "Iterative System GMM"
for i in range(k):
cons = int(self.has_constant.iloc[i])
@@ -1595,37 +1838,53 @@ def _finalize_results(self, beta, cov, weps, eps, wmat, sigma,
else:
ye = self._wy[i]
total_ss = float(ye.T @ ye)
- stats = self._common_indiv_results(i, beta, cov, weps, eps,
- method, cov_type, cov_est,
- iter_count, debiased, cons, total_ss,
- weight_est=self._weight_est)
+ stats = self._common_indiv_results(
+ i,
+ beta,
+ cov,
+ weps,
+ eps,
+ method,
+ cov_type,
+ cov_est,
+ iter_count,
+ debiased,
+ cons,
+ total_ss,
+ weight_est=self._weight_est,
+ )
key = self._eq_labels[i]
individual[key] = stats
# Populate results dictionary
nobs = eps.size
- results = self._common_results(beta, cov, method, iter_count, nobs,
- cov_type, sigma, individual, debiased)
+ results = self._common_results(
+ beta, cov, method, iter_count, nobs, cov_type, sigma, individual, debiased
+ )
# wresid is different between GLS and OLS
wresid = []
for key in individual:
wresid.append(individual[key].wresid)
wresid = np.hstack(wresid)
- results['wresid'] = wresid
- results['wmat'] = wmat
- results['weight_type'] = self._weight_type
- results['weight_config'] = self._weight_est.config
- results['cov_estimator'] = cov_est
- results['cov_config'] = cov_est.cov_config
- results['weight_estimator'] = self._weight_est
- results['j_stat'] = self._j_statistic(beta, wmat)
+ results["wresid"] = wresid
+ results["wmat"] = wmat
+ results["weight_type"] = self._weight_type
+ results["weight_config"] = self._weight_est.config
+ results["cov_estimator"] = cov_est
+ results["cov_config"] = cov_est.cov_config
+ results["weight_estimator"] = self._weight_est
+ results["j_stat"] = self._j_statistic(beta, wmat)
+ r2s = [individual[eq].r2 for eq in individual]
+ results["system_r2"] = self._system_r2(eps, sigma, "gls", False, debiased, r2s)
return GMMSystemResults(results)
@classmethod
- def from_formula(cls, formula, data, *, weights=None, weight_type='robust', **weight_config):
+ def from_formula(
+ cls, formula, data, *, weights=None, weight_type="robust", **weight_config
+ ):
"""
Specify a system GMM model using the formula interface
@@ -1721,7 +1980,7 @@ def _j_statistic(self, params, weight_mat):
idx = 0
for i in range(k):
kx = x[i].shape[1]
- beta = params[idx:idx + kx]
+ beta = params[idx : idx + kx]
eps = y[i] - x[i] @ beta
ze.append(z[i] * eps)
idx += kx
@@ -1729,7 +1988,7 @@ def _j_statistic(self, params, weight_mat):
g_bar = ze.mean(0)
nobs = x[0].shape[0]
stat = float(nobs * g_bar.T @ np.linalg.inv(weight_mat) @ g_bar.T)
- null = 'Expected moment conditions are equal to 0'
+ null = "Expected moment conditions are equal to 0"
ninstr = sum(map(lambda a: a.shape[1], z))
nvar = sum(map(lambda a: a.shape[1], x))
ncons = 0 if self.constraints is None else self.constraints.r.shape[0]
diff --git a/linearmodels/system/results.py b/linearmodels/system/results.py
index b4724633b2..475c24b6b3 100644
--- a/linearmodels/system/results.py
+++ b/linearmodels/system/results.py
@@ -2,16 +2,16 @@
import datetime as dt
-from property_cached import cached_property
import numpy as np
from pandas import DataFrame, Series, concat
+from property_cached import cached_property
from scipy import stats
from statsmodels.iolib.summary import SimpleTable, fmt_2cols
from linearmodels.utility import (AttrDict, _str, _SummaryStr, format_wide,
param_table, pval_format)
-__all__ = ['SystemResults', 'SystemEquationResult', 'GMMSystemResults']
+__all__ = ["SystemResults", "SystemEquationResult", "GMMSystemResults"]
class _CommonResults(_SummaryStr):
@@ -46,8 +46,7 @@ def method(self):
@property
def cov(self):
"""Estimated covariance of parameters"""
- return DataFrame(self._cov, index=self._param_names,
- columns=self._param_names)
+ return DataFrame(self._cov, index=self._param_names, columns=self._param_names)
@property
def cov_estimator(self):
@@ -72,18 +71,18 @@ def debiased(self):
@property
def params(self):
"""Estimated parameters"""
- return Series(self._params.squeeze(), index=self._param_names, name='params')
+ return Series(self._params.squeeze(), index=self._param_names, name="params")
@property
def std_errors(self):
"""Estimated parameter standard errors"""
std_errors = np.sqrt(np.diag(self.cov))
- return Series(std_errors, index=self._param_names, name='stderr')
+ return Series(std_errors, index=self._param_names, name="stderr")
@property
def tstats(self):
"""Parameter t-statistics"""
- return Series(self.params / self.std_errors, name='tstat')
+ return Series(self.params / self.std_errors, name="tstat")
@cached_property
def pvalues(self):
@@ -95,11 +94,33 @@ def pvalues(self):
else:
pvals = 2 - 2 * stats.norm.cdf(np.abs(self.tstats))
- return Series(pvals, index=self._param_names, name='pvalue')
+ return Series(pvals, index=self._param_names, name="pvalue")
@property
def rsquared(self):
- """Coefficient of determination (R**2)"""
+ r"""
+ Coefficient of determination (R2)
+
+ Returns
+ -------
+ float
+ The coefficient of determination.
+
+ Notes
+ -----
+ The overall R2 is similar to Judge's system R2 since no weighting is
+ used. These two only differ if one or more equations do not include
+ constants. It is defined as
+
+ .. math::
+
+ 1 - \frac{\sum_i \sum_j \hat{\epsilon}_{ij}^2}{\sum_i \sum_j \hat{\eta}_{ij}^2}
+
+ where :math:`\eta` is the residual from a regression on only a
+ constant. Note that if a constant is not present in an equation
+ then the term in the denominator is **not** demeaned so that
+ :math:`\hat{\eta}_{ij}=y_{ij}`.
+ """
return self._r2
@property
@@ -143,7 +164,7 @@ def conf_int(self, level=0.95):
Returns
-------
- ci : DataFrame
+ DataFrame
Confidence interval of the form [lower, upper] for each parameter
Notes
@@ -157,7 +178,7 @@ def conf_int(self, level=0.95):
q = stats.norm.ppf(ci_quantiles)
q = q[None, :]
ci = self.params[:, None] + self.std_errors[:, None] * q
- return DataFrame(ci, index=self._param_names, columns=['lower', 'upper'])
+ return DataFrame(ci, index=self._param_names, columns=["lower", "upper"])
class SystemResults(_CommonResults):
@@ -175,13 +196,14 @@ def __init__(self, results):
self._individual = AttrDict()
for key in results.individual:
self._individual[key] = SystemEquationResult(results.individual[key])
+ self._system_r2 = results.system_r2
self._sigma = results.sigma
self._model = results.model
self._constraints = results.constraints
- self._num_constraints = 'None'
+ self._num_constraints = "None"
if results.constraints is not None:
self._num_constraints = str(results.constraints.r.shape[0])
- self._weight_estimtor = results.get('weight_estimator', None)
+ self._weight_estimtor = results.get("weight_estimator", None)
@property
def model(self):
@@ -210,21 +232,33 @@ def fitted_values(self):
def _out_of_sample(self, equations, data, missing, dataframe):
if equations is not None and data is not None:
- raise ValueError('Predictions can only be constructed using one '
- 'of eqns or data, but not both.')
- pred = self.model.predict(self.params, equations=equations, data=data) # type: DataFrame
+ raise ValueError(
+ "Predictions can only be constructed using one "
+ "of eqns or data, but not both."
+ )
+ pred = self.model.predict(
+ self.params, equations=equations, data=data
+ ) # type: DataFrame
if not dataframe:
pred = {col: pred[[col]] for col in pred}
if not missing:
for key in pred:
pred[key] = pred[key].dropna()
else:
- pred = pred.dropna(how='all', axis=1)
+ pred = pred.dropna(how="all", axis=1)
return pred
- def predict(self, equations=None, *, data=None, fitted=True,
- idiosyncratic=False, missing=False, dataframe=False):
+ def predict(
+ self,
+ equations=None,
+ *,
+ data=None,
+ fitted=True,
+ idiosyncratic=False,
+ missing=False,
+ dataframe=False
+ ):
"""
In- and out-of-sample predictions
@@ -254,7 +288,7 @@ def predict(self, equations=None, *, data=None, fitted=True,
Returns
-------
- predictions : DataFrame, dict
+ predictions : {DataFrame, dict}
DataFrame or dictionary containing selected outputs
Notes
@@ -278,14 +312,17 @@ def predict(self, equations=None, *, data=None, fitted=True,
if equations is not None or data is not None:
return self._out_of_sample(equations, data, missing, dataframe)
if not (fitted or idiosyncratic):
- raise ValueError('At least one output must be selected')
+ raise ValueError("At least one output must be selected")
if dataframe:
if fitted and not idiosyncratic:
out = self.fitted_values
elif idiosyncratic and not fitted:
out = self.resids
else:
- out = {'fitted_values': self.fitted_values, 'idiosyncratic': self.resids}
+ out = {
+ "fitted_values": self.fitted_values,
+ "idiosyncratic": self.resids,
+ }
else:
out = {}
for key in self.equation_labels:
@@ -314,6 +351,73 @@ def sigma(self):
"""Estimated residual covariance"""
return self._sigma
+ @property
+ def system_rsquared(self):
+ r"""
+ Alternative measure of system fit
+
+ Returns
+ -------
+ Series
+ The measures of overall system fit.
+
+ Notes
+ -----
+ McElroy's R2 is defined as
+
+ .. math::
+
+ 1 - \frac{SSR_{\Omega}}{TSS_{\Omega}}
+
+ where
+
+ .. math::
+
+ SSR_{\Omega} = \hat{\epsilon}^\prime\hat{\Omega}^{-1}\hat{\epsilon}
+
+ and
+
+ .. math::
+
+ TSS_{\Omega} = \hat{\eta}^\prime\hat{\Omega}^{-1}\hat{\eta}
+
+ where :math:`\eta` is the residual from a regression on only a constant.
+
+ Judge's system R2 is defined as
+
+ .. math::
+
+ 1 - \frac{\sum_i \sum_j \hat{\epsilon}_{ij}^2}{\sum_i \sum_j \hat{\eta}_{ij}^2}
+
+ where :math:`\eta` is the residual from a regression on only a constant.
+
+ Berndt's system R2 is defined as
+
+ .. math::
+
+ 1 - \frac{|\hat{\Sigma}_\epsilon|}{|\hat{\Sigma}_\eta|}
+
+ where :math:`\hat{\Sigma}_\epsilon` and :math:`\hat{\Sigma}_\eta` are the
+ estimated covariances of :math:`\epsilon` and :math:`\eta`, respectively.
+
+ Dhrymes's system R2 is defined as a weighted average of the R2 of each
+ equation
+
+ .. math::
+
+ \sum_i w_i R^2_i
+
+ where the weight is
+
+ .. math::
+
+ w_i = \frac{\hat{\Sigma}_{\eta}^{[ii]}}{\operatorname{tr}\left(\hat{\Sigma}_{\eta}\right)}
+
+ the ratio of the variance of the dependent variable in an equation to
+ the total variance of all dependent variables.
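+
+ Examples
+ --------
+ A minimal sketch on simulated data; the frame, column, and equation
+ names below are purely illustrative:
+
+ >>> import numpy as np
+ >>> from pandas import DataFrame
+ >>> from linearmodels.system import SUR
+ >>> data = DataFrame(np.random.standard_normal((500, 4)),
+ ... columns=["y1", "x1", "y2", "x2"])
+ >>> eqns = {"eq1": (data[["y1"]], data[["x1"]]),
+ ... "eq2": (data[["y2"]], data[["x2"]])}
+ >>> r2 = SUR(eqns).fit(method="gls").system_rsquared
+
+ ``r2`` is a Series indexed by ``mcelroy``, ``berndt``, ``judge`` and
+ ``dhrymes``.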
+ """
+ return self._system_r2
+
@property
def summary(self):
""":obj:`statsmodels.iolib.summary.Summary` : Summary table of model estimation results
@@ -322,23 +426,27 @@ def summary(self):
``summary.as_html()`` and ``summary.as_latex()``.
"""
- title = 'System ' + self._method + ' Estimation Summary'
-
- top_left = [('Estimator:', self._method),
- ('No. Equations.:', str(len(self.equation_labels))),
- ('No. Observations:', str(self.resids.shape[0])),
- ('Date:', self._datetime.strftime('%a, %b %d %Y')),
- ('Time:', self._datetime.strftime('%H:%M:%S')),
- ('', ''),
- ('', '')]
-
- top_right = [('Overall R-squared:', _str(self.rsquared)),
- ('Cov. Estimator:', self._cov_type),
- ('Num. Constraints: ', self._num_constraints),
- ('', ''),
- ('', ''),
- ('', ''),
- ('', '')]
+ title = "System " + self._method + " Estimation Summary"
+
+ top_left = [
+ ("Estimator:", self._method),
+ ("No. Equations.:", str(len(self.equation_labels))),
+ ("No. Observations:", str(self.resids.shape[0])),
+ ("Date:", self._datetime.strftime("%a, %b %d %Y")),
+ ("Time:", self._datetime.strftime("%H:%M:%S")),
+ ("", ""),
+ ("", ""),
+ ]
+
+ top_right = [
+ ("Overall R-squared:", _str(self.rsquared)),
+ ("McElroy's R-squared:", _str(self.system_rsquared.mcelroy)),
+ ("Judge's (OLS) R-squared:", _str(self.system_rsquared.judge)),
+ ("Berndt's R-squared:", _str(self.system_rsquared.berndt)),
+ ("Dhrymes's R-squared:", _str(self.system_rsquared.dhrymes)),
+ ("Cov. Estimator:", self._cov_type),
+ ("Num. Constraints: ", self._num_constraints),
+ ]
stubs = []
vals = []
@@ -352,9 +460,9 @@ def summary(self):
# Top Table
# Parameter table
fmt = fmt_2cols
- fmt['data_fmts'][1] = '%10s'
+ fmt["data_fmts"][1] = "%10s"
- top_right = [('%-21s' % (' ' + k), v) for k, v in top_right]
+ top_right = [("%-21s" % (" " + k), v) for k, v in top_right]
stubs = []
vals = []
for stub, val in top_right:
@@ -367,20 +475,20 @@ def summary(self):
last_row = i == (len(self.equation_labels) - 1)
results = self.equations[eqlabel]
dep_name = results.dependent
- title = 'Equation: {0}, Dependent Variable: {1}'.format(eqlabel, dep_name)
+ title = "Equation: {0}, Dependent Variable: {1}".format(eqlabel, dep_name)
pad_bottom = results.instruments is not None and not last_row
smry.tables.append(param_table(results, title, pad_bottom=pad_bottom))
if results.instruments:
formatted = format_wide(results.instruments, 80)
if not last_row:
- formatted.append([' '])
- smry.tables.append(SimpleTable(formatted, headers=['Instruments']))
- extra_text = ['Covariance Estimator:']
- for line in str(self._cov_estimator).split('\n'):
+ formatted.append([" "])
+ smry.tables.append(SimpleTable(formatted, headers=["Instruments"]))
+ extra_text = ["Covariance Estimator:"]
+ for line in str(self._cov_estimator).split("\n"):
extra_text.append(line)
if self._weight_estimtor:
- extra_text.append('Weight Estimator:')
- for line in str(self._weight_estimtor).split('\n'):
+ extra_text.append("Weight Estimator:")
+ for line in str(self._weight_estimtor).split("\n"):
extra_text.append(line)
smry.add_extra_txt(extra_text)
@@ -405,7 +513,7 @@ def __init__(self, results):
self._r2a = results.r2a
self._instruments = results.instruments
self._endog = results.endog
- self._weight_estimator = results.get('weight_estimator', None)
+ self._weight_estimator = results.get("weight_estimator", None)
@property
def equation_label(self):
@@ -430,24 +538,27 @@ def summary(self):
``summary.as_html()`` and ``summary.as_latex()``.
"""
- title = self._method + ' Estimation Summary'
-
- top_left = [('Eq. Label:', self.equation_label),
- ('Dep. Variable:', self.dependent),
- ('Estimator:', self._method),
- ('No. Observations:', self.nobs),
- ('Date:', self._datetime.strftime('%a, %b %d %Y')),
- ('Time:', self._datetime.strftime('%H:%M:%S')),
-
- ('', '')]
-
- top_right = [('R-squared:', _str(self.rsquared)),
- ('Adj. R-squared:', _str(self.rsquared_adj)),
- ('Cov. Estimator:', self._cov_type),
- ('F-statistic:', _str(self.f_statistic.stat)),
- ('P-value (F-stat)', pval_format(self.f_statistic.pval)),
- ('Distribution:', str(self.f_statistic.dist_name)),
- ('', '')]
+ title = self._method + " Estimation Summary"
+
+ top_left = [
+ ("Eq. Label:", self.equation_label),
+ ("Dep. Variable:", self.dependent),
+ ("Estimator:", self._method),
+ ("No. Observations:", self.nobs),
+ ("Date:", self._datetime.strftime("%a, %b %d %Y")),
+ ("Time:", self._datetime.strftime("%H:%M:%S")),
+ ("", ""),
+ ]
+
+ top_right = [
+ ("R-squared:", _str(self.rsquared)),
+ ("Adj. R-squared:", _str(self.rsquared_adj)),
+ ("Cov. Estimator:", self._cov_type),
+ ("F-statistic:", _str(self.f_statistic.stat)),
+ ("P-value (F-stat)", pval_format(self.f_statistic.pval)),
+ ("Distribution:", str(self.f_statistic.dist_name)),
+ ("", ""),
+ ]
stubs = []
vals = []
@@ -461,9 +572,9 @@ def summary(self):
# Top Table
# Parameter table
fmt = fmt_2cols
- fmt['data_fmts'][1] = '%10s'
+ fmt["data_fmts"][1] = "%10s"
- top_right = [('%-21s' % (' ' + k), v) for k, v in top_right]
+ top_right = [("%-21s" % (" " + k), v) for k, v in top_right]
stubs = []
vals = []
for stub, val in top_right:
@@ -471,22 +582,22 @@ def summary(self):
vals.append([val])
table.extend_right(SimpleTable(vals, stubs=stubs))
smry.tables.append(table)
- smry.tables.append(param_table(self, 'Parameter Estimates', pad_bottom=True))
+ smry.tables.append(param_table(self, "Parameter Estimates", pad_bottom=True))
extra_text = []
instruments = self._instruments
if instruments:
endog = self._endog
extra_text = []
- extra_text.append('Endogenous: ' + ', '.join(endog))
- extra_text.append('Instruments: ' + ', '.join(instruments))
+ extra_text.append("Endogenous: " + ", ".join(endog))
+ extra_text.append("Instruments: " + ", ".join(instruments))
- extra_text.append('Covariance Estimator:')
- for line in str(self._cov_estimator).split('\n'):
+ extra_text.append("Covariance Estimator:")
+ for line in str(self._cov_estimator).split("\n"):
extra_text.append(line)
if self._weight_estimator:
- extra_text.append('Weight Estimator:')
- for line in str(self._weight_estimator).split('\n'):
+ extra_text.append("Weight Estimator:")
+ for line in str(self._weight_estimator).split("\n"):
extra_text.append(line)
smry.add_extra_txt(extra_text)
@@ -499,7 +610,7 @@ def f_statistic(self):
Returns
-------
- f : WaldTestStatistic
+ WaldTestStatistic
Test statistic for the null that all coefficients excluding constant
terms are zero.
@@ -518,17 +629,17 @@ def f_statistic(self):
@property
def resids(self):
"""Estimated residuals"""
- return Series(self._resid.squeeze(), index=self._index, name='resid')
+ return Series(self._resid.squeeze(), index=self._index, name="resid")
@property
def wresids(self):
"""Weighted estimated residuals"""
- return Series(self._wresid.squeeze(), index=self._index, name='wresid')
+ return Series(self._wresid.squeeze(), index=self._index, name="wresid")
@property
def fitted_values(self):
"""Fitted values"""
- return Series(self._fitted.squeeze(), index=self._index, name='fitted_values')
+ return Series(self._fitted.squeeze(), index=self._index, name="fitted_values")
@property
def rsquared_adj(self):
@@ -575,7 +686,7 @@ def j_stat(self):
Returns
-------
- j : WaldTestStatistic
+ WaldTestStatistic
J statistic test of overidentifying restrictions
Notes
diff --git a/linearmodels/tests/asset_pricing/_utility.py b/linearmodels/tests/asset_pricing/_utility.py
index 8856e3cc7c..b13733aa1e 100644
--- a/linearmodels/tests/asset_pricing/_utility.py
+++ b/linearmodels/tests/asset_pricing/_utility.py
@@ -4,8 +4,9 @@
from linearmodels.utility import AttrDict
-def generate_data(nfactor=3, nportfolio=25, nobs=1000, premia=None, output='pandas',
- alpha=False):
+def generate_data(
+ nfactor=3, nportfolio=25, nobs=1000, premia=None, output="pandas", alpha=False
+):
np.random.seed(12345)
if premia is None:
premia = np.arange(1, nfactor + 1) / (10 * nfactor)
@@ -19,16 +20,12 @@ def generate_data(nfactor=3, nportfolio=25, nobs=1000, premia=None, output='pand
portfolios = factors @ betas + idio
if alpha:
portfolios += np.arange(nportfolio)[None, :] / nportfolio / 100
- index = pd.date_range('1930-1-1', periods=nobs, freq='D')
- if output == 'pandas':
- cols = ['factor_{0}'.format(i) for i in range(1, nfactor + 1)]
- factors = pd.DataFrame(factors,
- columns=cols,
- index=index)
- cols = ['port_{0}'.format(i) for i in range(1, nportfolio + 1)]
- portfolios = pd.DataFrame(portfolios,
- columns=cols,
- index=index)
+ index = pd.date_range("1930-1-1", periods=nobs, freq="D")
+ if output == "pandas":
+ cols = ["factor_{0}".format(i) for i in range(1, nfactor + 1)]
+ factors = pd.DataFrame(factors, columns=cols, index=index)
+ cols = ["port_{0}".format(i) for i in range(1, nportfolio + 1)]
+ portfolios = pd.DataFrame(portfolios, columns=cols, index=index)
return AttrDict(factors=factors, portfolios=portfolios)
@@ -36,7 +33,7 @@ def generate_data(nfactor=3, nportfolio=25, nobs=1000, premia=None, output='pand
def get_all(res):
attrs = dir(res)
for attr_name in attrs:
- if attr_name.startswith('_'):
+ if attr_name.startswith("_"):
continue
attr = getattr(res, attr_name)
if callable(attr):
diff --git a/linearmodels/tests/asset_pricing/test_covariance.py b/linearmodels/tests/asset_pricing/test_covariance.py
index a8cc540394..74b49fb21a 100644
--- a/linearmodels/tests/asset_pricing/test_covariance.py
+++ b/linearmodels/tests/asset_pricing/test_covariance.py
@@ -14,17 +14,16 @@ def data():
moments = np.random.randn(500, 10)
jacobian = np.random.rand(10, 8)
jacobian_inv = np.eye(10)
- return AttrDict(moments=moments, jacobian=jacobian,
- inv_jacobian=jacobian_inv)
+ return AttrDict(moments=moments, jacobian=jacobian, inv_jacobian=jacobian_inv)
def test_kernel_errors(data):
with pytest.raises(ValueError):
- KernelWeight(data.moments, kernel='unknown')
+ KernelWeight(data.moments, kernel="unknown")
with pytest.raises(ValueError):
- KernelWeight(data.moments, bandwidth=-.5)
+ KernelWeight(data.moments, bandwidth=-0.5)
with pytest.raises(ValueError):
- KernelCovariance(data.moments, jacobian=data.jacobian, kernel='unknown')
+ KernelCovariance(data.moments, jacobian=data.jacobian, kernel="unknown")
with pytest.raises(ValueError):
KernelCovariance(data.moments, jacobian=data.jacobian, bandwidth=-4)
@@ -33,7 +32,9 @@ def test_no_jacobian(data):
with pytest.raises(ValueError):
KernelCovariance(data.moments)
with pytest.raises(ValueError):
- KernelCovariance(data.moments, jacobian=data.jacobian, inv_jacobian=data.inv_jacobian)
+ KernelCovariance(
+ data.moments, jacobian=data.jacobian, inv_jacobian=data.inv_jacobian
+ )
def test_alt_jacobians(data):
diff --git a/linearmodels/tests/asset_pricing/test_formulas.py b/linearmodels/tests/asset_pricing/test_formulas.py
index 1b56d8bd25..53597b4e94 100644
--- a/linearmodels/tests/asset_pricing/test_formulas.py
+++ b/linearmodels/tests/asset_pricing/test_formulas.py
@@ -9,28 +9,31 @@
TradedFactorModel)
from linearmodels.tests.asset_pricing._utility import generate_data
-FORMULA_FACTORS = 'factor_1 + factor_2 + factor_3'
-FORMULA_PORT = 'port_1 + port_2 + port_3 + port_4 + port_5 + port_6 + port_7 + ' \
- 'port_8 + port_9 + port_10'
-FORMULA = ' ~ '.join((FORMULA_PORT, FORMULA_FACTORS))
+FORMULA_FACTORS = "factor_1 + factor_2 + factor_3"
+FORMULA_PORT = (
+ "port_1 + port_2 + port_3 + port_4 + port_5 + port_6 + port_7 + "
+ "port_8 + port_9 + port_10"
+)
+FORMULA = " ~ ".join((FORMULA_PORT, FORMULA_FACTORS))
-@pytest.fixture(scope='module', params=[TradedFactorModel, LinearFactorModel,
- LinearFactorModelGMM])
+@pytest.fixture(
+ scope="module", params=[TradedFactorModel, LinearFactorModel, LinearFactorModelGMM]
+)
def model(request):
return request.param
-@pytest.fixture(scope='module', params=[LinearFactorModel, LinearFactorModelGMM])
+@pytest.fixture(scope="module", params=[LinearFactorModel, LinearFactorModelGMM])
def non_traded_model(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data():
- premia = np.array([.1, .1, .1])
- out = generate_data(nportfolio=10, output='pandas', alpha=True, premia=premia)
- out['joined'] = concat([out.factors, out.portfolios], 1)
+ premia = np.array([0.1, 0.1, 0.1])
+ out = generate_data(nportfolio=10, output="pandas", alpha=True, premia=premia)
+ out["joined"] = concat([out.factors, out.portfolios], 1)
return out
@@ -68,8 +71,9 @@ def test_non_traded_risk_free(data, non_traded_model):
assert mod1.formula == FORMULA
assert mod2.formula is None
- mod1 = non_traded_model.from_formula(FORMULA_FACTORS, data.joined,
- portfolios=data.portfolios, risk_free=True)
+ mod1 = non_traded_model.from_formula(
+ FORMULA_FACTORS, data.joined, portfolios=data.portfolios, risk_free=True
+ )
mod2 = non_traded_model(data.portfolios, data.factors, risk_free=True)
res1 = mod1.fit()
res2 = mod2.fit()
diff --git a/linearmodels/tests/asset_pricing/test_linear_factor_gmm.py b/linearmodels/tests/asset_pricing/test_linear_factor_gmm.py
index eef9319eea..59d33204e0 100644
--- a/linearmodels/tests/asset_pricing/test_linear_factor_gmm.py
+++ b/linearmodels/tests/asset_pricing/test_linear_factor_gmm.py
@@ -6,17 +6,19 @@
from linearmodels.tests.asset_pricing._utility import generate_data, get_all
-@pytest.fixture(params=['numpy', 'pandas'])
+@pytest.fixture(params=["numpy", "pandas"])
def data(request):
return generate_data(nportfolio=10, output=request.param)
def test_linear_model_gmm_moments_jacobian(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors)
- res = mod.fit(cov_type='robust', disp=0, debiased=False)
- params = np.r_[res.betas.values.ravel(),
- res.risk_premia.values.ravel(),
- mod.factors.ndarray.mean(0)]
+ res = mod.fit(cov_type="robust", disp=0, debiased=False)
+ params = np.r_[
+ res.betas.values.ravel(),
+ res.risk_premia.values.ravel(),
+ mod.factors.ndarray.mean(0),
+ ]
mod_mom = mod._moments(params[:, None], True)
mom = []
@@ -29,7 +31,7 @@ def test_linear_model_gmm_moments_jacobian(data):
x = f - mu + lam
b = res.betas.values
for i in range(p.shape[1]):
- eps = p[:, i:(i + 1)] - x @ b[[i]].T
+ eps = p[:, i : (i + 1)] - x @ b[[i]].T
for j in range(fc.shape[1]):
mom.append(eps * fc[:, [j]])
mom.append(f - mu)
@@ -39,19 +41,19 @@ def test_linear_model_gmm_moments_jacobian(data):
jac = np.zeros((mom.shape[1], params.shape[0]))
nport, nf = p.shape[1], f.shape[1]
# 1,1
- jac[:(nport * (nf + 1)), :nport * nf] = np.kron(np.eye(nport), fc.T @ x / n)
+ jac[: (nport * (nf + 1)), : nport * nf] = np.kron(np.eye(nport), fc.T @ x / n)
# 1, 2
col = []
for i in range(nport):
col.append(fc.T @ np.ones((n, 1)) @ b[[i]] / n)
col = np.vstack(tuple(col))
- jac[:(nport * (nf + 1)), nport * nf:nport * nf + nf] = col
+ jac[: (nport * (nf + 1)), nport * nf : nport * nf + nf] = col
# 1, 3
col = []
for i in range(nport):
col.append(-fc.T @ np.ones((n, 1)) @ b[[i]] / n)
col = np.vstack(tuple(col))
- jac[:(nport * (nf + 1)), -nf:] = col
+ jac[: (nport * (nf + 1)), -nf:] = col
# 2,2
jac[-nf:, -nf:] = np.eye(nf)
@@ -69,13 +71,13 @@ def test_linear_model_gmm_moments_jacobian(data):
def test_linear_model_gmm_smoke_iterate(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors)
- res = mod.fit(cov_type='robust', disp=5, steps=20)
+ res = mod.fit(cov_type="robust", disp=5, steps=20)
get_all(res)
def test_linear_model_gmm_smoke_risk_free(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors, risk_free=True)
- res = mod.fit(cov_type='robust', disp=10)
+ res = mod.fit(cov_type="robust", disp=10)
get_all(res)
str(res._cov_est)
res._cov_est.__repr__()
@@ -84,7 +86,7 @@ def test_linear_model_gmm_smoke_risk_free(data):
def test_linear_model_gmm_kernel_smoke(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors)
- res = mod.fit(cov_type='kernel', disp=10)
+ res = mod.fit(cov_type="kernel", disp=10)
get_all(res)
str(res._cov_est)
res._cov_est.__repr__()
@@ -93,11 +95,11 @@ def test_linear_model_gmm_kernel_smoke(data):
def test_linear_model_gmm_kernel_bandwidth_smoke(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors)
- res = mod.fit(cov_type='kernel', bandwidth=10, disp=10)
+ res = mod.fit(cov_type="kernel", bandwidth=10, disp=10)
get_all(res)
def test_linear_model_gmm_cue_smoke(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors, risk_free=True)
- res = mod.fit(cov_type='robust', disp=10, use_cue=True)
+ res = mod.fit(cov_type="robust", disp=10, use_cue=True)
get_all(res)
diff --git a/linearmodels/tests/asset_pricing/test_linear_factor_model.py b/linearmodels/tests/asset_pricing/test_linear_factor_model.py
index 1170cb8139..975873bcf7 100644
--- a/linearmodels/tests/asset_pricing/test_linear_factor_model.py
+++ b/linearmodels/tests/asset_pricing/test_linear_factor_model.py
@@ -11,7 +11,7 @@
from linearmodels.tests.asset_pricing._utility import generate_data, get_all
-@pytest.fixture(params=['numpy', 'pandas'])
+@pytest.fixture(params=["numpy", "pandas"])
def data(request):
return generate_data(nportfolio=10, output=request.param)
@@ -64,7 +64,7 @@ def test_linear_model_parameters(data):
block[j, k] = b[i][j] * lam[k - 1]
if j + 1 == k:
block[j, k] -= alphas[i]
- jac[block1:block2, loc:loc + nf + 1] = block
+ jac[block1:block2, loc : loc + nf + 1] = block
loc += nf + 1
# 2, 2
jac[block1:block2, block1:block2] = b.T @ b
@@ -77,14 +77,16 @@ def test_linear_model_parameters(data):
block[row, col] = lam[j - 1]
col += 1
row += 1
- jac[-nport:, :(nport * (nf + 1))] = block
+ jac[-nport:, : (nport * (nf + 1))] = block
# 3, 2
- jac[-nport:, (nport * (nf + 1)):(nport * (nf + 1)) + nf] = b
+ jac[-nport:, (nport * (nf + 1)) : (nport * (nf + 1)) + nf] = b
# 3, 3: already done since eye
mod_jac = mod._jacobian(b, lam, alphas)
assert_allclose(mod_jac[:block1], jac[:block1])
assert_allclose(mod_jac[block1:block2, :block1], jac[block1:block2, :block1])
- assert_allclose(mod_jac[block1:block2, block1:block2], jac[block1:block2, block1:block2])
+ assert_allclose(
+ mod_jac[block1:block2, block1:block2], jac[block1:block2, block1:block2]
+ )
assert_allclose(mod_jac[block1:block2, block2:], jac[block1:block2, block2:])
assert_allclose(mod_jac[block2:], jac[block2:])
@@ -100,14 +102,16 @@ def test_linear_model_parameters(data):
cov = (cov + cov.T) / 2
assert_allclose(cov, res.cov)
- acov = cov[:block1:(nf + 1), :block1:(nf + 1)]
+ acov = cov[: block1 : (nf + 1), : block1 : (nf + 1)]
jstat = float(alphas.T @ np.linalg.pinv(acov) @ alphas)
assert_allclose(res.j_statistic.stat, jstat)
assert_allclose(res.j_statistic.pval, 1 - stats.chi2(nport - nf).cdf(jstat))
get_all(res)
- res = LinearFactorModel(data.portfolios, data.factors).fit(cov_type='kernel', debiased=False)
+ res = LinearFactorModel(data.portfolios, data.factors).fit(
+ cov_type="kernel", debiased=False
+ )
std_mom = moments / moments.std(0)[None, :]
mom = std_mom.sum(1)
bw = kernel_optimal_bandwidth(mom)
@@ -168,7 +172,7 @@ def test_linear_model_parameters_risk_free(data):
block[j, k] = bc[i][j] * lam[k]
if j == k:
block[j, k] -= alphas[i]
- jac[block1:block2, loc:loc + nf + 1] = block
+ jac[block1:block2, loc : loc + nf + 1] = block
loc += nf + 1
# 2, 2
jac[block1:block2, block1:block2] = bc.T @ bc
@@ -181,14 +185,16 @@ def test_linear_model_parameters_risk_free(data):
block[row, col] = lam[j]
col += 1
row += 1
- jac[-nport:, :(nport * (nf + 1))] = block
+ jac[-nport:, : (nport * (nf + 1))] = block
# 3, 2
- jac[-nport:, (nport * (nf + 1)):(nport * (nf + 1)) + nf + 1] = bc
+ jac[-nport:, (nport * (nf + 1)) : (nport * (nf + 1)) + nf + 1] = bc
# 3, 3: already done since eye
mod_jac = mod._jacobian(bc, lam, alphas)
assert_allclose(mod_jac[:block1], jac[:block1])
assert_allclose(mod_jac[block1:block2, :block1], jac[block1:block2, :block1])
- assert_allclose(mod_jac[block1:block2, block1:block2], jac[block1:block2, block1:block2])
+ assert_allclose(
+ mod_jac[block1:block2, block1:block2], jac[block1:block2, block1:block2]
+ )
assert_allclose(mod_jac[block1:block2, block2:], jac[block1:block2, block2:])
assert_allclose(mod_jac[block2:], jac[block2:])
@@ -204,11 +210,13 @@ def test_linear_model_parameters_risk_free(data):
cov = (cov + cov.T) / 2
assert_allclose(cov, res.cov)
- acov = cov[:block1:(nf + 1), :block1:(nf + 1)]
+ acov = cov[: block1 : (nf + 1), : block1 : (nf + 1)]
jstat = float(alphas.T @ np.linalg.pinv(acov) @ alphas)
- assert_allclose(res.cov.values[:block1:(nf + 1), :block1:(nf + 1)], acov)
+ assert_allclose(res.cov.values[: block1 : (nf + 1), : block1 : (nf + 1)], acov)
assert_allclose(res.j_statistic.stat, jstat, rtol=1e-1)
- assert_allclose(res.j_statistic.pval, 1 - stats.chi2(nport - nf - 1).cdf(jstat), rtol=1e-2)
+ assert_allclose(
+ res.j_statistic.pval, 1 - stats.chi2(nport - nf - 1).cdf(jstat), rtol=1e-2
+ )
get_all(res)
@@ -222,7 +230,7 @@ def test_linear_model_parameters_risk_free_gls(data):
sigma_inv = np.linalg.inv(sigma)
mod = LinearFactorModel(data.portfolios, data.factors, risk_free=True, sigma=sigma)
- assert 'using GLS' in str(mod)
+ assert "using GLS" in str(mod)
res = mod.fit()
f = mod.factors.ndarray
p = mod.portfolios.ndarray
@@ -272,7 +280,7 @@ def test_linear_model_parameters_risk_free_gls(data):
block[j, k] = bct[i][j] * lam[k]
if j == k:
block[j, k] -= at[i]
- jac[block1:block2, loc:loc + nf + 1] = block
+ jac[block1:block2, loc : loc + nf + 1] = block
loc += nf + 1
# 2, 2
jac[block1:block2, block1:block2] = bc.T @ sigma_inv @ bc
@@ -285,14 +293,16 @@ def test_linear_model_parameters_risk_free_gls(data):
block[row, col] = lam[j]
col += 1
row += 1
- jac[-nport:, :(nport * (nf + 1))] = block
+ jac[-nport:, : (nport * (nf + 1))] = block
# 3, 2
- jac[-nport:, (nport * (nf + 1)):(nport * (nf + 1)) + nf + 1] = bc
+ jac[-nport:, (nport * (nf + 1)) : (nport * (nf + 1)) + nf + 1] = bc
# 3, 3: already done since eye
mod_jac = mod._jacobian(bc, lam, alphas)
assert_allclose(mod_jac[:block1], jac[:block1])
assert_allclose(mod_jac[block1:block2, :block1], jac[block1:block2, :block1])
- assert_allclose(mod_jac[block1:block2, block1:block2], jac[block1:block2, block1:block2])
+ assert_allclose(
+ mod_jac[block1:block2, block1:block2], jac[block1:block2, block1:block2]
+ )
assert_allclose(mod_jac[block1:block2, block2:], jac[block1:block2, block2:])
assert_allclose(mod_jac[block2:], jac[block2:])
@@ -308,16 +318,18 @@ def test_linear_model_parameters_risk_free_gls(data):
cov = (cov + cov.T) / 2
assert_allclose(cov, res.cov)
- acov = cov[:block1:(nf + 1), :block1:(nf + 1)]
+ acov = cov[: block1 : (nf + 1), : block1 : (nf + 1)]
jstat = float(alphas.T @ np.linalg.pinv(acov) @ alphas)
- assert_allclose(res.cov.values[:block1:(nf + 1), :block1:(nf + 1)], acov)
+ assert_allclose(res.cov.values[: block1 : (nf + 1), : block1 : (nf + 1)], acov)
assert_allclose(res.j_statistic.stat, jstat, rtol=1e-1)
- assert_allclose(res.j_statistic.pval, 1 - stats.chi2(nport - nf - 1).cdf(jstat), rtol=1e-2)
+ assert_allclose(
+ res.j_statistic.pval, 1 - stats.chi2(nport - nf - 1).cdf(jstat), rtol=1e-2
+ )
get_all(res)
-@pytest.mark.parametrize('output', ['numpy', 'pandas'])
+@pytest.mark.parametrize("output", ["numpy", "pandas"])
def test_infeasible(output):
data = generate_data(nfactor=10, nportfolio=20, nobs=10, output=output)
with pytest.raises(ValueError):
diff --git a/linearmodels/tests/asset_pricing/test_model.py b/linearmodels/tests/asset_pricing/test_model.py
index becb322a4f..100dbcd873 100644
--- a/linearmodels/tests/asset_pricing/test_model.py
+++ b/linearmodels/tests/asset_pricing/test_model.py
@@ -12,47 +12,49 @@
from linearmodels.iv.model import _OLS
from linearmodels.tests.asset_pricing._utility import generate_data, get_all
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
-@pytest.fixture(params=['numpy', 'pandas'])
+@pytest.fixture(params=["numpy", "pandas"])
def data(request):
return generate_data(nportfolio=10, output=request.param)
def test_linear_model_gmm_smoke(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors)
- res = mod.fit(cov_type='robust', disp=5)
+ res = mod.fit(cov_type="robust", disp=5)
get_all(res)
def test_linear_model_gmm_smoke_iterate(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors)
- res = mod.fit(cov_type='robust', disp=5, steps=20)
+ res = mod.fit(cov_type="robust", disp=5, steps=20)
get_all(res)
def test_linear_model_gmm_smoke_risk_free(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors, risk_free=True)
- res = mod.fit(cov_type='robust', disp=10)
+ res = mod.fit(cov_type="robust", disp=10)
get_all(res)
def test_linear_model_gmm_kernel_smoke(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors)
- res = mod.fit(cov_type='kernel', disp=10)
+ res = mod.fit(cov_type="kernel", disp=10)
get_all(res)
def test_linear_model_gmm_kernel_bandwidth_smoke(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors)
- res = mod.fit(cov_type='kernel', bandwidth=10, disp=10)
+ res = mod.fit(cov_type="kernel", bandwidth=10, disp=10)
get_all(res)
def test_linear_model_gmm_cue_smoke(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors, risk_free=True)
- res = mod.fit(cov_type='robust', disp=10, use_cue=True)
+ res = mod.fit(cov_type="robust", disp=10, use_cue=True)
get_all(res)
@@ -71,16 +73,18 @@ def test_linear_model_time_series(data):
loc = 0
for i in range(data.portfolios.shape[1]):
if isinstance(data.portfolios, pd.DataFrame):
- p = data.portfolios.iloc[:, i:(i + 1)]
+ p = data.portfolios.iloc[:, i : (i + 1)]
else:
- p = data.portfolios[:, i:(i + 1)]
- ols_res = _OLS(p, factors).fit(cov_type='robust', debiased=True)
+ p = data.portfolios[:, i : (i + 1)]
+ ols_res = _OLS(p, factors).fit(cov_type="robust", debiased=True)
all_params.extend(list(ols_res.params))
all_tstats.extend(list(ols_res.tstats))
- x[:, loc:(loc + nf + 1)] = factors
- e[:, loc:(loc + nf + 1)] = ols_res.resids.values[:, None]
+ x[:, loc : (loc + nf + 1)] = factors
+ e[:, loc : (loc + nf + 1)] = ols_res.resids.values[:, None]
loc += nf + 1
- cov = res.cov.values[(nf + 1) * i:(nf + 1) * (i + 1), (nf + 1) * i:(nf + 1) * (i + 1)]
+ cov = res.cov.values[
+ (nf + 1) * i : (nf + 1) * (i + 1), (nf + 1) * i : (nf + 1) * (i + 1)
+ ]
ols_cov = ols_res.cov.values
assert_allclose(cov, ols_cov)
@@ -102,7 +106,7 @@ def test_linear_model_time_series(data):
assert_allclose(cov, res.cov.values)
alphas = np.array(all_params)[0::nfp1][:, None]
- alpha_cov = cov[0:(nfp1 * nport):nfp1, 0:(nfp1 * nport):nfp1]
+ alpha_cov = cov[0 : (nfp1 * nport) : nfp1, 0 : (nfp1 * nport) : nfp1]
stat_direct = float(alphas.T @ np.linalg.inv(alpha_cov) @ alphas)
assert_allclose(res.j_statistic.stat, stat_direct)
assert_allclose(1.0 - stats.chi2.cdf(stat_direct, nport), res.j_statistic.pval)
@@ -110,13 +114,13 @@ def test_linear_model_time_series(data):
def test_linear_model_time_series_kernel_smoke(data):
mod = TradedFactorModel(data.portfolios, data.factors)
- mod.fit(cov_type='kernel')
+ mod.fit(cov_type="kernel")
def test_linear_model_time_series_error(data):
mod = TradedFactorModel(data.portfolios, data.factors)
with pytest.raises(ValueError):
- mod.fit(cov_type='unknown')
+ mod.fit(cov_type="unknown")
def test_errors(data):
@@ -126,26 +130,26 @@ def test_errors(data):
p2 = p.copy()
p3 = p.copy().iloc[:-1]
p4 = p.copy()
- p5 = p.copy().iloc[:f.shape[1] - 1, :1]
- p4 = p4.iloc[:, :(f.shape[1] - 1)]
- p2['dupe'] = p.iloc[:, 0]
- p['const'] = 1.0
+ p5 = p.copy().iloc[: f.shape[1] - 1, :1]
+ p4 = p4.iloc[:, : (f.shape[1] - 1)]
+ p2["dupe"] = p.iloc[:, 0]
+ p["const"] = 1.0
f5 = f.copy()
- f5 = f5.iloc[:p5.shape[0]]
+ f5 = f5.iloc[: p5.shape[0]]
f2 = f.copy()
- f2['dupe'] = f.iloc[:, 0]
- f['const'] = 1.0
+ f2["dupe"] = f.iloc[:, 0]
+ f["const"] = 1.0
else:
p2 = np.c_[p, p[:, [0]]]
p3 = p.copy()[:-1]
p4 = p.copy()
- p5 = p.copy()[:f.shape[1] - 1, :1]
- p4 = p4[:, :(f.shape[1] - 1)]
+ p5 = p.copy()[: f.shape[1] - 1, :1]
+ p4 = p4[:, : (f.shape[1] - 1)]
p = np.c_[np.ones((p.shape[0], 1)), p]
f5 = f.copy()
- f5 = f5[:p5.shape[0]]
+ f5 = f5[: p5.shape[0]]
f2 = np.c_[f, f[:, [0]]]
f = np.c_[np.ones((f.shape[0], 1)), f]
@@ -187,10 +191,10 @@ def test_drop_missing(data):
def test_unknown_kernel(data):
mod = LinearFactorModel(data.portfolios, data.factors)
with pytest.raises(ValueError):
- mod.fit(cov_type='unknown')
+ mod.fit(cov_type="unknown")
mod = LinearFactorModelGMM(data.portfolios, data.factors)
with pytest.raises(ValueError):
- mod.fit(cov_type='unknown')
+ mod.fit(cov_type="unknown")
def test_all_missing():
@@ -202,13 +206,13 @@ def test_all_missing():
def test_repr(data):
mod = LinearFactorModelGMM(data.portfolios, data.factors)
- assert 'LinearFactorModelGMM' in mod.__repr__()
- assert str(data.portfolios.shape[1]) + ' test portfolios' in mod.__repr__()
- assert str(data.factors.shape[1]) + ' factors' in mod.__repr__()
+ assert "LinearFactorModelGMM" in mod.__repr__()
+ assert str(data.portfolios.shape[1]) + " test portfolios" in mod.__repr__()
+ assert str(data.factors.shape[1]) + " factors" in mod.__repr__()
mod = LinearFactorModel(data.portfolios, data.factors, risk_free=True)
- assert 'LinearFactorModel' in mod.__repr__()
- assert 'Estimated risk-free' in mod.__repr__()
- assert 'True' in mod.__repr__()
+ assert "LinearFactorModel" in mod.__repr__()
+ assert "Estimated risk-free" in mod.__repr__()
+ assert "True" in mod.__repr__()
mod = TradedFactorModel(data.portfolios, data.factors)
- assert 'TradedFactorModel' in mod.__repr__()
+ assert "TradedFactorModel" in mod.__repr__()
assert str(hex(id(mod))) in mod.__repr__()
diff --git a/linearmodels/tests/datasets/test_datasets.py b/linearmodels/tests/datasets/test_datasets.py
index d25011d42d..02a40442fb 100644
--- a/linearmodels/tests/datasets/test_datasets.py
+++ b/linearmodels/tests/datasets/test_datasets.py
@@ -5,9 +5,20 @@
fringe, jobtraining, meps, mroz, munnell,
wage, wage_panel)
-DATASETS = [birthweight, card, fertility, french, fringe,
- jobtraining, meps, mroz, munnell, wage, wage_panel]
-ids = list(map(lambda x: x.__name__.split('.')[-1], DATASETS))
+DATASETS = [
+ birthweight,
+ card,
+ fertility,
+ french,
+ fringe,
+ jobtraining,
+ meps,
+ mroz,
+ munnell,
+ wage,
+ wage_panel,
+]
+ids = list(map(lambda x: x.__name__.split(".")[-1], DATASETS))
@pytest.fixture(params=DATASETS, ids=ids)
diff --git a/linearmodels/tests/iv/_utility.py b/linearmodels/tests/iv/_utility.py
index d75aaccf7b..3083627cc6 100644
--- a/linearmodels/tests/iv/_utility.py
+++ b/linearmodels/tests/iv/_utility.py
@@ -17,11 +17,11 @@ def generate_data(nkp=(1000, 5, 3)):
v = np.random.multivariate_normal(np.zeros(r.shape[0]), r, n)
x = v[:, :k]
- z = v[:, 2:k + p]
+ z = v[:, 2 : k + p]
e = v[:, [-1]]
endog = x[:, :2]
exog = x[:, 2:]
- instr = z[:, k - 2:]
+ instr = z[:, k - 2 :]
params = np.arange(1, k + 1) / k
params = params[:, None]
y = x @ params + e
@@ -37,9 +37,27 @@ def generate_data(nkp=(1000, 5, 3)):
xzizx = x.T @ z @ z.T @ x / nobs
xzizx_inv = np.linalg.inv(xzizx)
- return AttrDict(nobs=nobs, e=e, x=x, y=y, z=z, xhat=xhat,
- params=params, s2=s2, s2_debiased=s2_debiased,
- clusters=clusters, nvar=nvar, v=v, vinv=vinv, vk=vk,
- i=np.eye(k + p - 2), kappa=kappa,
- xzizx=xzizx, xzizx_inv=xzizx_inv,
- dep=dep, exog=exog, endog=endog, instr=instr)
+ return AttrDict(
+ nobs=nobs,
+ e=e,
+ x=x,
+ y=y,
+ z=z,
+ xhat=xhat,
+ params=params,
+ s2=s2,
+ s2_debiased=s2_debiased,
+ clusters=clusters,
+ nvar=nvar,
+ v=v,
+ vinv=vinv,
+ vk=vk,
+ i=np.eye(k + p - 2),
+ kappa=kappa,
+ xzizx=xzizx,
+ xzizx_inv=xzizx_inv,
+ dep=dep,
+ exog=exog,
+ endog=endog,
+ instr=instr,
+ )
diff --git a/linearmodels/tests/iv/results/execute-stata-simulated-data.py b/linearmodels/tests/iv/results/execute-stata-simulated-data.py
index 676638dbb9..928cbec108 100644
--- a/linearmodels/tests/iv/results/execute-stata-simulated-data.py
+++ b/linearmodels/tests/iv/results/execute-stata-simulated-data.py
@@ -3,48 +3,74 @@
from os.path import join
import subprocess
-STATA_PATH = join('C:\\', 'Program Files (x86)', 'Stata13', 'StataMP-64.exe')
+STATA_PATH = join("C:\\", "Program Files (x86)", "Stata13", "StataMP-64.exe")
-dtafile = join(os.getcwd(), 'simulated-data.dta')
+dtafile = join(os.getcwd(), "simulated-data.dta")
start = """
use {dtafile}, clear \n
tsset time \n
-""".format(dtafile=dtafile)
+""".format(
+ dtafile=dtafile
+)
model = r"""
ivregress {method} {depvar} {exog_var} ///
({endog_var} = {instr}) {weight_opt}, {variance_option} {other_option}
"""
-methods = ['2sls', 'liml', 'gmm']
-depvars = ['y_unadjusted', 'y_robust', 'y_clustered', 'y_kernel']
-variance_options = ['vce(unadjusted)', 'vce(robust)', 'vce(cluster cluster_id)',
- 'vce(hac bartlett 12)']
+methods = ["2sls", "liml", "gmm"]
+depvars = ["y_unadjusted", "y_robust", "y_clustered", "y_kernel"]
+variance_options = [
+ "vce(unadjusted)",
+ "vce(robust)",
+ "vce(cluster cluster_id)",
+ "vce(hac bartlett 12)",
+]
depvar_with_var = list(zip(depvars, variance_options))
-exog_vars = ['', 'x3 x4 x5']
-endog_vars = ['x1', 'x1 x2']
-instr = ['z1', 'z1 z2']
-other_options = ['', 'small', 'noconstant', 'small noconstant', 'small center',
- 'center', 'center noconstant', 'small center noconstant']
-weight_options = [' ', ' [aweight=weights] ']
-inputs = [methods, depvar_with_var, exog_vars, endog_vars, instr, other_options, weight_options]
+exog_vars = ["", "x3 x4 x5"]
+endog_vars = ["x1", "x1 x2"]
+instr = ["z1", "z1 z2"]
+other_options = [
+ "",
+ "small",
+ "noconstant",
+ "small noconstant",
+ "small center",
+ "center",
+ "center noconstant",
+ "small center noconstant",
+]
+weight_options = [" ", " [aweight=weights] "]
+inputs = [
+ methods,
+ depvar_with_var,
+ exog_vars,
+ endog_vars,
+ instr,
+ other_options,
+ weight_options,
+]
configs = []
for val in product(*inputs):
method, dvo, exog, endog, instr, other_opt, weight_opt = val
depvar, var_opt = dvo
- if (len(endog) > len(instr)) or (other_opt.find('center') >= 0 and method != 'gmm'):
+ if (len(endog) > len(instr)) or (other_opt.find("center") >= 0 and method != "gmm"):
continue
- if method == 'gmm':
- var_opt = var_opt.replace('vce', 'wmatrix')
-
- configs.append({'method': method,
- 'depvar': depvar,
- 'exog_var': exog,
- 'endog_var': endog,
- 'instr': instr,
- 'variance_option': var_opt,
- 'other_option': other_opt,
- 'weight_opt': weight_opt})
+ if method == "gmm":
+ var_opt = var_opt.replace("vce", "wmatrix")
+
+ configs.append(
+ {
+ "method": method,
+ "depvar": depvar,
+ "exog_var": exog,
+ "endog_var": endog,
+ "instr": instr,
+ "variance_option": var_opt,
+ "other_option": other_opt,
+ "weight_opt": weight_opt,
+ }
+ )
results = """
estout using {outfile}, cells(b(fmt(%13.12g)) t(fmt(%13.12g))) """
@@ -65,8 +91,8 @@
estout matrix(W, fmt(%13.12g)) using {outfile}, append
"""
-m = '{method}-num_endog_{num_endog}-num_exog_{num_exog}-num_instr_{num_instr}'
-m = m + '-weighted_{weighted}-{variance}-{other}'
+m = "{method}-num_endog_{num_endog}-num_exog_{num_exog}-num_instr_{num_instr}"
+m = m + "-weighted_{weighted}-{variance}-{other}"
section_header = """
file open myfile using {outfile}, write append
file write myfile _n _n "########## !"""
@@ -75,45 +101,47 @@
file close myfile
"""
-outfile = os.path.join(os.getcwd(), 'stata-iv-simulated-results.txt')
+outfile = os.path.join(os.getcwd(), "stata-iv-simulated-results.txt")
if os.path.exists(outfile):
os.unlink(outfile)
def count_vars(v):
- if v.strip() == '':
+ if v.strip() == "":
return 0
v = v.strip()
- while ' ' in v:
- v = v.replace(' ', ' ')
- return len(v.split(' '))
+ while " " in v:
+ v = v.replace(" ", " ")
+ return len(v.split(" "))
-with open('simulated-results.do', 'w') as stata:
+with open("simulated-results.do", "w") as stata:
stata.write(start)
for config in configs:
- sec_header = {'method': config['method'],
- 'num_endog': count_vars(config['endog_var']),
- 'num_exog': count_vars(config['exog_var']),
- 'num_instr': count_vars(config['instr']),
- 'variance': config['variance_option'],
- 'other': config['other_option'].replace(' ', '_'),
- 'outfile': outfile,
- 'weighted': 'aweight' in config['weight_opt']}
+ sec_header = {
+ "method": config["method"],
+ "num_endog": count_vars(config["endog_var"]),
+ "num_exog": count_vars(config["exog_var"]),
+ "num_instr": count_vars(config["instr"]),
+ "variance": config["variance_option"],
+ "other": config["other_option"].replace(" ", "_"),
+ "outfile": outfile,
+ "weighted": "aweight" in config["weight_opt"],
+ }
stata.write(section_header.format(**sec_header))
stata.write(model.format(**config))
- small = config['other_option'].find('small') >= 0
- extra = ' J ' if config['method'] == 'gmm' else ' kappa '
- extra += ' F p ' if small else ' chi2 p '
+ small = config["other_option"].find("small") >= 0
+ extra = " J " if config["method"] == "gmm" else " kappa "
+ extra += " F p " if small else " chi2 p "
stata.write(results.format(outfile=outfile, extra=extra))
- if config['method'] == 'gmm':
+ if config["method"] == "gmm":
stata.write(gmm_extra.format(outfile=outfile))
- stata.write('\n')
+ stata.write("\n")
-do_file = join(os.getcwd(), 'simulated-results.do')
-cmd = [STATA_PATH, '/e', 'do', do_file]
-print(' '.join(cmd))
+do_file = join(os.getcwd(), "simulated-results.do")
+cmd = [STATA_PATH, "/e", "do", do_file]
+print(" ".join(cmd))
subprocess.call(cmd)
diff --git a/linearmodels/tests/iv/results/execute-stata.py b/linearmodels/tests/iv/results/execute-stata.py
index cd2d39eafe..2b01f1e340 100644
--- a/linearmodels/tests/iv/results/execute-stata.py
+++ b/linearmodels/tests/iv/results/execute-stata.py
@@ -2,7 +2,7 @@
from os.path import join
import subprocess
-STATA_PATH = join('C:\\', 'Program Files (x86)', 'Stata13', 'StataMP-64.exe')
+STATA_PATH = join("C:\\", "Program Files (x86)", "Stata13", "StataMP-64.exe")
start = """
use http://www.stata-press.com/data/r13/hsng, clear \n
@@ -33,36 +33,40 @@
file close myfile
"""
-methods = ['2sls', 'liml', 'gmm']
-outfile = os.path.join(os.getcwd(), 'stata-iv-housing-results.txt')
+methods = ["2sls", "liml", "gmm"]
+outfile = os.path.join(os.getcwd(), "stata-iv-housing-results.txt")
if os.path.exists(outfile):
os.unlink(outfile)
-variance_options = [', vce(unadjusted)', ', vce(robust)', ', vce(cluster division)']
-descr = ['unadjusted', 'robust', 'cluster']
+variance_options = [", vce(unadjusted)", ", vce(robust)", ", vce(cluster division)"]
+descr = ["unadjusted", "robust", "cluster"]
-with open('temp.do', 'w') as stata:
+with open("temp.do", "w") as stata:
stata.write(start)
for small in (True, False):
for method in methods:
for vo, desc in zip(variance_options, descr):
- small_text = 'small' if small else 'asymptotic'
- stata.write(section_header.format(outfile=outfile, method=method, desc=desc,
- small=small_text))
- desc += '-small' if small else ''
- vo += ' small' if small else ''
+ small_text = "small" if small else "asymptotic"
+ stata.write(
+ section_header.format(
+ outfile=outfile, method=method, desc=desc, small=small_text
+ )
+ )
+ desc += "-small" if small else ""
+ vo += " small" if small else ""
of = outfile.format(method=method, descr=desc)
- extra = ' J ' if method == 'gmm' else ' kappa '
- extra += ' F p ' if small else ' chi2 p '
- cmd = iv_tempplate.format(outfile=of, variance_option=vo, method=method,
- extra=extra)
- if 'gmm' in method:
- cmd = cmd.replace('vce', 'wmatrix')
+ extra = " J " if method == "gmm" else " kappa "
+ extra += " F p " if small else " chi2 p "
+ cmd = iv_tempplate.format(
+ outfile=of, variance_option=vo, method=method, extra=extra
+ )
+ if "gmm" in method:
+ cmd = cmd.replace("vce", "wmatrix")
stata.write(cmd)
- if 'gmm' in method:
+ if "gmm" in method:
stata.write(gmm_extra.format(outfile=of))
- stata.write('\n')
+ stata.write("\n")
-do_file = join(os.getcwd(), 'temp.do')
-stata_cmd = [STATA_PATH, '/e', 'do', do_file]
-print(' '.join(stata_cmd))
+do_file = join(os.getcwd(), "temp.do")
+stata_cmd = [STATA_PATH, "/e", "do", do_file]
+print(" ".join(stata_cmd))
subprocess.call(stata_cmd)
diff --git a/linearmodels/tests/iv/results/read_stata_results.py b/linearmodels/tests/iv/results/read_stata_results.py
index 535ee7c95a..d7572e0b7e 100644
--- a/linearmodels/tests/iv/results/read_stata_results.py
+++ b/linearmodels/tests/iv/results/read_stata_results.py
@@ -9,32 +9,32 @@ def repl_const(df):
index = list(df.index)
replace_cols = list(df.columns) == index
for i, v in enumerate(index):
- if v == '_cons':
- index[i] = 'const'
+ if v == "_cons":
+ index[i] = "const"
df.index = index
if replace_cols:
df.columns = index
for c in df:
- df[c] = pd.to_numeric(df[c], errors='coerce')
+ df[c] = pd.to_numeric(df[c], errors="coerce")
return df
def parse_file(name):
blocks = defaultdict(list)
- current_key = ''
- with open(name, 'r') as stata:
+ current_key = ""
+ with open(name, "r") as stata:
for line in stata:
- if line.strip() == '':
+ if line.strip() == "":
continue
- if line.startswith('###'):
- current_key = line.split('!')[1]
+ if line.startswith("###"):
+ current_key = line.split("!")[1]
continue
blocks[current_key].append(line)
return blocks
def parse_block(block):
- block = [l.strip().split('\t') for l in block]
+ block = [l.strip().split("\t") for l in block]
params = []
cov = []
weight_mat = []
@@ -44,33 +44,33 @@ def parse_block(block):
if len(line) == 2:
params.append(line)
elif len(line) == 1:
- if line[0].startswith('***'):
+ if line[0].startswith("***"):
break
try:
float(line[0])
params[-1].append(line[0])
except ValueError:
pass
- params = pd.DataFrame(params, columns=['variable', 'params', 'tstats'])
- params = repl_const(params.set_index('variable'))
- stats = params.loc[params.tstats.isnull(), 'params']
+ params = pd.DataFrame(params, columns=["variable", "params", "tstats"])
+ params = repl_const(params.set_index("variable"))
+ stats = params.loc[params.tstats.isnull(), "params"]
params = params.loc[params.tstats.notnull()]
- for line in block[last + 2:]:
- if len(line) == 1 and line[0].startswith('***'):
+ for line in block[last + 2 :]:
+ if len(line) == 1 and line[0].startswith("***"):
break
cov.append(line)
- cov[0].insert(0, 'variable')
+ cov[0].insert(0, "variable")
last += i + 2
cov = pd.DataFrame(cov[1:], columns=cov[0])
- cov = repl_const(cov.set_index('variable'))
+ cov = repl_const(cov.set_index("variable"))
if len(block) > (last + 1):
- weight_mat = block[last + 2:]
- weight_mat[0].insert(0, 'variable')
+ weight_mat = block[last + 2 :]
+ weight_mat[0].insert(0, "variable")
weight_mat = pd.DataFrame(weight_mat[1:], columns=weight_mat[0])
- weight_mat = repl_const(weight_mat.set_index('variable'))
+ weight_mat = repl_const(weight_mat.set_index("variable"))
return AttrDict(params=params, cov=cov, weight_mat=weight_mat, stats=stats)
@@ -78,21 +78,28 @@ def parse_block(block):
def finalize(params, stats, cov, weight_mat):
tstats = params.tstats
params = params.params
- out = AttrDict(params=params, tstats=tstats, stats=stats, cov=cov, weight_mat=weight_mat)
+ out = AttrDict(
+ params=params, tstats=tstats, stats=stats, cov=cov, weight_mat=weight_mat
+ )
for key in stats.index:
out[key] = stats[key]
- fixes = {'model_ss': 'mss', 'resid_ss': 'rss', 'rsquared': 'r2', 'rsquared_adj': 'r2_a'}
+ fixes = {
+ "model_ss": "mss",
+ "resid_ss": "rss",
+ "rsquared": "r2",
+ "rsquared_adj": "r2_a",
+ }
for key in fixes:
if fixes[key] in out:
out[key] = out[fixes[key]]
else:
out[key] = None
- if 'chi2' in out:
- out['f_statistic'] = out['chi2']
- elif 'F' in out:
- out['f_statistic'] = out['F']
+ if "chi2" in out:
+ out["f_statistic"] = out["chi2"]
+ elif "F" in out:
+ out["f_statistic"] = out["F"]
else:
- out['f_statistic'] = None
+ out["f_statistic"] = None
return out
@@ -105,10 +112,10 @@ def process_results(filename):
return blocks
-if __name__ == '__main__':
+if __name__ == "__main__":
import os
- blocks = parse_file(os.path.join(os.getcwd(), 'stata-iv-simulated-results.txt'))
+ blocks = parse_file(os.path.join(os.getcwd(), "stata-iv-simulated-results.txt"))
for key in blocks:
out = parse_block(blocks[key])
- finalize(out['params'], out['stats'], out['cov'], out['weight_mat']).keys()
+ finalize(out["params"], out["stats"], out["cov"], out["weight_mat"]).keys()
diff --git a/linearmodels/tests/iv/results/simulated-test-data.py b/linearmodels/tests/iv/results/simulated-test-data.py
index 729f5727da..e5eea4d5c8 100644
--- a/linearmodels/tests/iv/results/simulated-test-data.py
+++ b/linearmodels/tests/iv/results/simulated-test-data.py
@@ -24,13 +24,13 @@
k, p, n = 5, 2, 600
r = np.empty((k + p + 1, k + p + 1))
r[:, :] = 0.5
-r[p:k + p, -1] = r[-1, p:k + 1 + p] = 0
+r[p : k + p, -1] = r[-1, p : k + 1 + p] = 0
r[-1, -1] = 0.5
r += 0.5 * np.eye(k + p + 1)
w = multivariate_normal(np.zeros(k + p + 1), r, n)
x = w[:, :k]
-z = w[:, k:k + p]
+z = w[:, k : k + p]
e = w[:, -1]
x = add_constant(x)
beta = np.arange(k + 1) / k
@@ -52,7 +52,7 @@
r += 0.5 * np.eye(cluster_size)
rsqrt = np.linalg.cholesky(r)
for i in range(0, len(r), 5):
- e[i:i + 5] = (rsqrt @ e[i:i + 5][:, None]).squeeze()
+ e[i : i + 5] = (rsqrt @ e[i : i + 5][:, None]).squeeze()
e_cluster = e
clusters = np.tile(np.arange(n // 5)[None, :], (5, 1)).T.ravel()
@@ -70,10 +70,43 @@
weights = weights / weights.mean()
time = np.arange(n)
-data = np.c_[time, y_unadjusted, y_robust, y_clustered, y_kernel, x, z, e_homo, e_hetero,
- e_cluster, e_autoc, clusters, weights]
-data = pd.DataFrame(data, columns=['time', 'y_unadjusted', 'y_robust', 'y_clustered',
- 'y_kernel', '_cons', 'x1', 'x2', 'x3',
- 'x4', 'x5', 'z1', 'z2', 'e_homo', 'e_hetero', 'e_cluster',
- 'e_autoc', 'cluster_id', 'weights'])
-data.to_stata('simulated-data.dta')
+data = np.c_[
+ time,
+ y_unadjusted,
+ y_robust,
+ y_clustered,
+ y_kernel,
+ x,
+ z,
+ e_homo,
+ e_hetero,
+ e_cluster,
+ e_autoc,
+ clusters,
+ weights,
+]
+data = pd.DataFrame(
+ data,
+ columns=[
+ "time",
+ "y_unadjusted",
+ "y_robust",
+ "y_clustered",
+ "y_kernel",
+ "_cons",
+ "x1",
+ "x2",
+ "x3",
+ "x4",
+ "x5",
+ "z1",
+ "z2",
+ "e_homo",
+ "e_hetero",
+ "e_cluster",
+ "e_autoc",
+ "cluster_id",
+ "weights",
+ ],
+)
+data.to_stata("simulated-data.dta")
diff --git a/linearmodels/tests/iv/test_absorbing.py b/linearmodels/tests/iv/test_absorbing.py
index 2af481a4bc..a720c03bc3 100644
--- a/linearmodels/tests/iv/test_absorbing.py
+++ b/linearmodels/tests/iv/test_absorbing.py
@@ -23,7 +23,9 @@
from linearmodels.utility import AttrDict, MissingValueWarning
NOBS = 100
-pytestmark = pytest.mark.filterwarnings('ignore:the matrix subclass:PendingDeprecationWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore:the matrix subclass:PendingDeprecationWarning"
+)
class Hasher(object):
@@ -31,9 +33,11 @@ class Hasher(object):
def hash_func(self):
try:
import xxhash
+
return xxhash.xxh64()
except ImportError:
import hashlib
+
return hashlib.sha1()
def single(self, value):
@@ -45,7 +49,7 @@ def single(self, value):
hasher = Hasher()
-@pytest.fixture(scope='function')
+@pytest.fixture(scope="function")
def rs(request):
return np.random.RandomState(12345678)
@@ -64,21 +68,26 @@ def random_cont(size, rs=None):
return pd.DataFrame(series)
-@pytest.fixture(scope='module', params=[1, 2, 3])
+@pytest.fixture(scope="module", params=[1, 2, 3])
def cat(request):
rs = np.random.RandomState(0)
return pd.DataFrame(
- {str(i): random_cat(4, NOBS, rs=rs) for i in range(request.param)})
+ {str(i): random_cat(4, NOBS, rs=rs) for i in range(request.param)}
+ )
-@pytest.fixture(scope='module', params=[1, 2])
+@pytest.fixture(scope="module", params=[1, 2])
def cont(request):
rs = np.random.RandomState(0)
return pd.DataFrame(
- {'cont' + str(i): pd.Series(rs.standard_normal(NOBS)) for i in range(request.param)})
+ {
+ "cont" + str(i): pd.Series(rs.standard_normal(NOBS))
+ for i in range(request.param)
+ }
+ )
-@pytest.fixture(scope='module', params=[True, False])
+@pytest.fixture(scope="module", params=[True, False])
def weights(request):
if not request.param:
return None
@@ -86,7 +95,7 @@ def weights(request):
return rs.chisquare(10, NOBS) / 10.0
-@pytest.fixture(scope='module', params=[0, 1, 2])
+@pytest.fixture(scope="module", params=[0, 1, 2])
def interact(request):
if not request.param:
return None
@@ -99,8 +108,18 @@ def interact(request):
return interactions
-def generate_data(k=3, const=True, nfactors=1, factor_density=10, nobs=2000, cont_interactions=1,
- format='interaction', singleton_interaction=False, weighted=False, ncont=0):
+def generate_data(
+ k=3,
+ const=True,
+ nfactors=1,
+ factor_density=10,
+ nobs=2000,
+ cont_interactions=1,
+ format="interaction",
+ singleton_interaction=False,
+ weighted=False,
+ ncont=0,
+):
rs = np.random.RandomState(1234567890)
density = [factor_density] * max(nfactors, cont_interactions)
x = rs.standard_normal((nobs, k))
@@ -122,9 +141,11 @@ def generate_data(k=3, const=True, nfactors=1, factor_density=10, nobs=2000, con
if factors:
factors = pd.concat(factors, 1)
- if format == 'interaction':
+ if format == "interaction":
if nfactors and ncont:
- factors = Interaction(factors.iloc[:, :nfactors], factors.iloc[:, nfactors:])
+ factors = Interaction(
+ factors.iloc[:, :nfactors], factors.iloc[:, nfactors:]
+ )
elif nfactors:
factors = Interaction(factors, None)
else:
@@ -138,10 +159,12 @@ def generate_data(k=3, const=True, nfactors=1, factor_density=10, nobs=2000, con
fact = rs.randint(ncat, size=(nobs))
effects = rs.standard_normal(nobs)
y += effects
- df = pd.DataFrame(pd.Series(pd.Categorical(fact)), columns=['fact{0}'.format(i)])
- df_eff = pd.DataFrame(effects[:, None], columns=['effect_{0}'.format(i)])
+ df = pd.DataFrame(
+ pd.Series(pd.Categorical(fact)), columns=["fact{0}".format(i)]
+ )
+ df_eff = pd.DataFrame(effects[:, None], columns=["effect_{0}".format(i)])
interactions.append(Interaction(df, df_eff))
- if format == 'pandas':
+ if format == "pandas":
for i, interact in enumerate(interactions):
interactions[i] = pd.concat([interact.cat, interact.cont], 1)
interactions = interactions if interactions else None
@@ -152,7 +175,9 @@ def generate_data(k=3, const=True, nfactors=1, factor_density=10, nobs=2000, con
else:
weights = None
- return AttrDict(y=y, x=x, absorb=factors, interactions=interactions, weights=weights)
+ return AttrDict(
+ y=y, x=x, absorb=factors, interactions=interactions, weights=weights
+ )
# Permutations, k in (0,3), const in (True,False), factors=(0,1,2), interactions in (0,1)
@@ -160,55 +185,66 @@ def generate_data(k=3, const=True, nfactors=1, factor_density=10, nobs=2000, con
# k=3, const=True, nfactors=1, factor_density=10, nobs=2000, cont_interactions=1,
# format='interaction', singleton_interaction=False
-configs = product([0, 3], # k
- [False, True], # constant
- [1, 2, 0], # factors
- [10], # density
- [2000], # nobs
- [0, 1], # cont interactions
- ['interaction', 'pandas'], # format
- [False, True], # singleton
- [False, True], # weighted
- [0, 1] # ncont
- )
+configs = product(
+ [0, 3], # k
+ [False, True], # constant
+ [1, 2, 0], # factors
+ [10], # density
+ [2000], # nobs
+ [0, 1], # cont interactions
+ ["interaction", "pandas"], # format
+ [False, True], # singleton
+ [False, True], # weighted
+ [0, 1], # ncont
+)
configs = [c for c in configs if (c[2] or c[5] or c[9])]
-id_str = 'k: {0}, const: {1}, nfactors: {2}, density: {3}, nobs: {4}, ' \
- 'cont_interacts: {5}, format:{6}, singleton:{7}, weighted: {8}, ncont: {9}'
+id_str = (
+ "k: {0}, const: {1}, nfactors: {2}, density: {3}, nobs: {4}, "
+ "cont_interacts: {5}, format:{6}, singleton:{7}, weighted: {8}, ncont: {9}"
+)
ids = [id_str.format(*config) for config in configs]
-@pytest.fixture(scope='module', params=configs, ids=ids)
+@pytest.fixture(scope="module", params=configs, ids=ids)
def data(request):
return generate_data(*request.param)
-configs_ols = product([0, 3], # k
- [False, True], # constant
- [1, 2, 0], # factors
- [50], # density
- [500], # nobs
- [0, 1], # cont interactions
- ['interaction'], # format
- [False], # singleton
- [False, True], # weighted
- [0, 1] # ncont
- )
+configs_ols = product(
+ [0, 3], # k
+ [False, True], # constant
+ [1, 2, 0], # factors
+ [50], # density
+ [500], # nobs
+ [0, 1], # cont interactions
+ ["interaction"], # format
+ [False], # singleton
+ [False, True], # weighted
+ [0, 1], # ncont
+)
configs_ols = [c for c in configs_ols if (c[0] or c[1])]
-id_str = 'k: {0}, const: {1}, nfactors: {2}, density: {3}, nobs: {4}, ' \
- 'cont_interacts: {5}, format:{6}, singleton:{7}, weighted: {8}, ncont: {9}'
+id_str = (
+ "k: {0}, const: {1}, nfactors: {2}, density: {3}, nobs: {4}, "
+ "cont_interacts: {5}, format:{6}, singleton:{7}, weighted: {8}, ncont: {9}"
+)
ids_ols = [id_str.format(*config) for config in configs_ols]
-@pytest.fixture(scope='module', params=configs_ols, ids=ids_ols)
+@pytest.fixture(scope="module", params=configs_ols, ids=ids_ols)
def ols_data(request):
return generate_data(*request.param)
def test_smoke(data):
- mod = AbsorbingLS(data.y, data.x, absorb=data.absorb, interactions=data.interactions,
- weights=data.weights)
+ mod = AbsorbingLS(
+ data.y,
+ data.x,
+ absorb=data.absorb,
+ interactions=data.interactions,
+ weights=data.weights,
+ )
res = mod.fit()
assert isinstance(res.summary, Summary)
assert isinstance(str(res.summary), str)
@@ -216,29 +252,44 @@ def test_smoke(data):
def test_absorbing_exceptions(rs):
with pytest.raises(TypeError):
- AbsorbingLS(rs.standard_normal(NOBS), rs.standard_normal((NOBS, 2)),
- absorb=rs.standard_normal((NOBS, 2)))
+ AbsorbingLS(
+ rs.standard_normal(NOBS),
+ rs.standard_normal((NOBS, 2)),
+ absorb=rs.standard_normal((NOBS, 2)),
+ )
with pytest.raises(ValueError):
AbsorbingLS(rs.standard_normal(NOBS), rs.standard_normal((NOBS - 1, 2)))
with pytest.raises(ValueError):
- AbsorbingLS(rs.standard_normal(NOBS), rs.standard_normal((NOBS, 2)),
- absorb=pd.DataFrame(rs.standard_normal((NOBS - 1, 1))))
+ AbsorbingLS(
+ rs.standard_normal(NOBS),
+ rs.standard_normal((NOBS, 2)),
+ absorb=pd.DataFrame(rs.standard_normal((NOBS - 1, 1))),
+ )
with pytest.raises(ValueError):
- AbsorbingLS(rs.standard_normal(NOBS), rs.standard_normal((NOBS, 2)),
- interactions=random_cat(10, NOBS - 1, frame=True, rs=rs))
- mod = AbsorbingLS(rs.standard_normal(NOBS), rs.standard_normal((NOBS, 2)),
- interactions=random_cat(10, NOBS, frame=True, rs=rs))
+ AbsorbingLS(
+ rs.standard_normal(NOBS),
+ rs.standard_normal((NOBS, 2)),
+ interactions=random_cat(10, NOBS - 1, frame=True, rs=rs),
+ )
+ mod = AbsorbingLS(
+ rs.standard_normal(NOBS),
+ rs.standard_normal((NOBS, 2)),
+ interactions=random_cat(10, NOBS, frame=True, rs=rs),
+ )
with pytest.raises(RuntimeError):
mod.absorbed_dependent
with pytest.raises(RuntimeError):
mod.absorbed_exog
with pytest.raises(TypeError):
- AbsorbingLS(rs.standard_normal(NOBS), rs.standard_normal((NOBS, 2)),
- interactions=rs.randint(0, 10, size=(NOBS, 2)))
+ AbsorbingLS(
+ rs.standard_normal(NOBS),
+ rs.standard_normal((NOBS, 2)),
+ interactions=rs.randint(0, 10, size=(NOBS, 2)),
+ )
def test_clear_cache():
- _VARIABLE_CACHE['key'] = 'value'
+ _VARIABLE_CACHE["key"] = "value"
clear_cache()
assert len(_VARIABLE_CACHE) == 0
@@ -248,15 +299,15 @@ def test_category_product(cat):
if cat.shape[1] == 1:
assert_series_equal(prod, cat.iloc[:, 0], check_names=False)
else:
- alt = cat.iloc[:, 0].astype('int64')
+ alt = cat.iloc[:, 0].astype("int64")
for i in range(1, cat.shape[1]):
- alt += 10 ** (4 * i) * cat.iloc[:, i].astype('int64')
+ alt += 10 ** (4 * i) * cat.iloc[:, i].astype("int64")
alt = pd.Categorical(alt)
alt = pd.Series(alt)
- df = pd.DataFrame([prod.cat.codes, alt.cat.codes], index=['cat_prod', 'alt']).T
- g = df.groupby('cat_prod').alt
+ df = pd.DataFrame([prod.cat.codes, alt.cat.codes], index=["cat_prod", "alt"]).T
+ g = df.groupby("cat_prod").alt
assert (g.nunique() == 1).all()
- g = df.groupby('alt').cat_prod
+ g = df.groupby("alt").cat_prod
assert (g.nunique() == 1).all()
@@ -291,7 +342,7 @@ def test_category_interaction():
def test_category_continuous_interaction():
c = pd.Series(pd.Categorical([0, 0, 0, 1, 1, 1]))
- v = pd.Series(np.arange(6.))
+ v = pd.Series(np.arange(6.0))
actual = category_continuous_interaction(c, v, precondition=False)
expected = np.zeros((6, 2))
expected[:3, 0] = v[:3]
@@ -307,7 +358,7 @@ def test_category_continuous_interaction():
def test_category_continuous_interaction_interwoven():
c = pd.Series(pd.Categorical([0, 1, 0, 1, 0, 1]))
- v = pd.Series(np.arange(6.))
+ v = pd.Series(np.arange(6.0))
actual = category_continuous_interaction(c, v, precondition=False)
expected = np.zeros((6, 2))
expected[::2, 0] = v[::2]
@@ -378,7 +429,9 @@ def test_interaction_cat_cont_convert(cat, cont):
def test_absorbing_regressors(cat, cont, interact, weights):
- areg = AbsorbingRegressor(cat=cat, cont=cont, interactions=interact, weights=weights)
+ areg = AbsorbingRegressor(
+ cat=cat, cont=cont, interactions=interact, weights=weights
+ )
rank = areg.approx_rank
expected_rank = 0
@@ -393,9 +446,9 @@ def test_absorbing_regressors(cat, cont, interact, weights):
interact_mat = inter.sparse
expected_rank += interact_mat.shape[1]
expected.append(interact_mat)
- expected = sp.hstack(expected, format='csc')
+ expected = sp.hstack(expected, format="csc")
if weights is not None:
- expected = (sp.diags(np.sqrt(weights)).dot(expected)).asformat('csc')
+ expected = (sp.diags(np.sqrt(weights)).dot(expected)).asformat("csc")
actual = areg.regressors
assert expected.shape == actual.shape
assert_array_equal(expected.indptr, actual.indptr)
@@ -405,7 +458,9 @@ def test_absorbing_regressors(cat, cont, interact, weights):
def test_absorbing_regressors_hash(cat, cont, interact, weights):
- areg = AbsorbingRegressor(cat=cat, cont=cont, interactions=interact, weights=weights)
+ areg = AbsorbingRegressor(
+ cat=cat, cont=cont, interactions=interact, weights=weights
+ )
# Build hash
hashes = []
for col in cat:
@@ -429,8 +484,13 @@ def test_empty_absorbing_regressor():
def test_against_ols(ols_data):
- mod = AbsorbingLS(ols_data.y, ols_data.x, absorb=ols_data.absorb,
- interactions=ols_data.interactions, weights=ols_data.weights)
+ mod = AbsorbingLS(
+ ols_data.y,
+ ols_data.x,
+ absorb=ols_data.absorb,
+ interactions=ols_data.interactions,
+ weights=ols_data.weights,
+ )
res = mod.fit()
absorb = []
has_dummy = False
@@ -451,7 +511,7 @@ def test_against_ols(ols_data):
else:
root_w = np.sqrt(mod.weights.ndarray)
wabsorb = annihilate(root_w * absorb, root_w)
- absorb = (1. / root_w) * wabsorb
+ absorb = (1.0 / root_w) * wabsorb
rank = np.linalg.matrix_rank(absorb)
if rank < absorb.shape[1]:
a, b = np.linalg.eig(absorb.T @ absorb)
@@ -467,9 +527,11 @@ def test_against_ols(ols_data):
def test_cache():
- gen = generate_data(2, True, 2, format='pandas', ncont=0, cont_interactions=1)
+ gen = generate_data(2, True, 2, format="pandas", ncont=0, cont_interactions=1)
first = len(_VARIABLE_CACHE)
- mod = AbsorbingLS(gen.y, gen.x, absorb=gen.absorb.iloc[:, :1], interactions=gen.interactions)
+ mod = AbsorbingLS(
+ gen.y, gen.x, absorb=gen.absorb.iloc[:, :1], interactions=gen.interactions
+ )
mod.fit()
second = len(_VARIABLE_CACHE)
mod = AbsorbingLS(gen.y, gen.x, absorb=gen.absorb, interactions=gen.interactions)
@@ -484,18 +546,27 @@ def test_cache():
def test_instrments():
- gen = generate_data(2, True, 2, format='pandas', ncont=0, cont_interactions=1)
- mod = AbsorbingLS(gen.y, gen.x, absorb=gen.absorb.iloc[:, :1], interactions=gen.interactions)
+ gen = generate_data(2, True, 2, format="pandas", ncont=0, cont_interactions=1)
+ mod = AbsorbingLS(
+ gen.y, gen.x, absorb=gen.absorb.iloc[:, :1], interactions=gen.interactions
+ )
assert mod.instruments.shape[1] == 0
def assert_results_equal(o_res: OLSResults, a_res: AbsorbingLSResults, k: int = None):
if k is None:
k = a_res.params.shape[0]
- attrs = [v for v in dir(o_res) if not v.startswith('_')]
- callables = ['conf_int']
- skip = ['summary', 'test_linear_constraint', 'predict', 'model', 'f_statistic', 'wald_test',
- 'method']
+ attrs = [v for v in dir(o_res) if not v.startswith("_")]
+ callables = ["conf_int"]
+ skip = [
+ "summary",
+ "test_linear_constraint",
+ "predict",
+ "model",
+ "f_statistic",
+ "wald_test",
+ "method",
+ ]
for attr in attrs:
if attr in skip:
continue
@@ -507,9 +578,9 @@ def assert_results_equal(o_res: OLSResults, a_res: AbsorbingLSResults, k: int =
if isinstance(left, np.ndarray):
raise NotImplementedError
elif isinstance(left, pd.DataFrame):
- if attr == 'conf_int':
+ if attr == "conf_int":
left = left.iloc[:k]
- elif attr == 'cov':
+ elif attr == "cov":
left = left.iloc[:k, :k]
assert_allclose(left, right, rtol=2e-4, atol=1e-6)
elif isinstance(left, pd.Series):
@@ -526,22 +597,22 @@ def assert_results_equal(o_res: OLSResults, a_res: AbsorbingLSResults, k: int =
def test_center_cov_arg():
- gen = generate_data(2, True, 2, format='pandas', ncont=0, cont_interactions=1)
+ gen = generate_data(2, True, 2, format="pandas", ncont=0, cont_interactions=1)
mod = AbsorbingLS(gen.y, gen.x, absorb=gen.absorb, interactions=gen.interactions)
res = mod.fit(center=True)
- assert 'center' not in res.cov_config
+ assert "center" not in res.cov_config
def test_drop_missing():
- gen = generate_data(2, True, 2, format='pandas', ncont=0, cont_interactions=1)
+ gen = generate_data(2, True, 2, format="pandas", ncont=0, cont_interactions=1)
gen.y[::53] = np.nan
gen.x[::79] = np.nan
with pytest.warns(MissingValueWarning):
AbsorbingLS(gen.y, gen.x, absorb=gen.absorb, interactions=gen.interactions)
- gen = generate_data(2, True, 2, format='pandas', ncont=0, cont_interactions=1)
+ gen = generate_data(2, True, 2, format="pandas", ncont=0, cont_interactions=1)
for col in gen.absorb:
- gen.absorb[col] = gen.absorb[col].astype('int64').astype('object')
+ gen.absorb[col] = gen.absorb[col].astype("int64").astype("object")
col_iloc = gen.absorb.columns.get_loc(col)
gen.absorb.iloc[::91, col_iloc] = np.nan
gen.absorb[col] = pd.Categorical(to_numpy(gen.absorb[col]))
diff --git a/linearmodels/tests/iv/test_against_stata.py b/linearmodels/tests/iv/test_against_stata.py
index 96e04d5921..c357db9da2 100644
--- a/linearmodels/tests/iv/test_against_stata.py
+++ b/linearmodels/tests/iv/test_against_stata.py
@@ -10,46 +10,50 @@
from linearmodels.iv import IV2SLS, IVGMM, IVLIML
from linearmodels.tests.iv.results.read_stata_results import process_results
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
CWD = os.path.split(os.path.abspath(__file__))[0]
-HOUSING_DATA = pd.read_csv(os.path.join(CWD, 'results', 'housing.csv'), index_col=0)
-HOUSING_DATA.region = HOUSING_DATA.region.astype('category')
-HOUSING_DATA.state = HOUSING_DATA.state.astype('category')
-HOUSING_DATA.division = HOUSING_DATA.division.astype('category')
+HOUSING_DATA = pd.read_csv(os.path.join(CWD, "results", "housing.csv"), index_col=0)
+HOUSING_DATA.region = HOUSING_DATA.region.astype("category")
+HOUSING_DATA.state = HOUSING_DATA.state.astype("category")
+HOUSING_DATA.division = HOUSING_DATA.division.astype("category")
-SIMULATED_DATA = pd.read_stata(os.path.join(CWD, 'results', 'simulated-data.dta'))
+SIMULATED_DATA = pd.read_stata(os.path.join(CWD, "results", "simulated-data.dta"))
-filepath = os.path.join(CWD, 'results', 'stata-iv-housing-results.txt')
+filepath = os.path.join(CWD, "results", "stata-iv-housing-results.txt")
HOUSING_RESULTS = process_results(filepath)
-filepath = os.path.join(CWD, 'results', 'stata-iv-simulated-results.txt')
+filepath = os.path.join(CWD, "results", "stata-iv-simulated-results.txt")
SIMULATED_RESULTS = process_results(filepath)
-MODELS = {'2sls': IV2SLS, 'gmm': IVGMM, 'liml': IVLIML}
-COV_OPTIONS = {'cluster': {'cov_type': 'clustered', 'clusters': HOUSING_DATA.division},
- 'robust': {'cov_type': 'robust'},
- 'unadjusted': {'cov_type': 'unadjusted'},
- 'bartlett_12': {'cov_type': 'kernel', 'kernel': 'bartlett', 'bandwidth': 12}}
+MODELS = {"2sls": IV2SLS, "gmm": IVGMM, "liml": IVLIML}
+COV_OPTIONS = {
+ "cluster": {"cov_type": "clustered", "clusters": HOUSING_DATA.division},
+ "robust": {"cov_type": "robust"},
+ "unadjusted": {"cov_type": "unadjusted"},
+ "bartlett_12": {"cov_type": "kernel", "kernel": "bartlett", "bandwidth": 12},
+}
-@pytest.fixture(params=list(HOUSING_RESULTS.keys()), scope='module')
+@pytest.fixture(params=list(HOUSING_RESULTS.keys()), scope="module")
def housing(request):
result = HOUSING_RESULTS[request.param]
- keys = request.param.split('-')
+ keys = request.param.split("-")
mod = MODELS[keys[0]]
data = HOUSING_DATA
endog = data.rent
exog = add_constant(data.pcturban)
instd = data.hsngval
- instr = data[['faminc', 'region']]
+ instr = data[["faminc", "region"]]
cov_opts = deepcopy(COV_OPTIONS[keys[1]])
- cov_opts['debiased'] = keys[2] == 'small'
- if keys[0] == 'gmm':
+ cov_opts["debiased"] = keys[2] == "small"
+ if keys[0] == "gmm":
weight_opts = deepcopy(COV_OPTIONS[keys[1]])
- weight_opts['weight_type'] = weight_opts['cov_type']
- del weight_opts['cov_type']
+ weight_opts["weight_type"] = weight_opts["cov_type"]
+ del weight_opts["cov_type"]
else:
weight_opts = {}
@@ -96,50 +100,59 @@ def test_cov(self, housing):
SIMULATED_COV_OPTIONS = {
- 'vce(cluster cluster_id)': {'cov_type': 'clustered', 'clusters': SIMULATED_DATA.cluster_id},
- 'vce(robust)': {'cov_type': 'robust'},
- 'vce(unadjusted)': {'cov_type': 'unadjusted'},
- 'vce(hac bartlett 12)': {'cov_type': 'kernel', 'kernel': 'bartlett', 'bandwidth': 12}}
+ "vce(cluster cluster_id)": {
+ "cov_type": "clustered",
+ "clusters": SIMULATED_DATA.cluster_id,
+ },
+ "vce(robust)": {"cov_type": "robust"},
+ "vce(unadjusted)": {"cov_type": "unadjusted"},
+ "vce(hac bartlett 12)": {
+ "cov_type": "kernel",
+ "kernel": "bartlett",
+ "bandwidth": 12,
+ },
+}
def construct_model(key):
- model, nendog, nexog, ninstr, weighted, var, other = key.split('-')
- var = var.replace('wmatrix', 'vce')
+ model, nendog, nexog, ninstr, weighted, var, other = key.split("-")
+ var = var.replace("wmatrix", "vce")
mod = MODELS[model]
data = SIMULATED_DATA
- endog = data[['x1', 'x2']] if '2' in nendog else data.x1
- exog = data[['x3', 'x4', 'x5']] if '3' in nexog else None
- instr = data[['z1', 'z2']] if '2' in ninstr else data.z1
- deps = {'vce(unadjusted)': data.y_unadjusted,
- 'vce(robust)': data.y_robust,
- 'vce(cluster cluster_id)': data.y_clustered,
- 'vce(hac bartlett 12)': data.y_kernel}
+ endog = data[["x1", "x2"]] if "2" in nendog else data.x1
+ exog = data[["x3", "x4", "x5"]] if "3" in nexog else None
+ instr = data[["z1", "z2"]] if "2" in ninstr else data.z1
+ deps = {
+ "vce(unadjusted)": data.y_unadjusted,
+ "vce(robust)": data.y_robust,
+ "vce(cluster cluster_id)": data.y_clustered,
+ "vce(hac bartlett 12)": data.y_kernel,
+ }
dep = deps[var]
- if 'noconstant' not in other:
+ if "noconstant" not in other:
if exog is not None:
exog = add_constant(exog)
else:
exog = add_constant(pd.DataFrame(np.empty((dep.shape[0], 0))))
cov_opts = deepcopy(SIMULATED_COV_OPTIONS[var])
- cov_opts['debiased'] = 'small' in other
+ cov_opts["debiased"] = "small" in other
mod_options = {}
- if 'True' in weighted:
- mod_options['weights'] = data.weights
- if model == 'gmm':
+ if "True" in weighted:
+ mod_options["weights"] = data.weights
+ if model == "gmm":
mod_options.update(deepcopy(SIMULATED_COV_OPTIONS[var]))
- mod_options['weight_type'] = mod_options['cov_type']
- del mod_options['cov_type']
- mod_options['center'] = 'center' in other
+ mod_options["weight_type"] = mod_options["cov_type"]
+ del mod_options["cov_type"]
+ mod_options["center"] = "center" in other
model_result = mod(dep, exog, endog, instr, **mod_options).fit(**cov_opts)
- if model == 'gmm' and 'True' in weighted:
- pytest.skip('Weighted GMM differs slightly')
+ if model == "gmm" and "True" in weighted:
+ pytest.skip("Weighted GMM differs slightly")
return model_result
-@pytest.fixture(params=list(SIMULATED_RESULTS.keys()),
- scope='module')
+@pytest.fixture(params=list(SIMULATED_RESULTS.keys()), scope="module")
def simulated(request):
result = SIMULATED_RESULTS[request.param]
model_result = construct_model(request.param)
@@ -170,7 +183,7 @@ def test_residual_ss(self, simulated):
def test_fstat(self, simulated):
res, stata = simulated
if stata.f_statistic is None:
- pytest.skip('Comparison result not available')
+ pytest.skip("Comparison result not available")
assert_allclose(res.f_statistic.stat, stata.f_statistic)
def test_params(self, simulated):
@@ -191,7 +204,9 @@ def test_cov(self, simulated):
def test_weight_mat(self, simulated):
res, stata = simulated
- if not hasattr(stata, 'weight_mat') or not isinstance(stata.weight_mat, pd.DataFrame):
+ if not hasattr(stata, "weight_mat") or not isinstance(
+ stata.weight_mat, pd.DataFrame
+ ):
return
stata_weight_mat = stata.weight_mat.reindex_like(res.weight_matrix)
stata_weight_mat = stata_weight_mat[res.weight_matrix.columns]
@@ -199,12 +214,12 @@ def test_weight_mat(self, simulated):
def test_j_stat(self, simulated):
res, stata = simulated
- if not hasattr(stata, 'J') or stata.J is None:
+ if not hasattr(stata, "J") or stata.J is None:
return
assert_allclose(res.j_stat.stat, stata.J, atol=1e-6, rtol=1e-4)
def test_kappa(self, simulated):
res, stata = simulated
- if not hasattr(stata, 'kappa') or stata.kappa is None:
+ if not hasattr(stata, "kappa") or stata.kappa is None:
return
assert_allclose(res.kappa, stata.kappa, rtol=1e-4)
diff --git a/linearmodels/tests/iv/test_covariance.py b/linearmodels/tests/iv/test_covariance.py
index 494fcb791d..72b46e1c12 100644
--- a/linearmodels/tests/iv/test_covariance.py
+++ b/linearmodels/tests/iv/test_covariance.py
@@ -16,23 +16,22 @@
from linearmodels.utility import AttrDict
-@pytest.fixture(params=['bartlett', 'qs', 'parzen'], scope='module')
+@pytest.fixture(params=["bartlett", "qs", "parzen"], scope="module")
def kernel(request):
kernel_name = request.param
- if kernel_name == 'bartlett':
+ if kernel_name == "bartlett":
weight_func = kernel_weight_bartlett
- alt_names = ['newey-west']
- elif kernel_name == 'parzen':
+ alt_names = ["newey-west"]
+ elif kernel_name == "parzen":
weight_func = kernel_weight_parzen
- alt_names = ['gallant']
+ alt_names = ["gallant"]
else:
weight_func = kernel_weight_quadratic_spectral
- alt_names = ['quadratic-spectral', 'andrews']
- return AttrDict(kernel=kernel_name, alt_names=alt_names,
- weight=weight_func)
+ alt_names = ["quadratic-spectral", "andrews"]
+ return AttrDict(kernel=kernel_name, alt_names=alt_names, weight=weight_func)
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data():
return generate_data()
@@ -87,44 +86,46 @@ def test_asymptotic(self, data):
xhat = data.xhat
s2 = data.s2
assert c.debiased is False
- assert c.config == {'debiased': False, 'kappa': 1}
+ assert c.config == {"debiased": False, "kappa": 1}
assert_allclose(c.s2, data.s2)
assert_allclose(c.cov, data.s2 * inv(xhat.T @ xhat / nobs) / nobs)
assert_allclose(c.s, s2 * data.v)
assert_allclose(c.s, s2 * (xhat.T @ xhat / nobs))
def test_debiased(self, data):
- c = HomoskedasticCovariance(data.x, data.y, data.z, data.params,
- debiased=True)
+ c = HomoskedasticCovariance(data.x, data.y, data.z, data.params, debiased=True)
assert c.debiased is True
- assert c.config == {'debiased': True, 'kappa': 1}
+ assert c.config == {"debiased": True, "kappa": 1}
assert_allclose(c.s2, data.s2_debiased)
assert_allclose(c.s, data.s2_debiased * data.v)
assert_allclose(c.cov, data.s2_debiased * data.vinv / data.nobs)
s = str(c)
- assert 'Kappa' not in s
- assert 'Debiased: True' in s
- assert 'id' in c.__repr__()
+ assert "Kappa" not in s
+ assert "Debiased: True" in s
+ assert "id" in c.__repr__()
def test_kappa(self, data):
- c = HomoskedasticCovariance(data.x, data.y, data.z, data.params, kappa=data.kappa)
+ c = HomoskedasticCovariance(
+ data.x, data.y, data.z, data.params, kappa=data.kappa
+ )
assert c.debiased is False
- assert c.config == {'debiased': False, 'kappa': .99}
+ assert c.config == {"debiased": False, "kappa": 0.99}
assert_allclose(c.s, data.s2 * data.vk)
assert_allclose(c.cov, data.s2 * inv(data.vk) / data.nobs)
s = str(c)
- assert 'Debiased: False' in s
- assert 'Kappa' in s
+ assert "Debiased: False" in s
+ assert "Kappa" in s
def test_kappa_debiased(self, data):
- c = HomoskedasticCovariance(data.x, data.y, data.z, data.params,
- debiased=True, kappa=data.kappa)
+ c = HomoskedasticCovariance(
+ data.x, data.y, data.z, data.params, debiased=True, kappa=data.kappa
+ )
assert c.debiased is True
- assert c.config == {'debiased': True, 'kappa': data.kappa}
+ assert c.config == {"debiased": True, "kappa": data.kappa}
assert_allclose(c.s, data.s2_debiased * data.vk)
assert_allclose(c.cov, data.s2_debiased * inv(data.vk) / data.nobs)
s = str(c)
- assert 'Debiased: True' in s
+ assert "Debiased: True" in s
def test_errors(self, data):
with pytest.raises(ValueError):
@@ -137,38 +138,42 @@ class TestHeteroskedasticCovariance(object):
def test_asymptotic(self, data):
c = HeteroskedasticCovariance(data.x, data.y, data.z, data.params)
assert c.debiased is False
- assert c.config == {'debiased': False, 'kappa': 1}
+ assert c.config == {"debiased": False, "kappa": 1}
assert_allclose(c.s2, data.s2)
xhat, eps, nobs = data.xhat, data.e, data.nobs
assert_allclose(c.s, (xhat * eps).T @ (xhat * eps) / nobs)
def test_debiased(self, data):
- c = HeteroskedasticCovariance(data.x, data.y, data.z, data.params,
- debiased=True)
+ c = HeteroskedasticCovariance(
+ data.x, data.y, data.z, data.params, debiased=True
+ )
xhat, eps, nobs, nvar = data.xhat, data.e, data.nobs, data.nvar
assert c.debiased is True
- assert c.config == {'debiased': True, 'kappa': 1}
+ assert c.config == {"debiased": True, "kappa": 1}
s = (xhat * eps).T @ (xhat * eps) / (nobs - nvar)
assert_allclose(c.s, s)
assert_allclose(c.cov, data.vinv @ s @ data.vinv / nobs)
def test_kappa_debiased(self, data):
- c = HeteroskedasticCovariance(data.x, data.y, data.z, data.params,
- debiased=True, kappa=.99)
+ c = HeteroskedasticCovariance(
+ data.x, data.y, data.z, data.params, debiased=True, kappa=0.99
+ )
assert c.debiased is True
- assert c.config == {'debiased': True, 'kappa': 0.99}
- c2 = HeteroskedasticCovariance(data.x, data.y, data.z, data.params,
- debiased=True)
+ assert c.config == {"debiased": True, "kappa": 0.99}
+ c2 = HeteroskedasticCovariance(
+ data.x, data.y, data.z, data.params, debiased=True
+ )
assert_allclose(c.s, c2.s)
assert c.s2 == c2.s2
vk_inv = inv(data.vk)
assert_allclose(c.cov, vk_inv @ c.s @ vk_inv / data.nobs)
def test_kappa(self, data):
- c = HeteroskedasticCovariance(data.x, data.y, data.z, data.params,
- debiased=False, kappa=.99)
+ c = HeteroskedasticCovariance(
+ data.x, data.y, data.z, data.params, debiased=False, kappa=0.99
+ )
assert c.debiased is False
- assert c.config == {'debiased': False, 'kappa': 0.99}
+ assert c.config == {"debiased": False, "kappa": 0.99}
c2 = HeteroskedasticCovariance(data.x, data.y, data.z, data.params)
assert_allclose(c.s, c2.s)
assert c.s2 == c2.s2
@@ -178,12 +183,13 @@ def test_kappa(self, data):
class TestClusteredCovariance(object):
def test_asymptotic(self, data):
- c = ClusteredCovariance(data.x, data.y, data.z, data.params,
- clusters=data.clusters)
+ c = ClusteredCovariance(
+ data.x, data.y, data.z, data.params, clusters=data.clusters
+ )
assert c._kappa == 1
assert c.debiased is False
- assert c.config['debiased'] is False
- assert_equal(c.config['clusters'], data.clusters)
+ assert c.config["debiased"] is False
+ assert_equal(c.config["clusters"], data.clusters)
assert_allclose(c.s2, data.s2)
sums = np.zeros((len(np.unique(data.clusters)), data.nvar))
xe = data.xhat * data.e
@@ -196,15 +202,16 @@ def test_asymptotic(self, data):
assert_allclose(c.s, s)
assert_allclose(c.cov, data.vinv @ s @ data.vinv / data.nobs)
cs = str(c)
- assert 'Debiased: False' in cs
- assert 'Num Clusters: {0}'.format(len(sums)) in cs
+ assert "Debiased: False" in cs
+ assert "Num Clusters: {0}".format(len(sums)) in cs
def test_debiased(self, data):
- c = ClusteredCovariance(data.x, data.y, data.z, data.params,
- debiased=True, clusters=data.clusters)
+ c = ClusteredCovariance(
+ data.x, data.y, data.z, data.params, debiased=True, clusters=data.clusters
+ )
assert c.debiased is True
- assert c.config['debiased'] is True
- assert_equal(c.config['clusters'], data.clusters)
+ assert c.config["debiased"] is True
+ assert_equal(c.config["clusters"], data.clusters)
ngroups = len(np.unique(data.clusters))
sums = np.zeros((ngroups, data.nvar))
@@ -215,38 +222,43 @@ def test_debiased(self, data):
for j in range(len(sums)):
op += sums[[j]].T @ sums[[j]]
# This is a strange choice
- s = op / data.nobs * ((data.nobs - 1) / (data.nobs - data.nvar)) * ngroups / (ngroups - 1)
+ s = (
+ op
+ / data.nobs
+ * ((data.nobs - 1) / (data.nobs - data.nvar))
+ * ngroups
+ / (ngroups - 1)
+ )
assert_allclose(c.s, s)
assert_allclose(c.cov, data.vinv @ s @ data.vinv / data.nobs)
cs = str(c)
- assert 'Debiased: True' in cs
- assert 'Num Clusters: {0}'.format(len(sums)) in cs
- assert 'id' in c.__repr__()
+ assert "Debiased: True" in cs
+ assert "Num Clusters: {0}".format(len(sums)) in cs
+ assert "id" in c.__repr__()
def test_errors(self, data):
with pytest.raises(ValueError):
- ClusteredCovariance(data.x, data.y, data.z, data.params,
- clusters=data.clusters[:10])
+ ClusteredCovariance(
+ data.x, data.y, data.z, data.params, clusters=data.clusters[:10]
+ )
class TestKernelCovariance(object):
def test_asymptotic(self, data, kernel):
- c = KernelCovariance(data.x, data.y, data.z, data.params,
- kernel=kernel.kernel)
+ c = KernelCovariance(data.x, data.y, data.z, data.params, kernel=kernel.kernel)
cs = str(c)
- assert '\nBandwidth' not in cs
+ assert "\nBandwidth" not in cs
for name in kernel.alt_names:
- c2 = KernelCovariance(data.x, data.y, data.z, data.params,
- kernel=name)
+ c2 = KernelCovariance(data.x, data.y, data.z, data.params, kernel=name)
assert_equal(c.cov, c2.cov)
assert c.debiased is False
- assert c.config['debiased'] is False
- assert_equal(c.config['kernel'], kernel.kernel)
+ assert c.config["debiased"] is False
+ assert_equal(c.config["kernel"], kernel.kernel)
assert_allclose(c.s2, data.s2)
- bw = c.config['bandwidth']
+ bw = c.config["bandwidth"]
xe = data.xhat * data.e
s = xe.T @ xe
w = kernel.weight(bw, xe.shape[0] - 1)
@@ -257,39 +269,43 @@ def test_asymptotic(self, data, kernel):
assert_allclose(c.cov, data.vinv @ s @ data.vinv / data.nobs)
cs = str(c)
- assert 'Kappa' not in cs
- assert 'Kernel: {0}'.format(kernel.kernel) in cs
- assert 'Bandwidth: {0}'.format(bw) in cs
+ assert "Kappa" not in cs
+ assert "Kernel: {0}".format(kernel.kernel) in cs
+ assert "Bandwidth: {0}".format(bw) in cs
def test_debiased(self, data, kernel):
- c = KernelCovariance(data.x, data.y, data.z, data.params,
- kernel=kernel.kernel, debiased=True)
+ c = KernelCovariance(
+ data.x, data.y, data.z, data.params, kernel=kernel.kernel, debiased=True
+ )
for name in kernel.alt_names:
- c2 = KernelCovariance(data.x, data.y, data.z, data.params,
- kernel=name, debiased=True)
+ c2 = KernelCovariance(
+ data.x, data.y, data.z, data.params, kernel=name, debiased=True
+ )
assert_equal(c.cov, c2.cov)
assert c._kappa == 1
assert c.debiased is True
- assert c.config['debiased'] is True
- assert_equal(c.config['kernel'], kernel.kernel)
+ assert c.config["debiased"] is True
+ assert_equal(c.config["kernel"], kernel.kernel)
assert_allclose(c.s2, data.s2_debiased)
- c2 = KernelCovariance(data.x, data.y, data.z, data.params,
- kernel=kernel.kernel, debiased=False)
+ c2 = KernelCovariance(
+ data.x, data.y, data.z, data.params, kernel=kernel.kernel, debiased=False
+ )
scale = data.nobs / (data.nobs - data.nvar)
assert_allclose(c.s, scale * c2.s)
assert_allclose(c.cov, scale * c2.cov)
cs = str(c)
- assert 'Debiased: True' in cs
- assert 'Kernel: {0}'.format(kernel.kernel) in cs
- assert 'Bandwidth: {0}'.format(c.config['bandwidth']) in cs
- assert 'id' in c.__repr__()
+ assert "Debiased: True" in cs
+ assert "Kernel: {0}".format(kernel.kernel) in cs
+ assert "Bandwidth: {0}".format(c.config["bandwidth"]) in cs
+ assert "id" in c.__repr__()
def test_unknown_kernel(self, data, kernel):
with pytest.raises(ValueError):
- KernelCovariance(data.x, data.y, data.z, data.params,
- kernel=kernel.kernel + '_unknown')
+ KernelCovariance(
+ data.x, data.y, data.z, data.params, kernel=kernel.kernel + "_unknown"
+ )
class TestAutomaticBandwidth(object):
@@ -303,4 +319,4 @@ def test_smoke(self, data, kernel):
def test_unknown_kernel(self, data, kernel):
with pytest.raises(ValueError):
- kernel_optimal_bandwidth(data.e, kernel.kernel + '_unknown')
+ kernel_optimal_bandwidth(data.e, kernel.kernel + "_unknown")
diff --git a/linearmodels/tests/iv/test_data.py b/linearmodels/tests/iv/test_data.py
index 27c74fc76e..c80e335b1e 100644
--- a/linearmodels/tests/iv/test_data.py
+++ b/linearmodels/tests/iv/test_data.py
@@ -21,7 +21,7 @@ def test_numpy_2d(self):
x = np.empty((10, 2))
xdh = IVData(x)
assert xdh.ndim == x.ndim
- assert xdh.cols == ['x.0', 'x.1']
+ assert xdh.cols == ["x.0", "x.1"]
assert xdh.rows == list(np.arange(10))
assert_equal(xdh.ndarray, x)
df = pd.DataFrame(x, columns=xdh.cols, index=xdh.rows)
@@ -33,7 +33,7 @@ def test_numpy_1d(self):
x = np.empty(10)
xdh = IVData(x)
assert xdh.ndim == 2
- assert xdh.cols == ['x']
+ assert xdh.cols == ["x"]
assert xdh.rows == list(np.arange(10))
assert_equal(xdh.ndarray, x[:, None])
df = pd.DataFrame(x[:, None], columns=xdh.cols, index=xdh.rows)
@@ -42,8 +42,8 @@ def test_numpy_1d(self):
def test_pandas_df_numeric(self):
x = np.empty((10, 2))
- index = pd.date_range('2017-01-01', periods=10)
- xdf = pd.DataFrame(x, columns=['a', 'b'], index=index)
+ index = pd.date_range("2017-01-01", periods=10)
+ xdf = pd.DataFrame(x, columns=["a", "b"], index=index)
xdh = IVData(xdf)
assert xdh.ndim == 2
assert xdh.cols == list(xdf.columns)
@@ -55,8 +55,8 @@ def test_pandas_df_numeric(self):
def test_pandas_series_numeric(self):
x = np.empty(10)
- index = pd.date_range('2017-01-01', periods=10)
- xs = pd.Series(x, name='charlie', index=index)
+ index = pd.date_range("2017-01-01", periods=10)
+ xs = pd.Series(x, name="charlie", index=index)
xdh = IVData(xs)
assert xdh.ndim == 2
assert xdh.cols == [xs.name]
@@ -66,46 +66,43 @@ def test_pandas_series_numeric(self):
assert_frame_equal(xdh.pandas, df)
assert xdh.shape == (10, 1)
- @pytest.mark.skipif(MISSING_XARRAY, reason='xarray not installed')
+ @pytest.mark.skipif(MISSING_XARRAY, reason="xarray not installed")
def test_xarray_1d(self):
x_np = np.random.randn(10)
x = xr.DataArray(x_np)
- dh = IVData(x, 'some_variable')
+ dh = IVData(x, "some_variable")
assert_equal(dh.ndarray, x_np[:, None])
assert dh.rows == list(np.arange(10))
- assert dh.cols == ['some_variable.0']
+ assert dh.cols == ["some_variable.0"]
expected = pd.DataFrame(x_np, columns=dh.cols, index=dh.rows)
assert_frame_equal(expected, dh.pandas)
- index = pd.date_range('2017-01-01', periods=10)
- x = xr.DataArray(x_np,
- [('time', index)])
- dh = IVData(x, 'some_variable')
+ index = pd.date_range("2017-01-01", periods=10)
+ x = xr.DataArray(x_np, [("time", index)])
+ dh = IVData(x, "some_variable")
assert_equal(dh.ndarray, x_np[:, None])
assert_series_equal(pd.Series(dh.rows), pd.Series(list(index)))
- assert dh.cols == ['some_variable.0']
+ assert dh.cols == ["some_variable.0"]
expected = pd.DataFrame(x_np[:, None], columns=dh.cols, index=dh.rows)
assert_frame_equal(expected, dh.pandas)
- @pytest.mark.skipif(MISSING_XARRAY, reason='xarray not installed')
+ @pytest.mark.skipif(MISSING_XARRAY, reason="xarray not installed")
def test_xarray_2d(self):
x_np = np.random.randn(10, 2)
x = xr.DataArray(x_np)
dh = IVData(x)
assert_equal(dh.ndarray, x_np)
assert dh.rows == list(np.arange(10))
- assert dh.cols == ['x.0', 'x.1']
+ assert dh.cols == ["x.0", "x.1"]
expected = pd.DataFrame(x_np, columns=dh.cols, index=dh.rows)
assert_frame_equal(expected, dh.pandas)
- index = pd.date_range('2017-01-01', periods=10)
- x = xr.DataArray(x_np,
- [('time', index),
- ('variables', ['apple', 'banana'])])
+ index = pd.date_range("2017-01-01", periods=10)
+ x = xr.DataArray(x_np, [("time", index), ("variables", ["apple", "banana"])])
dh = IVData(x)
assert_equal(dh.ndarray, x_np)
assert_series_equal(pd.Series(dh.rows), pd.Series(list(index)))
- assert dh.cols == ['apple', 'banana']
+ assert dh.cols == ["apple", "banana"]
expected = pd.DataFrame(x_np, columns=dh.cols, index=dh.rows)
assert_frame_equal(expected, dh.pandas)
@@ -115,6 +112,7 @@ def test_invalid_types(self):
with pytest.raises(ValueError):
IVData(np.empty((10, 2, 2)))
with pytest.raises(TypeError):
+
class AnotherClass(object):
@property
def ndim(self):
@@ -123,21 +121,22 @@ def ndim(self):
IVData(AnotherClass())
def test_string_cat_equiv(self):
- s1 = pd.Series(['a', 'b', 'a', 'b', 'c', 'd', 'a', 'b'])
+ s1 = pd.Series(["a", "b", "a", "b", "c", "d", "a", "b"])
s2 = pd.Series(np.arange(8.0))
- s3 = pd.Series(['apple', 'banana', 'apple', 'banana',
- 'cherry', 'date', 'apple', 'banana'])
- df = pd.DataFrame({'string': s1, 'number': s2, 'other_string': s3})
+ s3 = pd.Series(
+ ["apple", "banana", "apple", "banana", "cherry", "date", "apple", "banana"]
+ )
+ df = pd.DataFrame({"string": s1, "number": s2, "other_string": s3})
dh = IVData(df)
df_cat = df.copy()
- df_cat['string'] = df_cat['string'].astype('category')
+ df_cat["string"] = df_cat["string"].astype("category")
dh_cat = IVData(df_cat)
assert_frame_equal(dh.pandas, dh_cat.pandas)
def test_existing_datahandler(self):
x = np.empty((10, 2))
- index = pd.date_range('2017-01-01', periods=10)
- xdf = pd.DataFrame(x, columns=['a', 'b'], index=index)
+ index = pd.date_range("2017-01-01", periods=10)
+ xdf = pd.DataFrame(x, columns=["a", "b"], index=index)
xdh = IVData(xdf)
xdh2 = IVData(xdh)
assert xdh is not xdh2
@@ -148,57 +147,57 @@ def test_existing_datahandler(self):
assert_frame_equal(xdh.pandas, xdh2.pandas)
def test_categorical(self):
- index = pd.date_range('2017-01-01', periods=10)
- cat = pd.Categorical(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'c', 'c', 'a'])
+ index = pd.date_range("2017-01-01", periods=10)
+ cat = pd.Categorical(["a", "b", "a", "b", "a", "a", "b", "c", "c", "a"])
num = np.empty(10)
df = pd.DataFrame(OrderedDict(cat=cat, num=num), index=index)
dh = IVData(df)
assert dh.ndim == 2
assert dh.shape == (10, 3)
- assert sorted(dh.cols) == sorted(['cat.b', 'cat.c', 'num'])
+ assert sorted(dh.cols) == sorted(["cat.b", "cat.c", "num"])
assert dh.rows == list(index)
- assert_equal(dh.pandas['num'].values, num)
- assert_equal(dh.pandas['cat.b'].values, (cat == 'b').astype(np.float))
- assert_equal(dh.pandas['cat.c'].values, (cat == 'c').astype(np.float))
+ assert_equal(dh.pandas["num"].values, num)
+ assert_equal(dh.pandas["cat.b"].values, (cat == "b").astype(np.float))
+ assert_equal(dh.pandas["cat.c"].values, (cat == "c").astype(np.float))
def test_categorical_series(self):
- index = pd.date_range('2017-01-01', periods=10)
- cat = pd.Categorical(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'c', 'c', 'a'])
- s = pd.Series(cat, name='cat', index=index)
+ index = pd.date_range("2017-01-01", periods=10)
+ cat = pd.Categorical(["a", "b", "a", "b", "a", "a", "b", "c", "c", "a"])
+ s = pd.Series(cat, name="cat", index=index)
dh = IVData(s)
assert dh.ndim == 2
assert dh.shape == (10, 2)
- assert sorted(dh.cols) == sorted(['cat.b', 'cat.c'])
+ assert sorted(dh.cols) == sorted(["cat.b", "cat.c"])
assert dh.rows == list(index)
- assert_equal(dh.pandas['cat.b'].values, (cat == 'b').astype(np.float))
- assert_equal(dh.pandas['cat.c'].values, (cat == 'c').astype(np.float))
+ assert_equal(dh.pandas["cat.b"].values, (cat == "b").astype(np.float))
+ assert_equal(dh.pandas["cat.c"].values, (cat == "c").astype(np.float))
def test_categorical_no_conversion(self):
- index = pd.date_range('2017-01-01', periods=10)
- cat = pd.Categorical(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'c', 'c', 'a'])
- s = pd.Series(cat, index=index, name='cat')
+ index = pd.date_range("2017-01-01", periods=10)
+ cat = pd.Categorical(["a", "b", "a", "b", "a", "a", "b", "c", "c", "a"])
+ s = pd.Series(cat, index=index, name="cat")
dh = IVData(s, convert_dummies=False)
assert dh.ndim == 2
assert dh.shape == (10, 1)
- assert dh.cols == ['cat']
+ assert dh.cols == ["cat"]
assert dh.rows == list(index)
df = pd.DataFrame(s)
assert_frame_equal(dh.pandas, df)
def test_categorical_keep_first(self):
- index = pd.date_range('2017-01-01', periods=10)
- cat = pd.Categorical(['a', 'b', 'a', 'b', 'a', 'a', 'b', 'c', 'c', 'a'])
+ index = pd.date_range("2017-01-01", periods=10)
+ cat = pd.Categorical(["a", "b", "a", "b", "a", "a", "b", "c", "c", "a"])
num = np.empty(10)
df = pd.DataFrame(OrderedDict(cat=cat, num=num), index=index)
dh = IVData(df, drop_first=False)
assert dh.ndim == 2
assert dh.shape == (10, 4)
- assert sorted(dh.cols) == sorted(['cat.a', 'cat.b', 'cat.c', 'num'])
+ assert sorted(dh.cols) == sorted(["cat.a", "cat.b", "cat.c", "num"])
assert dh.rows == list(index)
- assert_equal(dh.pandas['num'].values, num)
- assert_equal(dh.pandas['cat.a'].values, (cat == 'a').astype(np.float))
- assert_equal(dh.pandas['cat.b'].values, (cat == 'b').astype(np.float))
- assert_equal(dh.pandas['cat.c'].values, (cat == 'c').astype(np.float))
+ assert_equal(dh.pandas["num"].values, num)
+ assert_equal(dh.pandas["cat.a"].values, (cat == "a").astype(np.float))
+ assert_equal(dh.pandas["cat.b"].values, (cat == "b").astype(np.float))
+ assert_equal(dh.pandas["cat.c"].values, (cat == "c").astype(np.float))
def test_nobs_missing_error(self):
with pytest.raises(ValueError):
@@ -210,12 +209,12 @@ def test_incorrect_nobs(self):
IVData(x, nobs=100)
def test_mixed_data(self):
- s = pd.Series([1, 2, 'a', -3.0])
+ s = pd.Series([1, 2, "a", -3.0])
with pytest.raises(ValueError):
IVData(s)
def test_duplicate_column_names():
- x = pd.DataFrame(np.ones((3, 2)), columns=['x', 'x'])
+ x = pd.DataFrame(np.ones((3, 2)), columns=["x", "x"])
with pytest.raises(ValueError):
IVData(x)
diff --git a/linearmodels/tests/iv/test_formulas.py b/linearmodels/tests/iv/test_formulas.py
index 431909d20f..010ac36979 100644
--- a/linearmodels/tests/iv/test_formulas.py
+++ b/linearmodels/tests/iv/test_formulas.py
@@ -10,9 +10,12 @@
from linearmodels.iv import IV2SLS, IVGMM, IVGMMCUE, IVLIML
-@pytest.fixture(scope='module',
- params=list(zip([IV2SLS, IVLIML, IVGMMCUE, IVGMM],
- [iv_2sls, iv_liml, iv_gmm_cue, iv_gmm])))
+@pytest.fixture(
+ scope="module",
+ params=list(
+ zip([IV2SLS, IVLIML, IVGMMCUE, IVGMM], [iv_2sls, iv_liml, iv_gmm_cue, iv_gmm])
+ ),
+)
def model_and_func(request):
return request.param
@@ -21,16 +24,18 @@ def sigmoid(v):
return np.exp(v) / (1 + np.exp(v))
-formulas = ['y ~ 1 + x3 + x4 + x5 + [x1 + x2 ~ z1 + z2 + z3]',
- 'y ~ 1 + x3 + x4 + [x1 + x2 ~ z1 + z2 + z3] + x5']
+formulas = [
+ "y ~ 1 + x3 + x4 + x5 + [x1 + x2 ~ z1 + z2 + z3]",
+ "y ~ 1 + x3 + x4 + [x1 + x2 ~ z1 + z2 + z3] + x5",
+]
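
The bracketed term is the formula extension exercised throughout this file: `[endog ~ instruments]` marks the endogenous regressors and their instruments, while everything else on the right-hand side is exogenous. A small sketch on made-up data (the column names are assumptions, not the fixture below):

```python
import numpy as np
import pandas as pd
from linearmodels.iv import IV2SLS

rs = np.random.RandomState(0)
df = pd.DataFrame(rs.standard_normal((500, 4)),
                  columns=["y", "x1", "z1", "z2"])

# Everything outside the brackets is exogenous; inside, the variables
# before "~" are endogenous and those after are their instruments.
res = IV2SLS.from_formula("y ~ 1 + [x1 ~ z1 + z2]", df).fit()
print(res.params)
```
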
-@pytest.fixture(scope='module', params=formulas)
+@pytest.fixture(scope="module", params=formulas)
def formula(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data():
n, k, p = 1000, 5, 3
np.random.seed(12345)
@@ -44,16 +49,16 @@ def data():
v = np.random.multivariate_normal(np.zeros(r.shape[0]), r, n)
x = v[:, :k]
- z = v[:, k:k + p]
+ z = v[:, k : k + p]
e = v[:, [-1]]
params = np.arange(1, k + 1) / k
params = params[:, None]
y = x @ params + e
- cols = ['y'] + ['x' + str(i) for i in range(1, 6)]
- cols += ['z' + str(i) for i in range(1, 4)]
+ cols = ["y"] + ["x" + str(i) for i in range(1, 6)]
+ cols += ["z" + str(i) for i in range(1, 4)]
data = DataFrame(np.c_[y, x, z], columns=cols)
- data['Intercept'] = 1.0
- data['weights'] = np.random.chisquare(10, size=data.shape[0]) / 10
+ data["Intercept"] = 1.0
+ data["weights"] = np.random.chisquare(10, size=data.shape[0]) / 10
return data
@@ -61,9 +66,9 @@ def test_formula(data, model_and_func, formula):
model, func = model_and_func
mod = model.from_formula(formula, data)
res = mod.fit()
- exog = data[['Intercept', 'x3', 'x4', 'x5']]
- endog = data[['x1', 'x2']]
- instr = data[['z1', 'z2', 'z3']]
+ exog = data[["Intercept", "x3", "x4", "x5"]]
+ endog = data[["x1", "x2"]]
+ instr = data[["z1", "z2", "z3"]]
res2 = model(data.y, exog, endog, instr).fit()
assert res.rsquared == res2.rsquared
assert mod.formula == formula
@@ -78,9 +83,9 @@ def test_formula_weights(data, model_and_func, formula):
model, func = model_and_func
mod = model.from_formula(formula, data, weights=data.weights)
res = mod.fit()
- exog = data[['Intercept', 'x3', 'x4', 'x5']]
- endog = data[['x1', 'x2']]
- instr = data[['z1', 'z2', 'z3']]
+ exog = data[["Intercept", "x3", "x4", "x5"]]
+ endog = data[["x1", "x2"]]
+ instr = data[["z1", "z2", "z3"]]
res2 = model(data.y, exog, endog, instr, weights=data.weights).fit()
assert res.rsquared == res2.rsquared
assert mod.formula == formula
@@ -94,14 +99,14 @@ def test_formula_weights(data, model_and_func, formula):
def test_formula_kernel(data, model_and_func, formula):
model, func = model_and_func
mod = model.from_formula(formula, data)
- mod.fit(cov_type='kernel')
- func(formula, data).fit(cov_type='kernel')
+ mod.fit(cov_type="kernel")
+ func(formula, data).fit(cov_type="kernel")
def test_formula_ols(data, model_and_func):
model, func = model_and_func
- formula = 'y ~ 1 + x1 + x2 + x3 + x4 + x5'
- exog = data[['Intercept', 'x1', 'x2', 'x3', 'x4', 'x5']]
+ formula = "y ~ 1 + x1 + x2 + x3 + x4 + x5"
+ exog = data[["Intercept", "x1", "x2", "x3", "x4", "x5"]]
res2 = model(data.y, exog, None, None)
res2 = res2.fit()
res = model.from_formula(formula, data).fit()
@@ -113,8 +118,8 @@ def test_formula_ols(data, model_and_func):
def test_formula_ols_weights(data, model_and_func):
model, func = model_and_func
- formula = 'y ~ 1 + x1 + x2 + x3 + x4 + x5'
- exog = data[['Intercept', 'x1', 'x2', 'x3', 'x4', 'x5']]
+ formula = "y ~ 1 + x1 + x2 + x3 + x4 + x5"
+ exog = data[["Intercept", "x1", "x2", "x3", "x4", "x5"]]
res2 = model(data.y, exog, None, None, weights=data.weights)
res2 = res2.fit()
res = model.from_formula(formula, data, weights=data.weights).fit()
@@ -126,7 +131,7 @@ def test_formula_ols_weights(data, model_and_func):
def test_no_exog(data, model_and_func):
model, func = model_and_func
- formula = 'y ~ [x1 + x2 ~ z1 + z2 + z3]'
+ formula = "y ~ [x1 + x2 ~ z1 + z2 + z3]"
mod = model.from_formula(formula, data)
res = mod.fit()
res2 = func(formula, data).fit()
@@ -134,7 +139,7 @@ def test_no_exog(data, model_and_func):
assert res.rsquared == res2.rsquared
assert mod.formula == formula
- mod2 = model(data.y, None, data[['x1', 'x2']], data[['z1', 'z2', 'z3']])
+ mod2 = model(data.y, None, data[["x1", "x2"]], data[["z1", "z2", "z3"]])
res3 = mod2.fit()
assert_allclose(res.rsquared, res3.rsquared)
@@ -142,38 +147,38 @@ def test_no_exog(data, model_and_func):
def test_invalid_formula(data, model_and_func):
model, func = model_and_func
- formula = 'y ~ 1 + x1 + x2 ~ x3 + [x4 x5 ~ z1 z2]'
+ formula = "y ~ 1 + x1 + x2 ~ x3 + [x4 x5 ~ z1 z2]"
with pytest.raises(ValueError):
model.from_formula(formula, data).fit()
with pytest.raises(ValueError):
func(formula, data).fit()
- formula = 'y ~ 1 + x1 + x2 + x3 + x4 + x5 ~ z1 z2'
+ formula = "y ~ 1 + x1 + x2 + x3 + x4 + x5 ~ z1 z2"
with pytest.raises(ValueError):
model.from_formula(formula, data).fit()
- formula = 'y y2 ~ 1 + x1 + x2 + x3 + [x4 + x5 ~ + z1 + z2]'
+ formula = "y y2 ~ 1 + x1 + x2 + x3 + [x4 + x5 ~ + z1 + z2]"
with pytest.raises(ValueError):
model.from_formula(formula, data).fit()
- formula = 'y y2 ~ 1 + x1 + x2 + x3 [ + x4 + x5 ~ z1 + z2]'
+ formula = "y y2 ~ 1 + x1 + x2 + x3 [ + x4 + x5 ~ z1 + z2]"
with pytest.raises(ValueError):
model.from_formula(formula, data).fit()
- formula = 'y y2 ~ 1 + x1 + x2 + x3 + [x4 + x5 ~ z1 + z2]'
+ formula = "y y2 ~ 1 + x1 + x2 + x3 + [x4 + x5 ~ z1 + z2]"
with pytest.raises(SyntaxError):
model.from_formula(formula, data).fit()
def test_categorical(model_and_func):
- formula = 'y ~ 1 + d + x1'
+ formula = "y ~ 1 + d + x1"
y = np.random.randn(1000)
x1 = np.random.randn(1000)
d = np.random.randint(0, 4, 1000)
d = Categorical(d)
- data = DataFrame({'y': y, 'x1': x1, 'd': d})
- data['Intercept'] = 1.0
+ data = DataFrame({"y": y, "x1": x1, "d": d})
+ data["Intercept"] = 1.0
model, func = model_and_func
mod = model.from_formula(formula, data)
res3 = mod.fit()
res2 = func(formula, data).fit()
- res = model(data.y, data[['Intercept', 'x1', 'd']], None, None).fit()
+ res = model(data.y, data[["Intercept", "x1", "d"]], None, None).fit()
assert_allclose(res.rsquared, res2.rsquared)
assert_allclose(res2.rsquared, res3.rsquared)
@@ -184,8 +189,8 @@ def test_predict_formula(data, model_and_func, formula):
model, func = model_and_func
mod = model.from_formula(formula, data)
res = mod.fit()
- exog = data[['Intercept', 'x3', 'x4', 'x5']]
- endog = data[['x1', 'x2']]
+ exog = data[["Intercept", "x3", "x4", "x5"]]
+ endog = data[["x1", "x2"]]
pred = res.predict(exog, endog)
pred2 = res.predict(data=data)
assert_frame_equal(pred, pred2)
@@ -194,16 +199,20 @@ def test_predict_formula(data, model_and_func, formula):
def test_formula_function(data, model_and_func):
model, func = model_and_func
- fmla = 'y ~ 1 + sigmoid(x3) + x4 + [x1 + x2 ~ z1 + z2 + z3] + np.exp(x5)'
+ fmla = "y ~ 1 + sigmoid(x3) + x4 + [x1 + x2 ~ z1 + z2 + z3] + np.exp(x5)"
mod = model.from_formula(fmla, data)
res = mod.fit()
dep = data.y
- exog = [data[['Intercept']], sigmoid(data[['x3']]), data[['x4']],
- np.exp(data[['x5']])]
+ exog = [
+ data[["Intercept"]],
+ sigmoid(data[["x3"]]),
+ data[["x4"]],
+ np.exp(data[["x5"]]),
+ ]
exog = concat(exog, 1)
- endog = data[['x1', 'x2']]
- instr = data[['z1', 'z2', 'z3']]
+ endog = data[["x1", "x2"]]
+ instr = data[["z1", "z2", "z3"]]
mod = model(dep, exog, endog, instr)
res2 = mod.fit()
assert_equal(res.params.values, res2.params.values)
@@ -216,14 +225,18 @@ def test_formula_function(data, model_and_func):
def test_predict_formula_function(data, model_and_func):
model, func = model_and_func
- fmla = 'y ~ 1 + sigmoid(x3) + x4 + [x1 + x2 ~ z1 + z2 + z3] + np.exp(x5)'
+ fmla = "y ~ 1 + sigmoid(x3) + x4 + [x1 + x2 ~ z1 + z2 + z3] + np.exp(x5)"
mod = model.from_formula(fmla, data)
res = mod.fit()
- exog = [data[['Intercept']], sigmoid(data[['x3']]), data[['x4']],
- np.exp(data[['x5']])]
+ exog = [
+ data[["Intercept"]],
+ sigmoid(data[["x3"]]),
+ data[["x4"]],
+ np.exp(data[["x5"]]),
+ ]
exog = concat(exog, 1)
- endog = data[['x1', 'x2']]
+ endog = data[["x1", "x2"]]
pred = res.predict(exog, endog)
pred2 = res.predict(data=data)
assert_frame_equal(pred, pred2)
@@ -240,8 +253,8 @@ def test_predict_formula_error(data, model_and_func, formula):
model, func = model_and_func
mod = model.from_formula(formula, data)
res = mod.fit()
- exog = data[['Intercept', 'x3', 'x4', 'x5']]
- endog = data[['x1', 'x2']]
+ exog = data[["Intercept", "x3", "x4", "x5"]]
+ endog = data[["x1", "x2"]]
with pytest.raises(ValueError):
res.predict(exog, endog, data=data)
with pytest.raises(ValueError):
@@ -251,21 +264,21 @@ def test_predict_formula_error(data, model_and_func, formula):
def test_single_character_names(data, model_and_func):
# GH 149
data = data.copy()
- data['x'] = data['x1']
- data['v'] = data['x2']
- data['z'] = data['z1']
- data['a'] = data['z2']
- fmla = 'y ~ 1 + [x ~ z]'
+ data["x"] = data["x1"]
+ data["v"] = data["x2"]
+ data["z"] = data["z1"]
+ data["a"] = data["z2"]
+ fmla = "y ~ 1 + [x ~ z]"
model, func = model_and_func
mod = model.from_formula(fmla, data)
mod.fit()
- fmla = 'y ~ 1 + [x ~ z + a]'
+ fmla = "y ~ 1 + [x ~ z + a]"
model, func = model_and_func
mod = model.from_formula(fmla, data)
mod.fit()
- fmla = 'y ~ 1 + [x + v ~ z + a]'
+ fmla = "y ~ 1 + [x + v ~ z + a]"
model, func = model_and_func
mod = model.from_formula(fmla, data)
mod.fit()
@@ -274,7 +287,7 @@ def test_single_character_names(data, model_and_func):
def test_ols_formula(data):
# GH 185
data = data.copy()
- fmla = 'y ~ 1 + x1'
+ fmla = "y ~ 1 + x1"
mod = IV2SLS.from_formula(fmla, data)
res = mod.fit()
- assert 'OLS Estimation Summary' in str(res)
+ assert "OLS Estimation Summary" in str(res)
diff --git a/linearmodels/tests/iv/test_gmm.py b/linearmodels/tests/iv/test_gmm.py
index 82b81c53f2..18e82b21f0 100644
--- a/linearmodels/tests/iv/test_gmm.py
+++ b/linearmodels/tests/iv/test_gmm.py
@@ -13,28 +13,27 @@
from linearmodels.utility import AttrDict
-@pytest.fixture(params=[None, 12], scope='module')
+@pytest.fixture(params=[None, 12], scope="module")
def bandwidth(request):
return request.param
-@pytest.fixture(params=['bartlett', 'qs', 'parzen'], scope='module')
+@pytest.fixture(params=["bartlett", "qs", "parzen"], scope="module")
def kernel(request):
kernel_name = request.param
- if kernel_name == 'bartlett':
+ if kernel_name == "bartlett":
weight_func = kernel_weight_bartlett
- alt_names = ['newey-west']
- elif kernel_name == 'parzen':
+ alt_names = ["newey-west"]
+ elif kernel_name == "parzen":
weight_func = kernel_weight_parzen
- alt_names = ['gallant']
+ alt_names = ["gallant"]
else:
weight_func = kernel_weight_quadratic_spectral
- alt_names = ['quadratic-spectral', 'andrews']
- return AttrDict(kernel=kernel_name, alt_names=alt_names,
- weight=weight_func)
+ alt_names = ["quadratic-spectral", "andrews"]
+ return AttrDict(kernel=kernel_name, alt_names=alt_names, weight=weight_func)
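
The fixture maps each kernel name, plus the alternate spellings the library accepts, to its weight function. As a reference point, here is a sketch of linearly declining Bartlett/Newey-West weights under the indexing the tests below use, where `w[0]` multiplies the lag-0 term; the library's `kernel_weight_bartlett` is assumed to follow the same convention:

```python
import numpy as np

def bartlett_weights(bw: int) -> np.ndarray:
    # w[0] = 1 applies to the lag-0 term; weights decline linearly and
    # reach zero one lag past the bandwidth.
    j = np.arange(bw + 1)
    return 1.0 - j / (bw + 1)

print(bartlett_weights(3))   # [1.   0.75 0.5  0.25]
```
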
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data():
return generate_data()
@@ -68,8 +67,8 @@ def test_config(self, data):
weight = wm.weight_matrix(data.x, z, e)
s2 = (e - e.mean()).T @ (e - e.mean()) / nobs
assert_allclose(weight, s2 * z.T @ z / nobs)
- assert wm.config['center'] is False
- assert wm.config['debiased'] is False
+ assert wm.config["center"] is False
+ assert wm.config["debiased"] is False
class TestHeteroskedasticWeight(object):
@@ -96,14 +95,13 @@ def test_config(self, data):
ze = z * e
assert_allclose(weight, ze.T @ ze / nobs)
- assert wm.config['center'] is False
- assert wm.config['debiased'] is False
+ assert wm.config["center"] is False
+ assert wm.config["debiased"] is False
class TestKernelWeight(object):
def test_center(self, data, kernel, bandwidth):
- wm = KernelWeightMatrix(kernel.kernel, bandwidth, center=True,
- optimal_bw=True)
+ wm = KernelWeightMatrix(kernel.kernel, bandwidth, center=True, optimal_bw=True)
weight = wm.weight_matrix(data.x, data.z, data.e)
z, e, nobs = data.z, data.e, data.nobs
bw = bandwidth or wm.bandwidth
@@ -115,15 +113,17 @@ def test_center(self, data, kernel, bandwidth):
op = ze[i:].T @ ze[:-i]
s += w[i] * (op + op.T)
assert_allclose(weight, s / nobs)
- assert wm.config['bandwidth'] == bw
- assert wm.config['kernel'] == kernel.kernel
+ assert wm.config["bandwidth"] == bw
+ assert wm.config["kernel"] == kernel.kernel
for name in kernel.alt_names:
wm = KernelWeightMatrix(name, bandwidth, center=True, optimal_bw=True)
weight2 = wm.weight_matrix(data.x, data.z, data.e)
assert_equal(weight, weight2)
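
Each kernel test rebuilds the long-run covariance of the moment conditions by hand: the lag-0 outer product plus kernel-weighted autocovariances and their transposes, scaled by the sample size. A self-contained sketch of that assembly on toy data (an illustration, not the library's implementation):

```python
import numpy as np

def hac_weight_matrix(ze: np.ndarray, w: np.ndarray) -> np.ndarray:
    # Gamma_0 plus kernel-weighted autocovariances Gamma_j + Gamma_j',
    # divided by nobs -- the same assembly as the loops in these tests.
    nobs = ze.shape[0]
    s = w[0] * (ze.T @ ze)
    for j in range(1, len(w)):
        gamma_j = ze[j:].T @ ze[:-j]
        s += w[j] * (gamma_j + gamma_j.T)
    return s / nobs

rs = np.random.RandomState(0)
ze = rs.standard_normal((200, 3))      # toy instrument-by-residual moments
w = 1.0 - np.arange(4) / 4.0           # Bartlett weights, bandwidth 3
print(hac_weight_matrix(ze, w).shape)  # (3, 3)
```
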
def test_debiased(self, kernel, data, bandwidth):
- wm = KernelWeightMatrix(debiased=True, kernel=kernel.kernel, bandwidth=bandwidth)
+ wm = KernelWeightMatrix(
+ debiased=True, kernel=kernel.kernel, bandwidth=bandwidth
+ )
weight = wm.weight_matrix(data.x, data.z, data.e)
z, e, nobs, nvar = data.z, data.e, data.nobs, data.nvar
bw = bandwidth or wm.bandwidth
@@ -134,8 +134,8 @@ def test_debiased(self, kernel, data, bandwidth):
op = ze[i:].T @ ze[:-i]
s += w[i] * (op + op.T)
assert_allclose(weight, s / (nobs - nvar))
- assert wm.config['bandwidth'] == bw
- assert wm.config['kernel'] == kernel.kernel
+ assert wm.config["bandwidth"] == bw
+ assert wm.config["kernel"] == kernel.kernel
def test_config(self, data, kernel, bandwidth):
wm = KernelWeightMatrix(kernel=kernel.kernel, bandwidth=bandwidth)
@@ -149,10 +149,10 @@ def test_config(self, data, kernel, bandwidth):
op = ze[i:].T @ ze[:-i]
s += w[i] * (op + op.T)
assert_allclose(weight, s / nobs)
- assert wm.config['center'] is False
- assert wm.config['debiased'] is False
- assert wm.config['bandwidth'] == bw
- assert wm.config['kernel'] == kernel.kernel
+ assert wm.config["center"] is False
+ assert wm.config["debiased"] is False
+ assert wm.config["bandwidth"] == bw
+ assert wm.config["kernel"] == kernel.kernel
for name in kernel.alt_names:
wm = KernelWeightMatrix(kernel=name, bandwidth=bandwidth)
@@ -191,9 +191,9 @@ def test_debiased(self, data):
def test_config(self, data):
wm = OneWayClusteredWeightMatrix(data.clusters)
- assert wm.config['center'] is False
- assert wm.config['debiased'] is False
- assert_equal(wm.config['clusters'], data.clusters)
+ assert wm.config["center"] is False
+ assert wm.config["debiased"] is False
+ assert_equal(wm.config["clusters"], data.clusters)
def test_errors(self, data):
wm = OneWayClusteredWeightMatrix(data.clusters[:10])
@@ -203,57 +203,73 @@ def test_errors(self, data):
class TestGMMCovariance(object):
def test_homoskedastic(self, data):
- c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, 'unadjusted')
+ c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, "unadjusted")
s = HomoskedasticWeightMatrix().weight_matrix(data.x, data.z, data.e)
x, z = data.x, data.z
xzwswzx = x.T @ z @ s @ z.T @ x / data.nobs
cov = data.xzizx_inv @ xzwswzx @ data.xzizx_inv
cov = (cov + cov.T) / 2
assert_allclose(c.cov, cov)
- assert c.config['debiased'] is False
+ assert c.config["debiased"] is False
def test_heteroskedastic(self, data):
- c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, 'robust')
+ c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, "robust")
s = HeteroskedasticWeightMatrix().weight_matrix(data.x, data.z, data.e)
x, z = data.x, data.z
xzwswzx = x.T @ z @ s @ z.T @ x / data.nobs
cov = data.xzizx_inv @ xzwswzx @ data.xzizx_inv
cov = (cov + cov.T) / 2
assert_allclose(c.cov, cov)
- assert c.config['debiased'] is False
+ assert c.config["debiased"] is False
def test_clustered(self, data):
- c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, 'clustered',
- clusters=data.clusters)
- s = OneWayClusteredWeightMatrix(clusters=data.clusters).weight_matrix(data.x, data.z,
- data.e)
+ c = IVGMMCovariance(
+ data.x,
+ data.y,
+ data.z,
+ data.params,
+ data.i,
+ "clustered",
+ clusters=data.clusters,
+ )
+ s = OneWayClusteredWeightMatrix(clusters=data.clusters).weight_matrix(
+ data.x, data.z, data.e
+ )
x, z = data.x, data.z
xzwswzx = x.T @ z @ s @ z.T @ x / data.nobs
cov = data.xzizx_inv @ xzwswzx @ data.xzizx_inv
cov = (cov + cov.T) / 2
assert_allclose(c.cov, cov)
- assert c.config['debiased'] is False
- assert_equal(c.config['clusters'], data.clusters)
- c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, 'clustered')
- assert 'Clustered' in str(c)
+ assert c.config["debiased"] is False
+ assert_equal(c.config["clusters"], data.clusters)
+ c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, "clustered")
+ assert "Clustered" in str(c)
def test_kernel(self, data, kernel, bandwidth):
- c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, 'kernel',
- kernel=kernel.kernel, bandwidth=bandwidth)
- s = KernelWeightMatrix(kernel=kernel.kernel, bandwidth=bandwidth).weight_matrix(data.x,
- data.z,
- data.e)
+ c = IVGMMCovariance(
+ data.x,
+ data.y,
+ data.z,
+ data.params,
+ data.i,
+ "kernel",
+ kernel=kernel.kernel,
+ bandwidth=bandwidth,
+ )
+ s = KernelWeightMatrix(kernel=kernel.kernel, bandwidth=bandwidth).weight_matrix(
+ data.x, data.z, data.e
+ )
x, z, nobs = data.x, data.z, data.nobs
xzwswzx = x.T @ z @ s @ z.T @ x / data.nobs
cov = data.xzizx_inv @ xzwswzx @ data.xzizx_inv
cov = (cov + cov.T) / 2
assert_allclose(c.cov, cov)
- assert c.config['kernel'] == kernel.kernel
- assert c.config['debiased'] is False
- assert c.config['bandwidth'] == bandwidth or nobs - 2
- c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, 'kernel')
- assert 'Kernel' in str(c)
+ assert c.config["kernel"] == kernel.kernel
+ assert c.config["debiased"] is False
+ assert c.config["bandwidth"] == bandwidth or nobs - 2
+ c = IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, "kernel")
+ assert "Kernel" in str(c)
def test_unknown(self, data):
with pytest.raises(ValueError):
- IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, 'unknown').cov
+ IVGMMCovariance(data.x, data.y, data.z, data.params, data.i, "unknown").cov
diff --git a/linearmodels/tests/iv/test_missing_data.py b/linearmodels/tests/iv/test_missing_data.py
index e5c4a0043d..b679758372 100644
--- a/linearmodels/tests/iv/test_missing_data.py
+++ b/linearmodels/tests/iv/test_missing_data.py
@@ -6,15 +6,17 @@
from linearmodels.iv import IV2SLS, IVGMM, IVGMMCUE, IVLIML
from linearmodels.utility import AttrDict
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
-@pytest.fixture(scope='module', params=[IV2SLS, IVLIML, IVGMM, IVGMMCUE])
+@pytest.fixture(scope="module", params=[IV2SLS, IVLIML, IVGMM, IVGMMCUE])
def model(request):
return request.param
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data():
n, q, k, p = 1000, 2, 5, 3
np.random.seed(12345)
@@ -29,7 +31,7 @@ def data():
v = np.random.multivariate_normal(np.zeros(r.shape[0]), r, n)
v.flat[::93] = np.nan
x = v[:, :k]
- z = v[:, k:k + p]
+ z = v[:, k : k + p]
e = v[:, [-1]]
params = np.arange(1, k + 1) / k
params = params[:, None]
@@ -49,25 +51,32 @@ def data():
endog_clean = x_clean[:, :q]
instr_clean = z_clean
clusters_clean = clusters[not_missing]
- return AttrDict(dep=dep, exog=exog, endog=endog, instr=instr,
- dep_clean=dep_clean, exog_clean=exog_clean,
- endog_clean=endog_clean, instr_clean=instr_clean,
- clusters=clusters, clusters_clean=clusters_clean)
+ return AttrDict(
+ dep=dep,
+ exog=exog,
+ endog=endog,
+ instr=instr,
+ dep_clean=dep_clean,
+ exog_clean=exog_clean,
+ endog_clean=endog_clean,
+ instr_clean=instr_clean,
+ clusters=clusters,
+ clusters_clean=clusters_clean,
+ )
def get_all(v):
- attr = [d for d in dir(v) if not d.startswith('_')]
+ attr = [d for d in dir(v) if not d.startswith("_")]
for a in attr:
val = getattr(v, a)
- if a in ('conf_int', 'durbin', 'wu_hausman', 'c_stat'):
+ if a in ("conf_int", "durbin", "wu_hausman", "c_stat"):
val()
def test_missing(data, model):
mod = model(data.dep, data.exog, data.endog, data.instr)
res = mod.fit()
- mod = model(data.dep_clean, data.exog_clean,
- data.endog_clean, data.instr_clean)
+ mod = model(data.dep_clean, data.exog_clean, data.endog_clean, data.instr_clean)
res2 = mod.fit()
assert res.nobs == res2.nobs
assert_series_equal(res.params, res2.params)
@@ -77,11 +86,10 @@ def test_missing(data, model):
def test_missing_clustered(data):
mod = IV2SLS(data.dep, data.exog, data.endog, data.instr)
with pytest.raises(ValueError):
- mod.fit(cov_type='clustered', clusters=data.clusters)
- res = mod.fit(cov_type='clustered', clusters=data.clusters_clean)
- mod = IV2SLS(data.dep_clean, data.exog_clean,
- data.endog_clean, data.instr_clean)
- res2 = mod.fit(cov_type='clustered', clusters=data.clusters_clean)
+ mod.fit(cov_type="clustered", clusters=data.clusters)
+ res = mod.fit(cov_type="clustered", clusters=data.clusters_clean)
+ mod = IV2SLS(data.dep_clean, data.exog_clean, data.endog_clean, data.instr_clean)
+ res2 = mod.fit(cov_type="clustered", clusters=data.clusters_clean)
assert res.nobs == res2.nobs
assert_series_equal(res.params, res2.params)
get_all(res)
diff --git a/linearmodels/tests/iv/test_model.py b/linearmodels/tests/iv/test_model.py
index 1e98dcbf32..4ed0114b7f 100644
--- a/linearmodels/tests/iv/test_model.py
+++ b/linearmodels/tests/iv/test_model.py
@@ -16,7 +16,7 @@
from linearmodels.utility import AttrDict
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data():
n, q, k, p = 1000, 2, 5, 3
np.random.seed(12345)
@@ -30,7 +30,7 @@ def data():
r += np.eye(9) * 0.5
v = np.random.multivariate_normal(np.zeros(r.shape[0]), r, n)
x = v[:, :k]
- z = v[:, k:k + p]
+ z = v[:, k : k + p]
e = v[:, [-1]]
params = np.arange(1, k + 1) / k
params = params[:, None]
@@ -43,18 +43,34 @@ def data():
vinv = np.linalg.inv(v)
kappa = 0.99
vk = (x.T @ x * (1 - kappa) + kappa * xhat.T @ xhat) / nobs
- return AttrDict(nobs=nobs, e=e, x=x, y=y, z=z, xhat=xhat,
- params=params, s2=s2, s2_debiased=s2_debiased,
- clusters=clusters, nvar=nvar, v=v, vinv=vinv, vk=vk,
- kappa=kappa, dep=y, exog=x[:, q:], endog=x[:, :q],
- instr=z)
+ return AttrDict(
+ nobs=nobs,
+ e=e,
+ x=x,
+ y=y,
+ z=z,
+ xhat=xhat,
+ params=params,
+ s2=s2,
+ s2_debiased=s2_debiased,
+ clusters=clusters,
+ nvar=nvar,
+ v=v,
+ vinv=vinv,
+ vk=vk,
+ kappa=kappa,
+ dep=y,
+ exog=x[:, q:],
+ endog=x[:, :q],
+ instr=z,
+ )
def get_all(v):
- attr = [d for d in dir(v) if not d.startswith('_')]
+ attr = [d for d in dir(v) if not d.startswith("_")]
for a in attr:
val = getattr(v, a)
- if a in ('conf_int', 'durbin', 'wu_hausman', 'c_stat'):
+ if a in ("conf_int", "durbin", "wu_hausman", "c_stat"):
val()
@@ -102,12 +118,14 @@ def test_kappa_fuller_warning(self, data):
def test_string_cat(self, data):
instr = data.instr.copy()
n = data.instr.shape[0]
- cat = pd.Series(['a'] * (n // 2) + ['b'] * (n // 2))
+ cat = pd.Series(["a"] * (n // 2) + ["b"] * (n // 2))
instr = pd.DataFrame(instr)
- instr['cat'] = cat
- res = IV2SLS(data.dep, data.exog, data.endog, instr).fit(cov_type='unadjusted')
- instr['cat'] = cat.astype('category')
- res_cat = IV2SLS(data.dep, data.exog, data.endog, instr).fit(cov_type='unadjusted')
+ instr["cat"] = cat
+ res = IV2SLS(data.dep, data.exog, data.endog, instr).fit(cov_type="unadjusted")
+ instr["cat"] = cat.astype("category")
+ res_cat = IV2SLS(data.dep, data.exog, data.endog, instr).fit(
+ cov_type="unadjusted"
+ )
assert_series_equal(res.params, res_cat.params)
def test_no_regressors(self, data):
@@ -260,6 +278,7 @@ def test_model_summary_smoke(data):
def test_model_missing(data):
import copy
+
data2 = AttrDict()
for key in data:
data2[key] = copy.deepcopy(data[key])
@@ -290,24 +309,20 @@ def test_compare(data):
c = compare([res1, res2, res3, res4])
assert len(c.rsquared) == 4
c.summary
- c = compare({'Model A': res1,
- 'Model B': res2,
- 'Model C': res3,
- 'Model D': res4})
+ c = compare({"Model A": res1, "Model B": res2, "Model C": res3, "Model D": res4})
c.summary
res = OrderedDict()
- res['Model A'] = res1
- res['Model B'] = res2
- res['Model C'] = res3
- res['Model D'] = res4
+ res["Model A"] = res1
+ res["Model B"] = res2
+ res["Model C"] = res3
+ res["Model D"] = res4
c = compare(res)
c.summary
c.pvalues
res1 = IV2SLS(data.dep, data.exog[:, :1], None, None).fit()
res2 = IV2SLS(data.dep, data.exog[:, :2], None, None).fit()
- c = compare({'Model A': res1,
- 'Model B': res2})
+ c = compare({"Model A": res1, "Model B": res2})
c.summary
@@ -316,10 +331,10 @@ def test_compare_single(data):
c = compare([res1])
assert len(c.rsquared) == 1
c.summary
- c = compare({'Model A': res1})
+ c = compare({"Model A": res1})
c.summary
res = OrderedDict()
- res['Model A'] = res1
+ res["Model A"] = res1
c = compare(res)
c.summary
c.pvalues
@@ -339,18 +354,18 @@ def test_first_stage_summary(data):
def test_gmm_str(data):
mod = IVGMM(data.dep, data.exog, data.endog, data.instr)
- str(mod.fit(cov_type='unadjusted'))
- str(mod.fit(cov_type='robust'))
- str(mod.fit(cov_type='clustered', clusters=data.clusters))
- str(mod.fit(cov_type='kernel'))
+ str(mod.fit(cov_type="unadjusted"))
+ str(mod.fit(cov_type="robust"))
+ str(mod.fit(cov_type="clustered", clusters=data.clusters))
+ str(mod.fit(cov_type="kernel"))
def test_gmm_cue_optimization_options(data):
mod = IVGMMCUE(data.dep, data.exog, data.endog, data.instr)
res_none = mod.fit(display=False)
- opt_options = dict(method='BFGS', options={'disp': False})
+ opt_options = dict(method="BFGS", options={"disp": False})
res_bfgs = mod.fit(display=False, opt_options=opt_options)
- opt_options = dict(method='L-BFGS-B', options={'disp': False})
+ opt_options = dict(method="L-BFGS-B", options={"disp": False})
res_lbfgsb = mod.fit(display=False, opt_options=opt_options)
assert res_none.iterations > 2
assert res_bfgs.iterations > 2
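
The `opt_options` dict is passed through to `scipy.optimize.minimize`, so any method/options pair scipy accepts is valid. A standalone example with the same shape of arguments:

```python
import numpy as np
from scipy.optimize import minimize

# Same shape of arguments as the opt_options dicts above.
res = minimize(lambda b: np.sum((b - 1.0) ** 2), x0=np.zeros(3),
               method="L-BFGS-B", options={"disp": False})
print(res.x)   # converges to [1. 1. 1.]
```
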
diff --git a/linearmodels/tests/iv/test_postestimation.py b/linearmodels/tests/iv/test_postestimation.py
index 8ad58f4f43..a61e859264 100644
--- a/linearmodels/tests/iv/test_postestimation.py
+++ b/linearmodels/tests/iv/test_postestimation.py
@@ -11,28 +11,32 @@
CWD = os.path.split(os.path.abspath(__file__))[0]
-HOUSING_DATA = pd.read_csv(os.path.join(CWD, 'results', 'housing.csv'), index_col=0)
-HOUSING_DATA.region = HOUSING_DATA.region.astype('category')
-HOUSING_DATA.state = HOUSING_DATA.state.astype('category')
-HOUSING_DATA.division = HOUSING_DATA.division.astype('category')
+HOUSING_DATA = pd.read_csv(os.path.join(CWD, "results", "housing.csv"), index_col=0)
+HOUSING_DATA.region = HOUSING_DATA.region.astype("category")
+HOUSING_DATA.state = HOUSING_DATA.state.astype("category")
+HOUSING_DATA.division = HOUSING_DATA.division.astype("category")
-SIMULATED_DATA = pd.read_stata(os.path.join(CWD, 'results', 'simulated-data.dta'))
+SIMULATED_DATA = pd.read_stata(os.path.join(CWD, "results", "simulated-data.dta"))
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data():
- return AttrDict(dep=SIMULATED_DATA.y_robust,
- exog=add_constant(SIMULATED_DATA[['x3', 'x4', 'x5']]),
- endog=SIMULATED_DATA[['x1', 'x2']],
- instr=SIMULATED_DATA[['z1', 'z2']])
+ return AttrDict(
+ dep=SIMULATED_DATA.y_robust,
+ exog=add_constant(SIMULATED_DATA[["x3", "x4", "x5"]]),
+ endog=SIMULATED_DATA[["x1", "x2"]],
+ instr=SIMULATED_DATA[["z1", "z2"]],
+ )
def test_sargan(data):
# Stata code:
# ivregress 2sls y_robust x3 x4 x5 (x1=z1 z2)
# estat overid
- res = IV2SLS(data.dep, data.exog, data.endog[['x1']], data.instr).fit(cov_type='unadjusted')
- assert_allclose(res.sargan.stat, .176535, rtol=1e-4)
+ res = IV2SLS(data.dep, data.exog, data.endog[["x1"]], data.instr).fit(
+ cov_type="unadjusted"
+ )
+ assert_allclose(res.sargan.stat, 0.176535, rtol=1e-4)
assert_allclose(res.sargan.pval, 0.6744, rtol=1e-4)
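
The Sargan test pinned to Stata's `estat overid` here is, in textbook form, n times the R-squared from regressing the 2SLS residuals on the full instrument set. A hedged sketch of that computation (an illustration, not linearmodels' internals):

```python
import numpy as np

def sargan_stat(resid: np.ndarray, instruments: np.ndarray) -> float:
    # n * R^2 from regressing the 2SLS residuals on all instruments.
    # With a constant among the instruments the residuals have mean zero,
    # so centered and uncentered R^2 coincide. The statistic is chi2 with
    # (number of instruments - number of endogenous) degrees of freedom.
    n = resid.shape[0]
    beta, *_ = np.linalg.lstsq(instruments, resid, rcond=None)
    fitted = instruments @ beta
    return n * float(fitted @ fitted) / float(resid @ resid)

rs = np.random.RandomState(3)
z = np.column_stack([np.ones(500), rs.standard_normal((500, 2))])
u = rs.standard_normal(500)
print(sargan_stat(u - u.mean(), z))   # small under valid instruments
```
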
@@ -40,75 +44,85 @@ def test_basmann(data):
# Stata code:
# ivregress 2sls y_robust x3 x4 x5 (x1=z1 z2)
# estat overid
- res = IV2SLS(data.dep, data.exog, data.endog[['x1']], data.instr).fit(cov_type='unadjusted')
- assert_allclose(res.basmann.stat, .174822, rtol=1e-4)
+ res = IV2SLS(data.dep, data.exog, data.endog[["x1"]], data.instr).fit(
+ cov_type="unadjusted"
+ )
+ assert_allclose(res.basmann.stat, 0.174822, rtol=1e-4)
assert_allclose(res.basmann.pval, 0.6759, rtol=1e-3)
def test_durbin(data):
- res = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(cov_type='unadjusted')
+ res = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(cov_type="unadjusted")
assert_allclose(res.durbin().stat, 35.1258, rtol=1e-4)
assert_allclose(res.durbin().pval, 0.0000, atol=1e-6)
- assert_allclose(res.durbin('x1').stat, .156341, rtol=1e-4)
- assert_allclose(res.durbin('x1').pval, 0.6925, rtol=1e-3)
+ assert_allclose(res.durbin("x1").stat, 0.156341, rtol=1e-4)
+ assert_allclose(res.durbin("x1").pval, 0.6925, rtol=1e-3)
def test_wu_hausman(data):
- res = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(cov_type='unadjusted')
+ res = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(cov_type="unadjusted")
assert_allclose(res.wu_hausman().stat, 18.4063, rtol=1e-4)
assert_allclose(res.wu_hausman().pval, 0.0000, atol=1e-6)
- assert_allclose(res.wu_hausman('x1').stat, .154557, rtol=1e-4)
- assert_allclose(res.wu_hausman('x1').pval, 0.6944, rtol=1e-3)
+ assert_allclose(res.wu_hausman("x1").stat, 0.154557, rtol=1e-4)
+ assert_allclose(res.wu_hausman("x1").pval, 0.6944, rtol=1e-3)
def test_wooldridge_score(data):
- res = IV2SLS(data.dep, data.exog, data.endog[['x1', 'x2']], data.instr).fit(cov_type='robust')
+ res = IV2SLS(data.dep, data.exog, data.endog[["x1", "x2"]], data.instr).fit(
+ cov_type="robust"
+ )
assert_allclose(res.wooldridge_score.stat, 22.684, rtol=1e-4)
assert_allclose(res.wooldridge_score.pval, 0.0000, atol=1e-4)
def test_wooldridge_regression(data):
- mod = IV2SLS(data.dep, data.exog, data.endog[['x1', 'x2']], data.instr)
- res = mod.fit(cov_type='robust', debiased=True)
+ mod = IV2SLS(data.dep, data.exog, data.endog[["x1", "x2"]], data.instr)
+ res = mod.fit(cov_type="robust", debiased=True)
# Scale to correct for F vs Wald treatment
assert_allclose(res.wooldridge_regression.stat, 2 * 13.3461, rtol=1e-4)
assert_allclose(res.wooldridge_regression.pval, 0.0000, atol=1e-4)
def test_wooldridge_overid(data):
- res = IV2SLS(data.dep, data.exog, data.endog[['x1']], data.instr).fit(cov_type='robust')
+ res = IV2SLS(data.dep, data.exog, data.endog[["x1"]], data.instr).fit(
+ cov_type="robust"
+ )
assert_allclose(res.wooldridge_overid.stat, 0.221648, rtol=1e-4)
assert_allclose(res.wooldridge_overid.pval, 0.6378, rtol=1e-3)
def test_anderson_rubin(data):
- res = IV2SLS(data.dep, data.exog, data.endog[['x1']], data.instr).fit(cov_type='unadjusted')
- assert_allclose(res.nobs * (res._liml_kappa - 1), .176587, rtol=1e-4)
+ res = IV2SLS(data.dep, data.exog, data.endog[["x1"]], data.instr).fit(
+ cov_type="unadjusted"
+ )
+ assert_allclose(res.nobs * (res._liml_kappa - 1), 0.176587, rtol=1e-4)
def test_basmann_f(data):
- res = IV2SLS(data.dep, data.exog, data.endog[['x1']], data.instr).fit(cov_type='unadjusted')
- assert_allclose(res.basmann_f.stat, .174821, rtol=1e-4)
+ res = IV2SLS(data.dep, data.exog, data.endog[["x1"]], data.instr).fit(
+ cov_type="unadjusted"
+ )
+ assert_allclose(res.basmann_f.stat, 0.174821, rtol=1e-4)
assert_allclose(res.basmann_f.pval, 0.6760, rtol=1e-3)
def test_c_stat_smoke(data):
- res = IVGMM(data.dep, data.exog, data.endog, data.instr).fit(cov_type='robust')
+ res = IVGMM(data.dep, data.exog, data.endog, data.instr).fit(cov_type="robust")
c_stat = res.c_stat()
assert_allclose(c_stat.stat, 22.684, rtol=1e-4)
assert_allclose(c_stat.pval, 0.00, atol=1e-3)
- c_stat = res.c_stat(['x1'])
- assert_allclose(c_stat.stat, .158525, rtol=1e-3)
+ c_stat = res.c_stat(["x1"])
+ assert_allclose(c_stat.stat, 0.158525, rtol=1e-3)
assert_allclose(c_stat.pval, 0.6905, rtol=1e-3)
# Final test
- c_stat2 = res.c_stat('x1')
+ c_stat2 = res.c_stat("x1")
assert_allclose(c_stat.stat, c_stat2.stat)
def test_linear_restriction(data):
- res = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(cov_type='robust')
+ res = IV2SLS(data.dep, data.exog, data.endog, data.instr).fit(cov_type="robust")
nvar = len(res.params)
q = np.eye(nvar)
ts = res.wald_test(q, np.zeros(nvar))
@@ -118,6 +132,6 @@ def test_linear_restriction(data):
assert_allclose(stat, ts.stat)
assert ts.df == nvar
- formula = ' = '.join(res.params.index) + ' = 0'
+ formula = " = ".join(res.params.index) + " = 0"
ts2 = res.wald_test(formula=formula)
assert_allclose(ts.stat, ts2.stat)
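
The Wald test applied here with R = I and q = 0 is the quadratic form (Rb - q)' (R V R')^{-1} (Rb - q). A minimal sketch with made-up parameter estimates and covariance:

```python
import numpy as np

def wald_stat(beta, V, R, q):
    # (R beta - q)' (R V R')^{-1} (R beta - q), chi2 with rows(R) df.
    diff = R @ beta - q
    return float(diff @ np.linalg.inv(R @ V @ R.T) @ diff)

beta = np.array([0.5, -0.2])
V = np.array([[0.04, 0.01], [0.01, 0.09]])
print(wald_stat(beta, V, np.eye(2), np.zeros(2)))  # H0: beta = 0
```
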
diff --git a/linearmodels/tests/iv/test_results.py b/linearmodels/tests/iv/test_results.py
index 05af3ac34e..750caf9d5c 100644
--- a/linearmodels/tests/iv/test_results.py
+++ b/linearmodels/tests/iv/test_results.py
@@ -10,21 +10,21 @@
from linearmodels.tests.panel._utility import assert_frame_similar
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data():
return generate_data()
-@pytest.fixture(scope='module', params=[IV2SLS, IVLIML, IVGMM, IVGMMCUE])
+@pytest.fixture(scope="module", params=[IV2SLS, IVLIML, IVGMM, IVGMMCUE])
def model(request):
return request.param
def result_checker(res):
for attr in dir(res):
- if attr.startswith('_') or attr in ('test_linear_constraint', 'wald_test'):
+ if attr.startswith("_") or attr in ("test_linear_constraint", "wald_test"):
continue
- if attr == 'first_stage':
+ if attr == "first_stage":
result_checker(getattr(res, attr))
attr = getattr(res, attr)
if callable(attr):
@@ -36,28 +36,28 @@ def result_checker(res):
def test_results(data, model):
mod = model(data.dep, data.exog, data.endog, data.instr)
- result_checker(mod.fit(cov_type='unadjusted'))
- result_checker(mod.fit(cov_type='robust'))
- result_checker(mod.fit(cov_type='kernel'))
- result_checker(mod.fit(cov_type='clustered', clusters=data.clusters))
+ result_checker(mod.fit(cov_type="unadjusted"))
+ result_checker(mod.fit(cov_type="robust"))
+ result_checker(mod.fit(cov_type="kernel"))
+ result_checker(mod.fit(cov_type="clustered", clusters=data.clusters))
result_checker(model(data.dep, data.exog, None, None).fit())
def test_results_single(data, model):
mod = model(data.dep, data.exog[:, 0], data.endog[:, 0], data.instr[:, 0])
- result_checker(mod.fit(cov_type='unadjusted'))
- result_checker(mod.fit(cov_type='robust'))
- result_checker(mod.fit(cov_type='kernel'))
- result_checker(mod.fit(cov_type='clustered', clusters=data.clusters))
+ result_checker(mod.fit(cov_type="unadjusted"))
+ result_checker(mod.fit(cov_type="robust"))
+ result_checker(mod.fit(cov_type="kernel"))
+ result_checker(mod.fit(cov_type="clustered", clusters=data.clusters))
def test_results_no_exog(data, model):
mod = model(data.dep, None, data.endog[:, 0], data.instr[:, 0])
- result_checker(mod.fit(cov_type='unadjusted'))
- result_checker(mod.fit(cov_type='robust'))
- result_checker(mod.fit(cov_type='kernel'))
- result_checker(mod.fit(cov_type='clustered', clusters=data.clusters))
+ result_checker(mod.fit(cov_type="unadjusted"))
+ result_checker(mod.fit(cov_type="robust"))
+ result_checker(mod.fit(cov_type="kernel"))
+ result_checker(mod.fit(cov_type="clustered", clusters=data.clusters))
def test_fitted_predict(data, model):
@@ -66,7 +66,7 @@ def test_fitted_predict(data, model):
assert_series_equal(res.idiosyncratic, res.resids)
y = mod.dependent.pandas
expected = asarray(y) - asarray(res.resids)[:, None]
- expected = DataFrame(expected, y.index, ['fitted_values'])
+ expected = DataFrame(expected, y.index, ["fitted_values"])
assert_frame_similar(expected, res.fitted_values)
assert_allclose(expected, res.fitted_values)
pred = res.predict()
@@ -76,7 +76,7 @@ def test_fitted_predict(data, model):
pred = res.predict(idiosyncratic=True, missing=True)
nobs = IVData(data.dep).pandas.shape[0]
assert pred.shape == (nobs, 2)
- assert list(pred.columns) == ['fitted_values', 'residual']
+ assert list(pred.columns) == ["fitted_values", "residual"]
def test_fitted_predict_exception(data, model):
diff --git a/linearmodels/tests/panel/_utility.py b/linearmodels/tests/panel/_utility.py
index 979c68c83d..77601d1fdc 100644
--- a/linearmodels/tests/panel/_utility.py
+++ b/linearmodels/tests/panel/_utility.py
@@ -16,13 +16,14 @@
except ImportError:
MISSING_XARRAY = True
-datatypes = ['numpy', 'pandas']
+datatypes = ["numpy", "pandas"]
if not MISSING_XARRAY:
- datatypes += ['xarray']
+ datatypes += ["xarray"]
-def lsdv(y: DataFrame, x: DataFrame, has_const=False, entity=False, time=False,
- general=None):
+def lsdv(
+ y: DataFrame, x: DataFrame, has_const=False, entity=False, time=False, general=None
+):
nvar = x.shape[1]
temp = x.reset_index()
cat_index = temp.index
@@ -30,23 +31,29 @@ def lsdv(y: DataFrame, x: DataFrame, has_const=False, entity=False, time=False,
cat = Categorical(temp.iloc[:, 0])
cat.index = cat_index
dummies = get_dummies(cat, drop_first=has_const)
- x = DataFrame(np.c_[x.values, dummies.values.astype(np.float64)],
- index=x.index,
- columns=list(x.columns) + list(dummies.columns))
+ x = DataFrame(
+ np.c_[x.values, dummies.values.astype(np.float64)],
+ index=x.index,
+ columns=list(x.columns) + list(dummies.columns),
+ )
if time:
cat = Categorical(temp.iloc[:, 1])
cat.index = cat_index
dummies = get_dummies(cat, drop_first=(has_const or entity))
- x = DataFrame(np.c_[x.values, dummies.values.astype(np.float64)],
- index=x.index,
- columns=list(x.columns) + list(dummies.columns))
+ x = DataFrame(
+ np.c_[x.values, dummies.values.astype(np.float64)],
+ index=x.index,
+ columns=list(x.columns) + list(dummies.columns),
+ )
if general is not None:
cat = Categorical(general)
cat.index = cat_index
dummies = get_dummies(cat, drop_first=(has_const or entity or time))
- x = DataFrame(np.c_[x.values, dummies.values.astype(np.float64)],
- index=x.index,
- columns=list(x.columns) + list(dummies.columns))
+ x = DataFrame(
+ np.c_[x.values, dummies.values.astype(np.float64)],
+ index=x.index,
+ columns=list(x.columns) + list(dummies.columns),
+ )
w = np.ones_like(y)
wy = w * y.values
@@ -57,8 +64,15 @@ def lsdv(y: DataFrame, x: DataFrame, has_const=False, entity=False, time=False,
return params[:nvar]
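
`lsdv` estimates fixed effects by brute force: append dummy columns for each entity/time/general category and run (weighted) least squares, returning only the leading slope coefficients. By the Frisch-Waugh theorem this matches the within estimator; a toy check on assumed data:

```python
import numpy as np
import pandas as pd

rs = np.random.RandomState(0)
entity = np.repeat(np.arange(20), 5)
alpha = rs.standard_normal(20)[entity]        # entity fixed effects
x = rs.standard_normal(100)
y = alpha + 0.5 * x + 0.1 * rs.standard_normal(100)

df = pd.DataFrame({"y": y, "x": x, "entity": entity})

# Within estimator: demean by entity, then a one-regressor OLS slope.
demeaned = df.groupby("entity").transform(lambda s: s - s.mean())
within = (demeaned["x"] @ demeaned["y"]) / (demeaned["x"] @ demeaned["x"])

# LSDV: regress on x plus the full set of entity dummies.
dummies = pd.get_dummies(df["entity"]).to_numpy(dtype=float)
X = np.column_stack([df["x"].to_numpy(), dummies])
beta = np.linalg.lstsq(X, df["y"].to_numpy(), rcond=None)[0]
assert np.isclose(within, beta[0])
print(within, beta[0])
```
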
-def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects=0, rng=None,
- num_cats=4):
+def generate_data(
+ missing,
+ datatype,
+ const=False,
+ ntk=(971, 7, 5),
+ other_effects=0,
+ rng=None,
+ num_cats=4,
+):
if rng is None:
np.random.seed(12345)
else:
@@ -72,9 +86,9 @@ def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects
w = np.random.chisquare(5, (t, n)) / 5
c = None
if other_effects == 1:
- cats = ['Industries']
+ cats = ["Industries"]
else:
- cats = ['cat.' + str(i) for i in range(other_effects)]
+ cats = ["cat." + str(i) for i in range(other_effects)]
if other_effects:
if not isinstance(num_cats, list):
num_cats = [num_cats] * other_effects
@@ -84,7 +98,7 @@ def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects
c.append(np.random.randint(0, nc, (1, t, n)))
c = np.concatenate(c, 0)
- vcats = ['varcat.' + str(i) for i in range(2)]
+ vcats = ["varcat." + str(i) for i in range(2)]
vc2 = np.ones((2, t, 1)) @ np.random.randint(0, n // 2, (2, 1, n))
vc1 = vc2[[0]]
@@ -97,51 +111,70 @@ def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects
locs = np.random.choice(n * t * k, int(n * t * k * missing))
x.flat[locs] = np.nan
- if datatype in ('pandas', 'xarray'):
- entities = ['firm' + str(i) for i in range(n)]
- time = date_range('1-1-1900', periods=t, freq='A-DEC')
- var_names = ['x' + str(i) for i in range(k)]
+ if datatype in ("pandas", "xarray"):
+ entities = ["firm" + str(i) for i in range(n)]
+ time = date_range("1-1-1900", periods=t, freq="A-DEC")
+ var_names = ["x" + str(i) for i in range(k)]
# y = DataFrame(y, index=time, columns=entities)
- y = panel_to_frame(y[None], items=['y'], major_axis=time, minor_axis=entities, swap=True)
- w = panel_to_frame(w[None], items=['w'], major_axis=time, minor_axis=entities, swap=True)
+ y = panel_to_frame(
+ y[None], items=["y"], major_axis=time, minor_axis=entities, swap=True
+ )
+ w = panel_to_frame(
+ w[None], items=["w"], major_axis=time, minor_axis=entities, swap=True
+ )
w = w.reindex(y.index)
- x = panel_to_frame(x, items=var_names, major_axis=time, minor_axis=entities, swap=True)
+ x = panel_to_frame(
+ x, items=var_names, major_axis=time, minor_axis=entities, swap=True
+ )
x = x.reindex(y.index)
- c = panel_to_frame(c, items=cats, major_axis=time, minor_axis=entities, swap=True)
+ c = panel_to_frame(
+ c, items=cats, major_axis=time, minor_axis=entities, swap=True
+ )
c = c.reindex(y.index)
- vc1 = panel_to_frame(vc1, items=vcats[:1], major_axis=time, minor_axis=entities, swap=True)
+ vc1 = panel_to_frame(
+ vc1, items=vcats[:1], major_axis=time, minor_axis=entities, swap=True
+ )
vc1 = vc1.reindex(y.index)
- vc2 = panel_to_frame(vc2, items=vcats, major_axis=time, minor_axis=entities, swap=True)
+ vc2 = panel_to_frame(
+ vc2, items=vcats, major_axis=time, minor_axis=entities, swap=True
+ )
vc2 = vc2.reindex(y.index)
- if datatype == 'xarray':
+ if datatype == "xarray":
# TODO: This is broken now, need to transform multiindex to xarray 3d
import xarray as xr
- x = xr.DataArray(PanelData(x).values3d,
- coords={'entities': entities, 'time': time,
- 'vars': var_names},
- dims=['vars', 'time', 'entities'])
- y = xr.DataArray(PanelData(y).values3d,
- coords={'entities': entities, 'time': time,
- 'vars': ['y']},
- dims=['vars', 'time', 'entities'])
- w = xr.DataArray(PanelData(w).values3d,
- coords={'entities': entities, 'time': time,
- 'vars': ['w']},
- dims=['vars', 'time', 'entities'])
+
+ x = xr.DataArray(
+ PanelData(x).values3d,
+ coords={"entities": entities, "time": time, "vars": var_names},
+ dims=["vars", "time", "entities"],
+ )
+ y = xr.DataArray(
+ PanelData(y).values3d,
+ coords={"entities": entities, "time": time, "vars": ["y"]},
+ dims=["vars", "time", "entities"],
+ )
+ w = xr.DataArray(
+ PanelData(w).values3d,
+ coords={"entities": entities, "time": time, "vars": ["w"]},
+ dims=["vars", "time", "entities"],
+ )
if c.shape[1] > 0:
- c = xr.DataArray(PanelData(c).values3d,
- coords={'entities': entities, 'time': time,
- 'vars': c.columns},
- dims=['vars', 'time', 'entities'])
- vc1 = xr.DataArray(PanelData(vc1).values3d,
- coords={'entities': entities, 'time': time,
- 'vars': vc1.columns},
- dims=['vars', 'time', 'entities'])
- vc2 = xr.DataArray(PanelData(vc2).values3d,
- coords={'entities': entities, 'time': time,
- 'vars': vc2.columns},
- dims=['vars', 'time', 'entities'])
+ c = xr.DataArray(
+ PanelData(c).values3d,
+ coords={"entities": entities, "time": time, "vars": c.columns},
+ dims=["vars", "time", "entities"],
+ )
+ vc1 = xr.DataArray(
+ PanelData(vc1).values3d,
+ coords={"entities": entities, "time": time, "vars": vc1.columns},
+ dims=["vars", "time", "entities"],
+ )
+ vc2 = xr.DataArray(
+ PanelData(vc2).values3d,
+ coords={"entities": entities, "time": time, "vars": vc2.columns},
+ dims=["vars", "time", "entities"],
+ )
if rng is not None:
rng.set_state(np.random.get_state())
@@ -152,27 +185,34 @@ def generate_data(missing, datatype, const=False, ntk=(971, 7, 5), other_effects
def assert_results_equal(res1, res2, test_fit=True, test_df=True, strict=True):
n = min(res1.params.shape[0], res2.params.shape[0])
- assert_series_equal(res1.params.iloc[:n], res2.params.iloc[:n],
- check_less_precise=not strict)
- assert_series_equal(res1.pvalues.iloc[:n], res2.pvalues.iloc[:n],
- check_less_precise=not strict)
- assert_series_equal(res1.tstats.iloc[:n], res2.tstats.iloc[:n],
- check_less_precise=not strict)
- assert_frame_equal(res1.cov.iloc[:n, :n], res2.cov.iloc[:n, :n],
- check_less_precise=not strict)
- assert_frame_equal(res1.conf_int().iloc[:n], res2.conf_int().iloc[:n],
- check_less_precise=not strict)
+ assert_series_equal(
+ res1.params.iloc[:n], res2.params.iloc[:n], check_less_precise=not strict
+ )
+ assert_series_equal(
+ res1.pvalues.iloc[:n], res2.pvalues.iloc[:n], check_less_precise=not strict
+ )
+ assert_series_equal(
+ res1.tstats.iloc[:n], res2.tstats.iloc[:n], check_less_precise=not strict
+ )
+ assert_frame_equal(
+ res1.cov.iloc[:n, :n], res2.cov.iloc[:n, :n], check_less_precise=not strict
+ )
+ assert_frame_equal(
+ res1.conf_int().iloc[:n],
+ res2.conf_int().iloc[:n],
+ check_less_precise=not strict,
+ )
assert_allclose(res1.s2, res2.s2)
rtol = 1e-7 if strict else 1e-4
delta = 1 + (res1.resids.values - res2.resids.values) / max(
- res1.resids.std(),
- res2.resids.std())
+ res1.resids.std(), res2.resids.std()
+ )
assert_allclose(delta, np.ones_like(delta), rtol=rtol)
delta = 1 + (res1.wresids.values - res2.wresids.values) / max(
- res1.wresids.std(),
- res2.wresids.std())
+ res1.wresids.std(), res2.wresids.std()
+ )
assert_allclose(delta, np.ones_like(delta), rtol=rtol)
if test_df:
@@ -197,7 +237,7 @@ def assert_frame_similar(result, expected):
def access_attributes(result):
d = dir(result)
for key in d:
- if not key.startswith('_') and key not in ('wald_test',):
+ if not key.startswith("_") and key not in ("wald_test",):
val = getattr(result, key)
if callable(val):
val()
diff --git a/linearmodels/tests/panel/results/execute-stata-simulated-data.py b/linearmodels/tests/panel/results/execute-stata-simulated-data.py
index c9074559d5..ab7b140700 100644
--- a/linearmodels/tests/panel/results/execute-stata-simulated-data.py
+++ b/linearmodels/tests/panel/results/execute-stata-simulated-data.py
@@ -3,32 +3,34 @@
from os.path import join
import subprocess
-STATA_PATH = join('C:\\', 'Program Files (x86)', 'Stata13', 'StataMP-64.exe')
+STATA_PATH = join("C:\\", "Program Files (x86)", "Stata13", "StataMP-64.exe")
-dtafile = join(os.getcwd(), 'simulated-panel.dta')
+dtafile = join(os.getcwd(), "simulated-panel.dta")
# Permutations
# estimator -> be, fe, or regress to match pooled
# datasets -> (nothing), _light, _heavy

# vce options -> conventional (be, fe, re), robust(re, fe, *regress*), ols(*regress*)
-configs = {'xtreg {vars}, be vce(conventional)': 'between-conventional-',
- 'xtreg {vars}, be wls vce(conventional)': 'between-conventional-wls',
- 'xtreg {vars}, fe vce(conventional)': 'fixed_effect-conventional-',
- 'xtreg {vars}, fe vce(robust)': 'fixed_effect-robust-',
- 'xtreg {vars}, fe vce(cluster firm_id)': 'fixed_effect-cluster-',
- 'xtreg {vars}, re vce(conventional)': 'random_effect-conventional-',
- 'xtreg {vars}, re vce(robust)': 'random_effect-robust-',
- 'xtreg {vars}, re vce(cluster firm_id)': 'random_effect-cluster-',
- 'xtreg {vars} [aweight=w], fe vce(conventional)': 'fixed_effect-conventional-weighted',
- 'xtreg {vars} [aweight=w], fe vce(robust)': 'fixed_effect-robust-weighted',
- 'xtreg {vars} [aweight=w], fe vce(cluster firm_id)': 'fixed_effect-cluster-weighted',
- 'regress {vars}, vce(ols)': 'pooled-conventional-',
- 'regress {vars}, vce(robust)': 'pooled-robust-',
- 'regress {vars}, vce(cluster firm_id)': 'pooled-cluster-',
- 'regress {vars} [aweight=w], vce(ols)': 'pooled-conventional-weighted',
- 'regress {vars} [aweight=w], vce(robust)': 'pooled-robust-weighted',
- 'regress {vars} [aweight=w], vce(cluster firm_id)': 'pooled-cluster-weighted'}
+configs = {
+ "xtreg {vars}, be vce(conventional)": "between-conventional-",
+ "xtreg {vars}, be wls vce(conventional)": "between-conventional-wls",
+ "xtreg {vars}, fe vce(conventional)": "fixed_effect-conventional-",
+ "xtreg {vars}, fe vce(robust)": "fixed_effect-robust-",
+ "xtreg {vars}, fe vce(cluster firm_id)": "fixed_effect-cluster-",
+ "xtreg {vars}, re vce(conventional)": "random_effect-conventional-",
+ "xtreg {vars}, re vce(robust)": "random_effect-robust-",
+ "xtreg {vars}, re vce(cluster firm_id)": "random_effect-cluster-",
+ "xtreg {vars} [aweight=w], fe vce(conventional)": "fixed_effect-conventional-weighted",
+ "xtreg {vars} [aweight=w], fe vce(robust)": "fixed_effect-robust-weighted",
+ "xtreg {vars} [aweight=w], fe vce(cluster firm_id)": "fixed_effect-cluster-weighted",
+ "regress {vars}, vce(ols)": "pooled-conventional-",
+ "regress {vars}, vce(robust)": "pooled-robust-",
+ "regress {vars}, vce(cluster firm_id)": "pooled-cluster-",
+ "regress {vars} [aweight=w], vce(ols)": "pooled-conventional-weighted",
+ "regress {vars} [aweight=w], vce(robust)": "pooled-robust-weighted",
+ "regress {vars} [aweight=w], vce(cluster firm_id)": "pooled-cluster-weighted",
+}
od = OrderedDict() # type: OrderedDict
for key in sorted(configs.keys()):
@@ -39,11 +41,13 @@
start = """
use {dtafile}, clear \n
xtset firm_id time \n
-""".format(dtafile=dtafile)
+""".format(
+ dtafile=dtafile
+)
-_sep = '#################!{config}-{ending}!####################'
-endings = ['', '_light', '_heavy']
-variables = ['y', 'x1', 'x2', 'x3', 'x4', 'x5']
+_sep = "#################!{config}-{ending}!####################"
+endings = ["", "_light", "_heavy"]
+variables = ["y", "x1", "x2", "x3", "x4", "x5"]
results = """
estout using {outfile}, cells(b(fmt(%13.12g)) t(fmt(%13.12g)) p(fmt(%13.12g))) """
@@ -64,25 +68,25 @@
file close myfile
"""
-outfile = os.path.join(os.getcwd(), 'stata-panel-simulated-results.txt')
+outfile = os.path.join(os.getcwd(), "stata-panel-simulated-results.txt")
if os.path.exists(outfile):
os.unlink(outfile)
-with open('simulated-results.do', 'w') as stata:
+with open("simulated-results.do", "w") as stata:
stata.write(start)
for config in configs:
descr = configs[config]
for ending in endings:
- _vars = ' '.join([v + ending for v in variables])
+ _vars = " ".join([v + ending for v in variables])
command = config.format(vars=_vars)
sep = _sep.format(config=descr, ending=ending)
stata.write(section_header.format(outfile=outfile, separator=sep))
- stata.write(command + '\n')
+ stata.write(command + "\n")
stata.write(results.format(outfile=outfile))
- stata.write('\n' * 4)
+ stata.write("\n" * 4)
-do_file = join(os.getcwd(), 'simulated-results.do')
-cmd = [STATA_PATH, '/e', 'do', do_file]
-print(' '.join(cmd))
+do_file = join(os.getcwd(), "simulated-results.do")
+cmd = [STATA_PATH, "/e", "do", do_file]
+print(" ".join(cmd))
subprocess.call(cmd)
diff --git a/linearmodels/tests/panel/results/generate-panel-data.py b/linearmodels/tests/panel/results/generate-panel-data.py
index 25d09ad932..9cd7956d3a 100644
--- a/linearmodels/tests/panel/results/generate-panel-data.py
+++ b/linearmodels/tests/panel/results/generate-panel-data.py
@@ -19,14 +19,14 @@
w = np.ones((t, 1)) @ w
w = w / w.mean()
-items = ['x' + str(i) for i in range(1, k + 1)]
-items = ['intercept'] + items
-major = pd.date_range('12-31-1999', periods=t, freq='A-DEC')
-minor = ['firm.' + str(i) for i in range(1, n + 1)]
+items = ["x" + str(i) for i in range(1, k + 1)]
+items = ["intercept"] + items
+major = pd.date_range("12-31-1999", periods=t, freq="A-DEC")
+minor = ["firm." + str(i) for i in range(1, n + 1)]
x = panel_to_frame(x, items, major, minor, swap=True)
-y = panel_to_frame(y[None, :], ['y'], major, minor, swap=True)
-w = panel_to_frame(w[None, :], ['w'], major, minor, swap=True)
+y = panel_to_frame(y[None, :], ["y"], major, minor, swap=True)
+w = panel_to_frame(w[None, :], ["w"], major, minor, swap=True)
x = PanelData(x)
y = PanelData(y)
@@ -34,29 +34,29 @@
z = concat([x.dataframe, y.dataframe, w.dataframe], 1)
final_index = pd.MultiIndex.from_product([minor, major])
-final_index.levels[0].name = 'firm'
+final_index.levels[0].name = "firm"
z = z.reindex(final_index)
-z.index.levels[0].name = 'firm'
-z.index.levels[1].name = 'time'
+z.index.levels[0].name = "firm"
+z.index.levels[1].name = "time"
z = z.reset_index()
-z['firm_id'] = z.firm.astype('category')
-z['firm_id'] = z.firm_id.cat.codes
+z["firm_id"] = z.firm.astype("category")
+z["firm_id"] = z.firm_id.cat.codes
-vars = ['y', 'x1', 'x2', 'x3', 'x4', 'x5']
+vars = ["y", "x1", "x2", "x3", "x4", "x5"]
missing = 0.05
for v in vars:
locs = np.random.choice(n * t, int(n * t * missing))
temp = z[v].copy()
temp.loc[locs] = np.nan
- z[v + '_light'] = temp
+ z[v + "_light"] = temp
-vars = ['y', 'x1', 'x2', 'x3', 'x4', 'x5']
+vars = ["y", "x1", "x2", "x3", "x4", "x5"]
missing = 0.20
for v in vars:
locs = np.random.choice(n * t, int(n * t * missing))
temp = z[v].copy()
temp.loc[locs] = np.nan
- z[v + '_heavy'] = temp
+ z[v + "_heavy"] = temp
-z.to_stata('simulated-panel.dta')
+z.to_stata("simulated-panel.dta")
diff --git a/linearmodels/tests/panel/results/parse_stata_results.py b/linearmodels/tests/panel/results/parse_stata_results.py
index ef13977aee..41d65ff168 100644
--- a/linearmodels/tests/panel/results/parse_stata_results.py
+++ b/linearmodels/tests/panel/results/parse_stata_results.py
@@ -5,22 +5,22 @@
from linearmodels.utility import AttrDict
-filename = 'stata-panel-simulated-results.txt'
+filename = "stata-panel-simulated-results.txt"
cwd = os.path.split(os.path.abspath(__file__))[0]
blocks = {}
block = []
-key = ''
+key = ""
with open(os.path.join(cwd, filename)) as results:
for line in results.readlines():
line = line.strip()
if not line:
continue
- if '###!' in line:
+ if "###!" in line:
if key:
blocks[key] = block
block = []
- key = line.split('!')[1]
+ key = line.split("!")[1]
block.append(line)
if block:
blocks[key] = block
@@ -30,30 +30,29 @@ def parse_block(block):
params = {}
stats = {}
for i, line in enumerate(block):
- if 'b/t' in line:
+ if "b/t" in line:
params_start = i + 1
- if 'rss' in line:
+ if "rss" in line:
stats_start = i
- if '** Variance **' in line:
+ if "** Variance **" in line:
variance_start = i + 1
for i in range(params_start, stats_start, 3):
- name, value = block[i].split('\t')
+ name, value = block[i].split("\t")
value = float(value)
tstat = float(block[i + 1])
pvalue = float(block[i + 2])
- params[name] = pd.Series(
- {'param': value, 'tstat': tstat, 'pvalue': pvalue})
+ params[name] = pd.Series({"param": value, "tstat": tstat, "pvalue": pvalue})
params = pd.DataFrame(params).sort_index()
for i in range(stats_start, variance_start - 1):
- if '\t' in block[i]:
- name, value = block[i].split('\t')
+ if "\t" in block[i]:
+ name, value = block[i].split("\t")
stats[name] = float(value)
else:
stats[block[i]] = None
stats = pd.Series(stats)
- var = '\n'.join(block[variance_start + 1:])
- variance = pd.read_csv(StringIO(',' + var.replace('\t', ',')))
+ var = "\n".join(block[variance_start + 1 :])
+ variance = pd.read_csv(StringIO("," + var.replace("\t", ",")))
index = variance.pop(variance.columns[0])
index.name = None
variance.index = index
@@ -70,5 +69,5 @@ def data():
return blocks
-if __name__ == '__main__':
+if __name__ == "__main__":
print(data())
diff --git a/linearmodels/tests/panel/test_between_ols.py b/linearmodels/tests/panel/test_between_ols.py
index 0487da1f98..4a08b2e672 100644
--- a/linearmodels/tests/panel/test_between_ols.py
+++ b/linearmodels/tests/panel/test_between_ols.py
@@ -15,7 +15,9 @@
assert_results_equal, datatypes,
generate_data)
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
def data_gen(missing, datatype):
@@ -58,17 +60,16 @@ def test_single_entity(data):
dep = mod.dependent.dataframe
exog = mod.exog.dataframe
ols = IV2SLS(dep, exog, None, None)
- ols_res = ols.fit(cov_type='unadjusted')
+ ols_res = ols.fit(cov_type="unadjusted")
assert_results_equal(res, ols_res)
- res = mod.fit(cov_type='robust', debiased=False)
- ols_res = ols.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", debiased=False)
+ ols_res = ols.fit(cov_type="robust")
assert_results_equal(res, ols_res)
- clusters = pd.DataFrame(np.random.randint(0, 9, dep.shape),
- index=dep.index)
- res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
- ols_res = ols.fit(cov_type='clustered', clusters=clusters)
+ clusters = pd.DataFrame(np.random.randint(0, 9, dep.shape), index=dep.index)
+ res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
+ ols_res = ols.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, ols_res)
@@ -97,17 +98,16 @@ def test_single_entity_weights(data):
dep = mod.dependent.dataframe
exog = mod.exog.dataframe
ols = IV2SLS(dep, exog, None, None, weights=mod.weights.values2d)
- ols_res = ols.fit(cov_type='unadjusted')
+ ols_res = ols.fit(cov_type="unadjusted")
assert_results_equal(res, ols_res)
- res = mod.fit(cov_type='robust', debiased=False)
- ols_res = ols.fit(cov_type='robust', debiased=False)
+ res = mod.fit(cov_type="robust", debiased=False)
+ ols_res = ols.fit(cov_type="robust", debiased=False)
assert_results_equal(res, ols_res)
- clusters = pd.DataFrame(np.random.randint(0, 9, dep.shape),
- index=dep.index)
- res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
- ols_res = ols.fit(cov_type='clustered', clusters=clusters, debiased=False)
+ clusters = pd.DataFrame(np.random.randint(0, 9, dep.shape), index=dep.index)
+ res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
+ ols_res = ols.fit(cov_type="clustered", clusters=clusters, debiased=False)
assert_results_equal(res, ols_res)
@@ -116,14 +116,13 @@ def test_multiple_obs_per_entity(data):
res = mod.fit(reweight=True, debiased=False)
dep = mod.dependent.values3d.mean(1).T
- exog = pd.DataFrame(mod.exog.values3d.mean(1).T,
- columns=mod.exog.vars)
+ exog = pd.DataFrame(mod.exog.values3d.mean(1).T, columns=mod.exog.vars)
ols = IV2SLS(dep, exog, None, None)
- ols_res = ols.fit(cov_type='unadjusted')
+ ols_res = ols.fit(cov_type="unadjusted")
assert_results_equal(res, ols_res)
- res = mod.fit(cov_type='robust', debiased=False)
- ols_res = ols.fit(cov_type='robust', debiased=False)
+ res = mod.fit(cov_type="robust", debiased=False)
+ ols_res = ols.fit(cov_type="robust", debiased=False)
assert_results_equal(res, ols_res)
clusters = mod.dependent.dataframe.copy()
@@ -133,8 +132,8 @@ def test_multiple_obs_per_entity(data):
clusters.loc[entity] = np.random.randint(9)
ols_clusters = PanelData(clusters).values3d.mean(1).T.astype(np.int32)
- res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
- ols_res = ols.fit(cov_type='clustered', clusters=ols_clusters)
+ res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
+ ols_res = ols.fit(cov_type="clustered", clusters=ols_clusters)
assert_results_equal(res, ols_res)
@@ -152,11 +151,11 @@ def test_multiple_obs_per_entity_weighted(data):
exog = pd.DataFrame(wexog, columns=mod.exog.vars)
ols = IV2SLS(dep, exog, None, None, weights=weights)
- ols_res = ols.fit(cov_type='unadjusted')
+ ols_res = ols.fit(cov_type="unadjusted")
assert_results_equal(res, ols_res)
- res = mod.fit(cov_type='robust', debiased=False)
- ols_res = ols.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", debiased=False)
+ ols_res = ols.fit(cov_type="robust")
assert_results_equal(res, ols_res)
clusters = mod.dependent.dataframe.copy()
@@ -166,8 +165,8 @@ def test_multiple_obs_per_entity_weighted(data):
clusters.loc[entity] = np.random.randint(9)
ols_clusters = PanelData(clusters).values3d.mean(1).T.astype(np.int32)
- res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
- ols_res = ols.fit(cov_type='clustered', clusters=ols_clusters)
+ res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
+ ols_res = ols.fit(cov_type="clustered", clusters=ols_clusters)
assert_results_equal(res, ols_res)
@@ -184,20 +183,21 @@ def test_missing(missing_data):
weights = weights.reindex(mod.dependent.entities)
ols = IV2SLS(dep, exog, None, None, weights=weights)
- ols_res = ols.fit(cov_type='unadjusted')
+ ols_res = ols.fit(cov_type="unadjusted")
assert_results_equal(res, ols_res)
- res = mod.fit(reweight=True, cov_type='robust', debiased=False)
- ols_res = ols.fit(cov_type='robust')
+ res = mod.fit(reweight=True, cov_type="robust", debiased=False)
+ ols_res = ols.fit(cov_type="robust")
assert_results_equal(res, ols_res)
vc1 = PanelData(missing_data.vc1)
ols_clusters = vc1.dataframe.groupby(level=0).mean().astype(np.int32)
ols_clusters = ols_clusters.reindex(mod.dependent.entities)
- res = mod.fit(reweight=True, cov_type='clustered',
- clusters=missing_data.vc1, debiased=False)
- ols_res = ols.fit(cov_type='clustered', clusters=ols_clusters)
+ res = mod.fit(
+ reweight=True, cov_type="clustered", clusters=missing_data.vc1, debiased=False
+ )
+ ols_res = ols.fit(cov_type="clustered", clusters=ols_clusters)
assert_results_equal(res, ols_res)
@@ -219,14 +219,14 @@ def test_missing_weighted(missing_data):
exog = (1.0 / weights.values) * exog
ols = IV2SLS(dep, exog, None, None, weights=weights)
- ols_res = ols.fit(cov_type='unadjusted')
+ ols_res = ols.fit(cov_type="unadjusted")
assert_results_equal(res, ols_res)
def test_unknown_covariance(data):
mod = BetweenOLS(data.y, data.x)
with pytest.raises(KeyError):
- mod.fit(cov_type='unknown')
+ mod.fit(cov_type="unknown")
def test_results_access(data):
@@ -266,14 +266,14 @@ def test_2way_cluster(data):
exog = mod.exog.dataframe.groupby(level=0).mean()
clusters = mod.dependent.dataframe.copy()
- clusters.columns = ['cluster.0']
- clusters['cluster.1'] = mod.dependent.dataframe.copy()
+ clusters.columns = ["cluster.0"]
+ clusters["cluster.1"] = mod.dependent.dataframe.copy()
clusters.loc[:, :] = 0
clusters = clusters.astype(np.int32)
for entity in mod.dependent.entities:
clusters.loc[entity, :] = np.random.randint(33, size=(1, 2))
- res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
+ res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
dep = dep.reindex(list(res.resids.index))
exog = exog.reindex(list(res.resids.index))
@@ -282,7 +282,7 @@ def test_2way_cluster(data):
ols_clusters = clusters.groupby(level=0).max()
ols_clusters = ols_clusters.reindex(list(res.resids.index))
- ols_res = ols.fit(cov_type='clustered', clusters=ols_clusters)
+ ols_res = ols.fit(cov_type="clustered", clusters=ols_clusters)
assert_results_equal(res, ols_res)
@@ -296,7 +296,7 @@ def test_cluster_error(data):
clusters.iloc[::7, :] = 0
with pytest.raises(ValueError):
- mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
+ mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
def test_default_clusters(data):
@@ -314,20 +314,23 @@ def test_default_clusters(data):
x = x[:, [0]]
y = y[:, [0]]
mod = BetweenOLS(y, x)
- res = mod.fit(reweight=True, cov_type='clustered', debiased=False)
+ res = mod.fit(reweight=True, cov_type="clustered", debiased=False)
dep = mod.dependent.dataframe
exog = mod.exog.dataframe
ols = IV2SLS(dep, exog, None, None)
- ols_res = ols.fit(cov_type='clustered')
+ ols_res = ols.fit(cov_type="clustered")
assert_results_equal(res, ols_res)
def test_fitted_effects_residuals(both_data_types):
mod = BetweenOLS(both_data_types.y, both_data_types.x)
res = mod.fit(reweight=True, debiased=False)
- expected = pd.DataFrame(mod.exog.values2d @ res.params.values, mod.dependent.index,
- columns=['fitted_values'])
+ expected = pd.DataFrame(
+ mod.exog.values2d @ res.params.values,
+ mod.dependent.index,
+ columns=["fitted_values"],
+ )
assert_allclose(expected, res.fitted_values)
assert_frame_similar(res.fitted_values, expected)
@@ -337,12 +340,12 @@ def test_fitted_effects_residuals(both_data_types):
resids = resids.reindex(reindex)
resids.index = index
expected = pd.DataFrame(resids)
- expected.columns = ['estimated_effects']
+ expected.columns = ["estimated_effects"]
assert_allclose(expected, res.estimated_effects)
assert_frame_similar(res.estimated_effects, expected)
fitted_effects = res.fitted_values.values + res.estimated_effects.values
expected.iloc[:, 0] = mod.dependent.values2d - fitted_effects
- expected.columns = ['idiosyncratic']
+ expected.columns = ["idiosyncratic"]
assert_allclose(expected, res.idiosyncratic, atol=1e-8)
assert_frame_similar(res.idiosyncratic, expected)
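The tests above all reduce BetweenOLS to a cross-sectional regression on
entity means. A minimal sketch of that equivalence, assuming synthetic data
(the firm labels, dates, and dimensions below are invented):

    import numpy as np
    import pandas as pd
    from linearmodels.iv import IV2SLS
    from linearmodels.panel import BetweenOLS

    entities = ["firm.{0}".format(i) for i in range(20)]
    times = pd.date_range("2000-01-01", periods=6)
    idx = pd.MultiIndex.from_product([entities, times])
    y = pd.DataFrame(np.random.standard_normal((120, 1)), index=idx, columns=["y"])
    x = pd.DataFrame(np.random.standard_normal((120, 2)), index=idx, columns=["x1", "x2"])

    # BetweenOLS fits OLS on entity averages, so IV2SLS (no instruments)
    # on the grouped means should produce identical parameters.
    res = BetweenOLS(y, x).fit(debiased=False)
    ols_res = IV2SLS(
        y.groupby(level=0).mean(), x.groupby(level=0).mean(), None, None
    ).fit(cov_type="unadjusted")
    print(res.params, ols_res.params)
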
diff --git a/linearmodels/tests/panel/test_cluster_input_formats.py b/linearmodels/tests/panel/test_cluster_input_formats.py
index 1d6d5fccb5..169b28d850 100644
--- a/linearmodels/tests/panel/test_cluster_input_formats.py
+++ b/linearmodels/tests/panel/test_cluster_input_formats.py
@@ -9,11 +9,13 @@
from linearmodels.panel.model import PanelOLS
from linearmodels.tests.panel._utility import datatypes, generate_data
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
missing = [0.0, 0.20]
perms = list(product(missing, datatypes))
-ids = list(map(lambda s: '-'.join(map(str, s)), perms))
+ids = list(map(lambda s: "-".join(map(str, s)), perms))
@pytest.fixture(params=perms, ids=ids)
@@ -28,7 +30,7 @@ def test_categorical_input(data):
effects = np.random.randint(0, 5, size=(nt, 2))
temp = {}
for i, e in enumerate(effects.T):
- name = 'effect.' + str(i)
+ name = "effect." + str(i)
temp[name] = pd.Categorical(pd.Series(e, index=y.index, name=name))
effects = pd.DataFrame(temp, index=y.index)
mod = PanelOLS(data.y, data.x, other_effects=effects)
@@ -37,33 +39,39 @@ def test_categorical_input(data):
clusters = np.random.randint(0, y.shape[2] // 2, size=(nt, 2))
temp = {}
for i, c in enumerate(clusters.T):
- name = 'effect.' + str(i)
+ name = "effect." + str(i)
temp[name] = pd.Categorical(pd.Series(c, index=y.index, name=name))
clusters = pd.DataFrame(temp, index=y.index)
- mod.fit(cov_type='clustered', clusters=clusters)
+ mod.fit(cov_type="clustered", clusters=clusters)
def test_string_input(data):
y = PanelData(data.y)
nt = y.values2d.shape[0]
temp = {}
- prim = ['a', 'b', 'c', 'd', 'e']
+ prim = ["a", "b", "c", "d", "e"]
for i in range(2):
- name = 'effect.' + str(i)
- temp[name] = pd.Series(np.random.choice(prim, size=nt), index=y.index, name=name)
+ name = "effect." + str(i)
+ temp[name] = pd.Series(
+ np.random.choice(prim, size=nt), index=y.index, name=name
+ )
effects = pd.DataFrame(temp, index=y.index)
mod = PanelOLS(data.y, data.x, other_effects=effects)
mod.fit()
clusters = np.random.randint(0, y.shape[2] // 2, size=(nt, 2))
temp = {}
- prim = list(map(lambda s: ''.join(s), list(product(ascii_lowercase, ascii_lowercase))))
+ prim = list(
+ map(lambda s: "".join(s), list(product(ascii_lowercase, ascii_lowercase)))
+ )
for i in range(clusters.shape[1]):
- name = 'effect.' + str(i)
- temp[name] = pd.Series(np.random.choice(prim, size=nt), index=y.index, name=name)
+ name = "effect." + str(i)
+ temp[name] = pd.Series(
+ np.random.choice(prim, size=nt), index=y.index, name=name
+ )
clusters = pd.DataFrame(temp, index=y.index)
- mod.fit(cov_type='clustered', clusters=clusters)
+ mod.fit(cov_type="clustered", clusters=clusters)
def test_integer_input(data):
@@ -72,7 +80,7 @@ def test_integer_input(data):
effects = np.random.randint(0, 5, size=(nt, 2))
temp = {}
for i, e in enumerate(effects.T):
- name = 'effect.' + str(i)
+ name = "effect." + str(i)
temp[name] = pd.Series(e, index=y.index, name=name)
effects = pd.DataFrame(temp, index=y.index)
mod = PanelOLS(data.y, data.x, other_effects=effects)
@@ -81,30 +89,34 @@ def test_integer_input(data):
clusters = np.random.randint(0, y.shape[2] // 2, size=(nt, 2))
temp = {}
for i, c in enumerate(clusters.T):
- name = 'effect.' + str(i)
+ name = "effect." + str(i)
temp[name] = pd.Series(c, index=y.index, name=name)
clusters = pd.DataFrame(temp, index=y.index)
- mod.fit(cov_type='clustered', clusters=clusters)
+ mod.fit(cov_type="clustered", clusters=clusters)
def test_mixed_input(data):
y = PanelData(data.y)
nt = y.values2d.shape[0]
effects = np.random.randint(0, 5, size=nt)
- prim = ['a', 'b', 'c', 'd', 'e']
- temp = {'effect.0': pd.Categorical(pd.Series(effects, index=y.index)),
- 'effect.1': pd.Series(np.random.choice(prim, size=nt), index=y.index)}
+ prim = ["a", "b", "c", "d", "e"]
+ temp = {
+ "effect.0": pd.Categorical(pd.Series(effects, index=y.index)),
+ "effect.1": pd.Series(np.random.choice(prim, size=nt), index=y.index),
+ }
effects = pd.DataFrame(temp, index=y.index)
mod = PanelOLS(data.y, data.x, other_effects=effects)
mod.fit()
clusters = np.random.randint(0, y.shape[2] // 2, size=(nt, 2))
temp = {}
- prim = list(map(lambda s: ''.join(s), list(product(ascii_lowercase, ascii_lowercase))))
- temp['var.cluster.0'] = pd.Series(np.random.choice(prim, size=nt), index=y.index)
- temp['var.cluster.1'] = pd.Series(clusters[:, 1], index=y.index)
+ prim = list(
+ map(lambda s: "".join(s), list(product(ascii_lowercase, ascii_lowercase)))
+ )
+ temp["var.cluster.0"] = pd.Series(np.random.choice(prim, size=nt), index=y.index)
+ temp["var.cluster.1"] = pd.Series(clusters[:, 1], index=y.index)
clusters = pd.DataFrame(temp, index=y.index)
- mod.fit(cov_type='clustered', clusters=clusters)
+ mod.fit(cov_type="clustered", clusters=clusters)
def test_nested_effects(data):
@@ -112,18 +124,18 @@ def test_nested_effects(data):
effects = pd.DataFrame(y.entity_ids // 2, index=y.index)
with pytest.raises(ValueError) as exception:
PanelOLS(data.y, data.x, entity_effects=True, other_effects=effects)
- assert 'entity effects' in str(exception.value)
+ assert "entity effects" in str(exception.value)
effects = pd.DataFrame(y.time_ids // 2, index=y.index)
with pytest.raises(ValueError) as exception:
PanelOLS(data.y, data.x, time_effects=True, other_effects=effects)
- assert 'time effects' in str(exception.value)
+ assert "time effects" in str(exception.value)
effects1 = pd.Series(y.entity_ids.squeeze() // 2, index=y.index)
effects2 = pd.Series(y.entity_ids.squeeze() // 4, index=y.index)
- effects = pd.DataFrame({'eff1': effects1, 'eff2': effects2})
+ effects = pd.DataFrame({"eff1": effects1, "eff2": effects2})
with pytest.raises(ValueError) as exception:
PanelOLS(data.y, data.x, other_effects=effects)
- assert 'by other effects' in str(exception.value)
- assert 'time effects' not in str(exception.value)
- assert 'entity effects' not in str(exception.value)
+ assert "by other effects" in str(exception.value)
+ assert "time effects" not in str(exception.value)
+ assert "entity effects" not in str(exception.value)
diff --git a/linearmodels/tests/panel/test_data.py b/linearmodels/tests/panel/test_data.py
index c67f547d1b..c5b36ded7e 100644
--- a/linearmodels/tests/panel/test_data.py
+++ b/linearmodels/tests/panel/test_data.py
@@ -1,13 +1,13 @@
from linearmodels.compat.numpy import lstsq
from linearmodels.compat.pandas import get_codes, is_string_dtype
-from itertools import product
from datetime import datetime
+from itertools import product
import numpy as np
from numpy.linalg import pinv
from numpy.testing import assert_allclose, assert_equal
-from pandas import (Categorical, DataFrame, Series, date_range, get_dummies)
+from pandas import Categorical, DataFrame, Series, date_range, get_dummies
from pandas.testing import assert_frame_equal, assert_index_equal
import pytest
@@ -23,15 +23,23 @@
pass
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
PERC_MISSING = [0, 0.02, 0.10, 0.33]
TYPES = datatypes
-@pytest.fixture(params=list(product(PERC_MISSING, TYPES)),
- ids=list(map(lambda x: str(int(100 * x[0])) + '-' + str(x[1]),
- product(PERC_MISSING, TYPES))))
+@pytest.fixture(
+ params=list(product(PERC_MISSING, TYPES)),
+ ids=list(
+ map(
+ lambda x: str(int(100 * x[0])) + "-" + str(x[1]),
+ product(PERC_MISSING, TYPES),
+ )
+ ),
+)
def data(request):
missing, datatype = request.param
return generate_data(missing, datatype, ntk=(231, 7, 5))
@@ -42,9 +50,9 @@ def mi_df():
np.random.seed(12345)
n, t, k = 11, 7, 3
x = np.random.standard_normal((k, t, n))
- major = date_range('12-31-1999', periods=7)
- items = ['var.{0}'.format(i) for i in range(1, k + 1)]
- minor = ['entities.{0}'.format(i) for i in range(1, n + 1)]
+ major = date_range("12-31-1999", periods=7)
+ items = ["var.{0}".format(i) for i in range(1, k + 1)]
+ minor = ["entities.{0}".format(i) for i in range(1, n + 1)]
return panel_to_frame(x, items, major, minor, swap=True)
@@ -57,12 +65,17 @@ def test_numpy_3d():
assert dh.nobs == t
assert dh.nvar == k
assert_equal(np.reshape(x.T, (n * t, k)), dh.values2d)
- items = ['entity.{0}'.format(i) for i in range(n)]
+ items = ["entity.{0}".format(i) for i in range(n)]
obs = [i for i in range(t)]
- var_names = ['x.{0}'.format(i) for i in range(k)]
- expected_frame = panel_to_frame(np.reshape(x, (k, t, n)), items=var_names,
- major_axis=obs, minor_axis=items, swap=True)
- expected_frame.index.set_names(['entity', 'time'], inplace=True)
+ var_names = ["x.{0}".format(i) for i in range(k)]
+ expected_frame = panel_to_frame(
+ np.reshape(x, (k, t, n)),
+ items=var_names,
+ major_axis=obs,
+ minor_axis=items,
+ swap=True,
+ )
+ expected_frame.index.set_names(["entity", "time"], inplace=True)
assert_frame_equal(dh.dataframe, expected_frame)
@@ -87,9 +100,9 @@ def test_numpy_2d():
def test_pandas_multiindex_dataframe():
n, t, k = 11, 7, 3
x = np.random.random((n, t, k))
- major = date_range('12-31-1999', periods=7)
- minor = ['var.{0}'.format(i) for i in range(1, k + 1)]
- items = ['item.{0}'.format(i) for i in range(1, n + 1)]
+ major = date_range("12-31-1999", periods=7)
+ minor = ["var.{0}".format(i) for i in range(1, k + 1)]
+ items = ["item.{0}".format(i) for i in range(1, n + 1)]
x = panel_to_frame(x, items=items, major_axis=major, minor_axis=minor, swap=True)
PanelData(x)
@@ -97,8 +110,8 @@ def test_pandas_multiindex_dataframe():
def test_pandas_dataframe():
t, n = 11, 7
x = np.random.random((t, n))
- index = date_range('12-31-1999', periods=t)
- cols = ['entity.{0}'.format(i) for i in range(1, n + 1)]
+ index = date_range("12-31-1999", periods=t)
+ cols = ["entity.{0}".format(i) for i in range(1, n + 1)]
x = DataFrame(x, columns=cols, index=index)
PanelData(x)
@@ -106,34 +119,40 @@ def test_pandas_dataframe():
def test_existing_panel_data():
n, t, k = 11, 7, 3
x = np.random.random((k, t, n))
- major = date_range('12-31-1999', periods=7)
- items = ['var.{0}'.format(i) for i in range(1, k + 1)]
- minor = ['entities.{0}'.format(i) for i in range(1, n + 1)]
+ major = date_range("12-31-1999", periods=7)
+ items = ["var.{0}".format(i) for i in range(1, k + 1)]
+ minor = ["entities.{0}".format(i) for i in range(1, n + 1)]
x = panel_to_frame(x, items=items, major_axis=major, minor_axis=minor, swap=True)
dh = PanelData(x)
dh2 = PanelData(dh)
assert_frame_equal(dh.dataframe, dh2.dataframe)
-@pytest.mark.skipif(MISSING_XARRAY, reason='xarray is not installed')
+@pytest.mark.skipif(MISSING_XARRAY, reason="xarray is not installed")
def test_xarray_2d():
n, t = 11, 7
x = np.random.random((t, n))
- x = xr.DataArray(x, dims=('time', 'entity'),
- coords={
- 'entity': list('firm.' + str(i) for i in range(n))})
+ x = xr.DataArray(
+ x,
+ dims=("time", "entity"),
+ coords={"entity": list("firm." + str(i) for i in range(n))},
+ )
dh = PanelData(x)
assert_equal(dh.values2d, np.reshape(x.values.T, (n * t, 1)))
-@pytest.mark.skipif(MISSING_XARRAY, reason='xarray is not installed')
+@pytest.mark.skipif(MISSING_XARRAY, reason="xarray is not installed")
def test_xarray_3d():
n, t, k = 11, 7, 13
x = np.random.random((k, t, n))
- x = xr.DataArray(x, dims=('var', 'time', 'entity'),
- coords={
- 'entity': list('firm.' + str(i) for i in range(n)),
- 'var': list('x.' + str(i) for i in range(k))})
+ x = xr.DataArray(
+ x,
+ dims=("var", "time", "entity"),
+ coords={
+ "entity": list("firm." + str(i) for i in range(n)),
+ "var": list("x." + str(i) for i in range(k)),
+ },
+ )
dh = PanelData(x)
assert_equal(np.reshape(x.values.T, (n * t, k)), dh.values2d)
@@ -172,11 +191,15 @@ def test_missing(mi_df):
def test_incorrect_dataframe():
grouped = np.array(list([i] * 10 for i in range(10))).ravel()
- df = DataFrame({'a': np.arange(100),
- 'b': grouped,
- 'c': np.random.permutation(grouped),
- 'data': np.random.randn(100)})
- df = df.set_index(['a', 'b', 'c'])
+ df = DataFrame(
+ {
+ "a": np.arange(100),
+ "b": grouped,
+ "c": np.random.permutation(grouped),
+ "data": np.random.randn(100),
+ }
+ )
+ df = df.set_index(["a", "b", "c"])
with pytest.raises(ValueError):
PanelData(df)
@@ -186,7 +209,7 @@ def test_incorrect_types():
PanelData(list(np.random.randn(10)))
-@pytest.mark.skipif(MISSING_XARRAY, reason='xarray is not installed')
+@pytest.mark.skipif(MISSING_XARRAY, reason="xarray is not installed")
def test_incorrect_types_xarray():
with pytest.raises(ValueError):
PanelData(xr.DataArray(np.random.randn(10)))
@@ -198,8 +221,8 @@ def test_ids(mi_df):
assert eids.shape == (77, 1)
assert len(np.unique(eids)) == 11
for i in range(0, len(eids), 7):
- assert np.ptp(eids[i:i + 7]) == 0
- assert np.all((eids[i + 8:] - eids[i]) != 0)
+ assert np.ptp(eids[i : i + 7]) == 0
+ assert np.all((eids[i + 8 :] - eids[i]) != 0)
tids = data.time_ids
assert tids.shape == (77, 1)
@@ -210,19 +233,19 @@ def test_ids(mi_df):
def test_str_repr(mi_df):
data = PanelData(mi_df)
- assert 'PanelData' in str(data)
+ assert "PanelData" in str(data)
assert str(hex(id(data))) in data.__repr__()
def test_demean(mi_df):
data = PanelData(mi_df)
- fe = data.demean('entity')
+ fe = data.demean("entity")
expected = data.values3d.copy()
for i in range(3):
expected[i] -= expected[i].mean(0)
assert_allclose(fe.values3d, expected)
- te = data.demean('time')
+ te = data.demean("time")
expected = data.values3d.copy()
for i in range(3):
expected[i] -= expected[i].mean(1)[:, None]
@@ -237,11 +260,11 @@ def demean(x):
return x - x.mean()
entity_demean = df.groupby(level=0).transform(demean)
- res = dh.demean('entity')
+ res = dh.demean("entity")
assert_allclose(entity_demean.values, res.values2d)
time_demean = df.groupby(level=1).transform(demean)
- res = dh.demean('time')
+ res = dh.demean("time")
assert_allclose(time_demean.values, res.values2d)
@@ -255,16 +278,14 @@ def test_demean_against_dummy_regression(data):
cat = Categorical(no_index[df.index.levels[0].name])
d = get_dummies(cat, drop_first=False).astype(np.float64)
dummy_demeaned = df.values - d @ lstsq(d, df.values)[0]
- entity_demean = dh.demean('entity')
- assert_allclose(1 + np.abs(entity_demean.values2d),
- 1 + np.abs(dummy_demeaned))
+ entity_demean = dh.demean("entity")
+ assert_allclose(1 + np.abs(entity_demean.values2d), 1 + np.abs(dummy_demeaned))
cat = Categorical(no_index[df.index.levels[1].name])
d = get_dummies(cat, drop_first=False).astype(np.float64)
dummy_demeaned = df.values - d @ lstsq(d, df.values)[0]
- time_demean = dh.demean('time')
- assert_allclose(1 + np.abs(time_demean.values2d),
- 1 + np.abs(dummy_demeaned))
+ time_demean = dh.demean("time")
+ assert_allclose(1 + np.abs(time_demean.values2d), 1 + np.abs(dummy_demeaned))
cat = Categorical(no_index[df.index.levels[0].name])
d1 = get_dummies(cat, drop_first=False).astype(np.float64)
@@ -272,21 +293,20 @@ def test_demean_against_dummy_regression(data):
d2 = get_dummies(cat, drop_first=True).astype(np.float64)
d = np.c_[d1.values, d2.values]
dummy_demeaned = df.values - d @ lstsq(d, df.values)[0]
- both_demean = dh.demean('both')
- assert_allclose(1 + np.abs(both_demean.values2d),
- 1 + np.abs(dummy_demeaned))
+ both_demean = dh.demean("both")
+ assert_allclose(1 + np.abs(both_demean.values2d), 1 + np.abs(dummy_demeaned))
def test_demean_missing(mi_df):
mi_df.values.flat[::13] = np.nan
data = PanelData(mi_df)
- fe = data.demean('entity')
+ fe = data.demean("entity")
expected = data.values3d.copy()
for i in range(3):
expected[i] -= np.nanmean(expected[i], 0)
assert_allclose(fe.values3d, expected)
- te = data.demean('time')
+ te = data.demean("time")
expected = data.values3d.copy()
for i in range(3):
expected[i] -= np.nanmean(expected[i], 1)[:, None]
@@ -305,7 +325,7 @@ def test_demean_many_missing(mi_df):
mi_df.loc[time, column] = np.nan
mi_df.index = mi_df.index.swaplevel()
data = PanelData(mi_df)
- fe = data.demean('entity')
+ fe = data.demean("entity")
orig_nan = np.isnan(data.values3d.ravel())
fe_nan = np.isnan(fe.values3d.ravel())
assert np.all(fe_nan[orig_nan])
@@ -318,7 +338,7 @@ def test_demean_many_missing(mi_df):
expected[i] -= mu
assert_allclose(fe.values3d, expected)
- te = data.demean('time')
+ te = data.demean("time")
expected = data.values3d.copy()
for i in range(3):
mu = np.ones((expected[i].shape[0], 1)) * np.nan
@@ -342,7 +362,7 @@ def test_demean_many_missing_dropped(mi_df):
data = PanelData(mi_df)
data.drop(data.isnull)
- fe = data.demean('entity')
+ fe = data.demean("entity")
expected = data.values2d.copy()
eid = data.entity_ids.ravel()
@@ -354,11 +374,11 @@ def test_demean_many_missing_dropped(mi_df):
def test_demean_both_large_t():
x = np.random.standard_normal((1, 100, 10))
- time = date_range('1-1-2000', periods=100)
- entities = ['entity.{0}'.format(i) for i in range(10)]
- data = panel_to_frame(x, ['x'], time, entities, swap=True)
+ time = date_range("1-1-2000", periods=100)
+ entities = ["entity.{0}".format(i) for i in range(10)]
+ data = panel_to_frame(x, ["x"], time, entities, swap=True)
data = PanelData(data)
- demeaned = data.demean('both')
+ demeaned = data.demean("both")
df = data.dataframe
no_index = df.reset_index()
@@ -368,14 +388,13 @@ def test_demean_both_large_t():
d2 = get_dummies(cat, drop_first=True).astype(np.float64)
d = np.c_[d1.values, d2.values]
dummy_demeaned = df.values - d @ pinv(d) @ df.values
- assert_allclose(1 + np.abs(demeaned.values2d),
- 1 + np.abs(dummy_demeaned))
+ assert_allclose(1 + np.abs(demeaned.values2d), 1 + np.abs(dummy_demeaned))
def test_demean_invalid(mi_df):
data = PanelData(mi_df)
with pytest.raises(ValueError):
- data.demean('unknown')
+ data.demean("unknown")
def test_dummies(mi_df):
@@ -383,14 +402,14 @@ def test_dummies(mi_df):
edummy = data.dummies()
assert edummy.shape == (77, 11)
assert np.all(edummy.sum(0) == 7)
- tdummy = data.dummies(group='time')
+ tdummy = data.dummies(group="time")
assert tdummy.shape == (77, 7)
assert np.all(tdummy.sum(0) == 11)
- tdummy_drop = data.dummies(group='time', drop_first=True)
+ tdummy_drop = data.dummies(group="time", drop_first=True)
assert tdummy_drop.shape == (77, 6)
assert np.all(tdummy.sum(0) == 11)
with pytest.raises(ValueError):
- data.dummies('unknown')
+ data.dummies("unknown")
def test_roundtrip_3d(data):
@@ -419,29 +438,32 @@ def test_demean_missing_alt_types(data):
check = isinstance(data.x, (DataFrame, np.ndarray))
xpd = PanelData(data.x)
xpd.drop(xpd.isnull)
- entity_demean = xpd.demean('entity')
+ entity_demean = xpd.demean("entity")
expected = xpd.dataframe.groupby(level=0).transform(lambda s: s - s.mean())
- assert_frame_equal(entity_demean.dataframe, expected,
- check_index_type=check,
- check_column_type=check)
-
- time_demean = xpd.demean('time')
+ assert_frame_equal(
+ entity_demean.dataframe,
+ expected,
+ check_index_type=check,
+ check_column_type=check,
+ )
+
+ time_demean = xpd.demean("time")
expected = xpd.dataframe.groupby(level=1).transform(lambda s: s - s.mean())
- assert_frame_equal(time_demean.dataframe, expected,
- check_index_type=check,
- check_column_type=check)
+ assert_frame_equal(
+ time_demean.dataframe, expected, check_index_type=check, check_column_type=check
+ )
def test_mean_missing(data):
xpd = PanelData(data.x)
xpd.drop(xpd.isnull)
- entity_mean = xpd.mean('entity')
+ entity_mean = xpd.mean("entity")
expected = xpd.dataframe.groupby(level=0).mean()
expected = expected.loc[xpd.entities]
expected.columns.name = None
assert_frame_equal(entity_mean, expected)
- time_mean = xpd.mean('time')
+ time_mean = xpd.mean("time")
expected = xpd.dataframe.groupby(level=1).mean()
expected = expected.loc[xpd.time]
expected.columns.name = None
@@ -451,14 +473,14 @@ def test_mean_missing(data):
def test_count(data):
xpd = PanelData(data.x)
xpd.drop(xpd.isnull)
- entity_mean = xpd.count('entity')
+ entity_mean = xpd.count("entity")
expected = xpd.dataframe.groupby(level=0).count()
expected = expected.loc[xpd.entities]
expected.columns.name = None
expected = expected.astype(np.int64)
assert_frame_equal(entity_mean, expected)
- time_mean = xpd.count('time')
+ time_mean = xpd.count("time")
expected = xpd.dataframe.groupby(level=1).count()
expected = expected.loc[xpd.time]
expected.columns.name = None
@@ -478,15 +500,17 @@ def test_demean_simple_weighted(data):
x.drop(missing)
w.drop(missing)
w.dataframe.iloc[:, 0] = 1
- unweighted_entity_demean = x.demean('entity')
- weighted_entity_demean = x.demean('entity', weights=w)
- assert_allclose(unweighted_entity_demean.dataframe,
- weighted_entity_demean.dataframe)
+ unweighted_entity_demean = x.demean("entity")
+ weighted_entity_demean = x.demean("entity", weights=w)
+ assert_allclose(
+ unweighted_entity_demean.dataframe, weighted_entity_demean.dataframe
+ )
- unweighted_entity_demean = x.demean('time')
- weighted_entity_demean = x.demean('time', weights=w)
- assert_allclose(unweighted_entity_demean.dataframe,
- weighted_entity_demean.dataframe)
+ unweighted_entity_demean = x.demean("time")
+ weighted_entity_demean = x.demean("time", weights=w)
+ assert_allclose(
+ unweighted_entity_demean.dataframe, weighted_entity_demean.dataframe
+ )
def test_demean_weighted(data):
@@ -496,7 +520,7 @@ def test_demean_weighted(data):
x.drop(missing)
w.drop(missing)
- entity_demean = x.demean('entity', weights=w)
+ entity_demean = x.demean("entity", weights=w)
d = get_dummies(Categorical(get_codes(x.index)[0]))
d = d.values
root_w = np.sqrt(w.values2d)
@@ -504,10 +528,9 @@ def test_demean_weighted(data):
wd = d * root_w
mu = wd @ lstsq(wd, wx)[0]
e = wx - mu
- assert_allclose(1 + np.abs(entity_demean.values2d),
- 1 + np.abs(e))
+ assert_allclose(1 + np.abs(entity_demean.values2d), 1 + np.abs(e))
- time_demean = x.demean('time', weights=w)
+ time_demean = x.demean("time", weights=w)
d = get_dummies(Categorical(get_codes(x.index)[1]))
d = d.values
root_w = np.sqrt(w.values2d)
@@ -515,8 +538,7 @@ def test_demean_weighted(data):
wd = d * root_w
mu = wd @ lstsq(wd, wx)[0]
e = wx - mu
- assert_allclose(1 + np.abs(time_demean.values2d),
- 1 + np.abs(e))
+ assert_allclose(1 + np.abs(time_demean.values2d), 1 + np.abs(e))
def test_mean_weighted(data):
@@ -525,7 +547,7 @@ def test_mean_weighted(data):
missing = x.isnull | w.isnull
x.drop(missing)
w.drop(missing)
- entity_mean = x.mean('entity', weights=w)
+ entity_mean = x.mean("entity", weights=w)
c = x.index.levels[0][get_codes(x.index)[0]]
d = get_dummies(Categorical(c, ordered=True))
d = d[entity_mean.index]
@@ -536,7 +558,7 @@ def test_mean_weighted(data):
mu = lstsq(wd, wx)[0]
assert_allclose(entity_mean, mu)
- time_mean = x.mean('time', weights=w)
+ time_mean = x.mean("time", weights=w)
c = x.index.levels[1][get_codes(x.index)[1]]
d = get_dummies(Categorical(c, ordered=True))
d = d[list(time_mean.index)]
@@ -550,91 +572,94 @@ def test_mean_weighted(data):
def test_categorical_conversion():
t, n = 3, 1000
- string = np.random.choice(['a', 'b', 'c'], (t, n))
+ string = np.random.choice(["a", "b", "c"], (t, n))
num = np.random.randn(t, n)
- time = date_range('1-1-2000', periods=t)
- entities = ['entity.{0}'.format(i) for i in range(n)]
- p = panel_to_frame(None, items=['a', 'b'], major_axis=time,
- minor_axis=entities, swap=True)
- p['a'] = string.T.ravel()
- p['b'] = num.T.ravel()
- p = p[['a', 'b']]
+ time = date_range("1-1-2000", periods=t)
+ entities = ["entity.{0}".format(i) for i in range(n)]
+ p = panel_to_frame(
+ None, items=["a", "b"], major_axis=time, minor_axis=entities, swap=True
+ )
+ p["a"] = string.T.ravel()
+ p["b"] = num.T.ravel()
+ p = p[["a", "b"]]
panel = PanelData(p, convert_dummies=False)
df = panel.dataframe.copy()
- df['a'] = Categorical(df['a'])
+ df["a"] = Categorical(df["a"])
panel = PanelData(df, convert_dummies=True)
df = panel.dataframe
assert df.shape == (3000, 3)
s = string.T.ravel()
- a_locs = np.where(s == 'a')
- b_locs = np.where(s == 'b')
- c_locs = np.where(s == 'c')
- assert np.all(df.loc[:, 'a.b'].values[a_locs] == 0.0)
- assert np.all(df.loc[:, 'a.b'].values[b_locs] == 1.0)
- assert np.all(df.loc[:, 'a.b'].values[c_locs] == 0.0)
+ a_locs = np.where(s == "a")
+ b_locs = np.where(s == "b")
+ c_locs = np.where(s == "c")
+ assert np.all(df.loc[:, "a.b"].values[a_locs] == 0.0)
+ assert np.all(df.loc[:, "a.b"].values[b_locs] == 1.0)
+ assert np.all(df.loc[:, "a.b"].values[c_locs] == 0.0)
- assert np.all(df.loc[:, 'a.c'].values[a_locs] == 0.0)
- assert np.all(df.loc[:, 'a.c'].values[b_locs] == 0.0)
- assert np.all(df.loc[:, 'a.c'].values[c_locs] == 1.0)
+ assert np.all(df.loc[:, "a.c"].values[a_locs] == 0.0)
+ assert np.all(df.loc[:, "a.c"].values[b_locs] == 0.0)
+ assert np.all(df.loc[:, "a.c"].values[c_locs] == 1.0)
def test_string_conversion():
t, n = 3, 1000
- string = np.random.choice(['a', 'b', 'c'], (t, n))
+ string = np.random.choice(["a", "b", "c"], (t, n))
num = np.random.randn(t, n)
- time = date_range('1-1-2000', periods=t)
- entities = ['entity.{0}'.format(i) for i in range(n)]
- p = panel_to_frame(None, items=['a', 'b'], major_axis=time, minor_axis=entities,
- swap=True)
- p['a'] = string.T.ravel()
- p['b'] = num.T.ravel()
- p = p[['a', 'b']]
- panel = PanelData(p, var_name='OtherEffect')
+ time = date_range("1-1-2000", periods=t)
+ entities = ["entity.{0}".format(i) for i in range(n)]
+ p = panel_to_frame(
+ None, items=["a", "b"], major_axis=time, minor_axis=entities, swap=True
+ )
+ p["a"] = string.T.ravel()
+ p["b"] = num.T.ravel()
+ p = p[["a", "b"]]
+ panel = PanelData(p, var_name="OtherEffect")
df = panel.dataframe
assert df.shape == (3000, 3)
s = string.T.ravel()
- a_locs = np.where(s == 'a')
- b_locs = np.where(s == 'b')
- c_locs = np.where(s == 'c')
- assert np.all(df.loc[:, 'a.b'].values[a_locs] == 0.0)
- assert np.all(df.loc[:, 'a.b'].values[b_locs] == 1.0)
- assert np.all(df.loc[:, 'a.b'].values[c_locs] == 0.0)
+ a_locs = np.where(s == "a")
+ b_locs = np.where(s == "b")
+ c_locs = np.where(s == "c")
+ assert np.all(df.loc[:, "a.b"].values[a_locs] == 0.0)
+ assert np.all(df.loc[:, "a.b"].values[b_locs] == 1.0)
+ assert np.all(df.loc[:, "a.b"].values[c_locs] == 0.0)
- assert np.all(df.loc[:, 'a.c'].values[a_locs] == 0.0)
- assert np.all(df.loc[:, 'a.c'].values[b_locs] == 0.0)
- assert np.all(df.loc[:, 'a.c'].values[c_locs] == 1.0)
+ assert np.all(df.loc[:, "a.c"].values[a_locs] == 0.0)
+ assert np.all(df.loc[:, "a.c"].values[b_locs] == 0.0)
+ assert np.all(df.loc[:, "a.c"].values[c_locs] == 1.0)
def test_string_nonconversion():
t, n = 3, 1000
- string = np.random.choice(['a', 'b', 'c'], (t, n))
+ string = np.random.choice(["a", "b", "c"], (t, n))
num = np.random.randn(t, n)
- time = date_range('1-1-2000', periods=t)
- entities = ['entity.{0}'.format(i) for i in range(n)]
- p = panel_to_frame(None, items=['a', 'b'], major_axis=time, minor_axis=entities,
- swap=True)
- p['a'] = string.T.ravel()
- p['b'] = num.T.ravel()
- panel = PanelData(p, var_name='OtherEffect', convert_dummies=False)
- assert is_string_dtype(panel.dataframe['a'].dtype)
- assert np.all(panel.dataframe['a'] == string.T.ravel())
+ time = date_range("1-1-2000", periods=t)
+ entities = ["entity.{0}".format(i) for i in range(n)]
+ p = panel_to_frame(
+ None, items=["a", "b"], major_axis=time, minor_axis=entities, swap=True
+ )
+ p["a"] = string.T.ravel()
+ p["b"] = num.T.ravel()
+ panel = PanelData(p, var_name="OtherEffect", convert_dummies=False)
+ assert is_string_dtype(panel.dataframe["a"].dtype)
+ assert np.all(panel.dataframe["a"] == string.T.ravel())
def test_repr_html(mi_df):
data = PanelData(mi_df)
html = data._repr_html_()
- assert '<table>' in html
+ assert "<table>" in html
def test_general_demean_oneway(mi_df):
y = PanelData(mi_df)
- dm1 = y.demean('entity')
+ dm1 = y.demean("entity")
g = DataFrame(y.entity_ids, index=y.index)
dm2 = y.general_demean(g)
assert_allclose(dm1.values2d, dm2.values2d)
- dm1 = y.demean('time')
+ dm1 = y.demean("time")
g = DataFrame(y.time_ids, index=y.index)
dm2 = y.general_demean(g)
assert_allclose(dm1.values2d, dm2.values2d)
@@ -649,9 +674,9 @@ def test_general_demean_oneway(mi_df):
def test_general_demean_twoway(mi_df):
y = PanelData(mi_df)
- dm1 = y.demean('both')
+ dm1 = y.demean("both")
g = DataFrame(y.entity_ids, index=y.index)
- g['column2'] = Series(y.time_ids.squeeze(), index=y.index)
+ g["column2"] = Series(y.time_ids.squeeze(), index=y.index)
dm2 = y.general_demean(g)
assert_allclose(dm1.values2d, dm2.values2d)
@@ -668,7 +693,7 @@ def test_general_demean_twoway(mi_df):
def test_general_unit_weighted_demean_oneway(mi_df):
y = PanelData(mi_df)
- dm1 = y.demean('entity')
+ dm1 = y.demean("entity")
g = PanelData(DataFrame(y.entity_ids, index=y.index))
weights = PanelData(g).copy()
weights.dataframe.iloc[:, :] = 1
@@ -677,15 +702,14 @@ def test_general_unit_weighted_demean_oneway(mi_df):
dm3 = y.general_demean(g)
assert_allclose(dm3.values2d, dm2.values2d)
- dm1 = y.demean('time')
+ dm1 = y.demean("time")
g = PanelData(DataFrame(y.time_ids, index=y.index))
dm2 = y.general_demean(g, weights)
assert_allclose(dm1.values2d, dm2.values2d)
dm3 = y.general_demean(g)
assert_allclose(dm3.values2d, dm2.values2d)
- g = PanelData(DataFrame(np.random.randint(0, 10, g.dataframe.shape),
- index=y.index))
+ g = PanelData(DataFrame(np.random.randint(0, 10, g.dataframe.shape), index=y.index))
dm2 = y.general_demean(g, weights)
dm3 = y.general_demean(g)
g = Categorical(g.dataframe.iloc[:, 0])
@@ -698,21 +722,21 @@ def test_general_unit_weighted_demean_oneway(mi_df):
def test_general_weighted_demean_oneway(mi_df):
y = PanelData(mi_df)
weights = DataFrame(
- np.random.chisquare(10, (y.dataframe.shape[0], 1)) / 10, index=y.index)
+ np.random.chisquare(10, (y.dataframe.shape[0], 1)) / 10, index=y.index
+ )
w = PanelData(weights)
- dm1 = y.demean('entity', weights=w)
+ dm1 = y.demean("entity", weights=w)
g = PanelData(DataFrame(y.entity_ids, index=y.index))
dm2 = y.general_demean(g, w)
assert_allclose(dm1.values2d, dm2.values2d)
- dm1 = y.demean('time', weights=w)
+ dm1 = y.demean("time", weights=w)
g = PanelData(DataFrame(y.time_ids, index=y.index))
dm2 = y.general_demean(g, w)
assert_allclose(dm1.values2d, dm2.values2d)
- g = PanelData(DataFrame(np.random.randint(0, 10, g.dataframe.shape),
- index=y.index))
+ g = PanelData(DataFrame(np.random.randint(0, 10, g.dataframe.shape), index=y.index))
dm2 = y.general_demean(g, w)
g = Categorical(g.dataframe.iloc[:, 0])
d = get_dummies(g)
@@ -726,15 +750,15 @@ def test_general_unit_weighted_demean_twoway(mi_df):
np.random.seed(12345)
y = PanelData(mi_df)
weights = DataFrame(
- np.random.chisquare(10, (y.dataframe.shape[0], 1)) / 10, index=y.index)
+ np.random.chisquare(10, (y.dataframe.shape[0], 1)) / 10, index=y.index
+ )
w = PanelData(weights)
- dm1 = y.demean('both', weights=w)
+ dm1 = y.demean("both", weights=w)
g = DataFrame(y.entity_ids, index=y.index)
- g['column2'] = Series(y.time_ids.squeeze(), index=y.index)
+ g["column2"] = Series(y.time_ids.squeeze(), index=y.index)
dm2 = y.general_demean(g, weights=w)
- assert_allclose(dm1.values2d - dm2.values2d, np.zeros_like(dm2.values2d),
- atol=1e-7)
+ assert_allclose(dm1.values2d - dm2.values2d, np.zeros_like(dm2.values2d), atol=1e-7)
g = DataFrame(np.random.randint(0, 10, g.shape), index=y.index)
dm2 = y.general_demean(g, weights=w)
@@ -761,8 +785,8 @@ def test_original_unmodified(data):
mi_df_y = PanelData(data.y).dataframe
mi_df_x = PanelData(data.x).dataframe
- mi_df_y.index.names = ['firm', 'period']
- mi_df_x.index.names = ['firm', 'period']
+ mi_df_y.index.names = ["firm", "period"]
+ mi_df_x.index.names = ["firm", "period"]
mi_df_w = PanelData(data.w).dataframe
pre_y = mi_df_y.copy()
pre_x = mi_df_x.copy()
@@ -784,38 +808,44 @@ def test_original_unmodified(data):
def test_incorrect_time_axis():
x = np.random.randn(3, 3, 1000)
- entities = ['entity.{0}'.format(i) for i in range(1000)]
- time = ['time.{0}'.format(i) for i in range(3)]
- var_names = ['var.{0}'.format(i) for i in range(3)]
- p = panel_to_frame(x, items=var_names, major_axis=time, minor_axis=entities,
- swap=True)
+ entities = ["entity.{0}".format(i) for i in range(1000)]
+ time = ["time.{0}".format(i) for i in range(3)]
+ var_names = ["var.{0}".format(i) for i in range(3)]
+ p = panel_to_frame(
+ x, items=var_names, major_axis=time, minor_axis=entities, swap=True
+ )
with pytest.raises(ValueError):
PanelData(p)
time = [1, 2, 3]
- var_names = ['var.{0}'.format(i) for i in range(3)]
- p = panel_to_frame(x, items=var_names, major_axis=time, minor_axis=entities,
- swap=True)
- p.index = p.index.set_levels([1, datetime(1960, 1, 1), 'a'], 1)
+ var_names = ["var.{0}".format(i) for i in range(3)]
+ p = panel_to_frame(
+ x, items=var_names, major_axis=time, minor_axis=entities, swap=True
+ )
+ p.index = p.index.set_levels([1, datetime(1960, 1, 1), "a"], 1)
with pytest.raises(ValueError):
PanelData(p)
-@pytest.mark.skipif(MISSING_XARRAY, reason='xarray is not installed')
+@pytest.mark.skipif(MISSING_XARRAY, reason="xarray is not installed")
def test_incorrect_time_axis_xarray():
x = np.random.randn(3, 3, 1000)
- entities = ['entity.{0}'.format(i) for i in range(1000)]
- time = ['time.{0}'.format(i) for i in range(3)]
- vars = ['x.{0}'.format(i) for i in range(3)]
- da = xr.DataArray(x, coords={'entities': entities, 'time': time,
- 'vars': vars},
- dims=['vars', 'time', 'entities'])
+ entities = ["entity.{0}".format(i) for i in range(1000)]
+ time = ["time.{0}".format(i) for i in range(3)]
+ vars = ["x.{0}".format(i) for i in range(3)]
+ da = xr.DataArray(
+ x,
+ coords={"entities": entities, "time": time, "vars": vars},
+ dims=["vars", "time", "entities"],
+ )
with pytest.raises(ValueError):
PanelData(da)
- da = xr.DataArray(x, coords={'entities': entities, 'time': time,
- 'vars': vars},
- dims=['vars', 'time', 'entities'])
+ da = xr.DataArray(
+ x,
+ coords={"entities": entities, "time": time, "vars": vars},
+ dims=["vars", "time", "entities"],
+ )
with pytest.raises(ValueError):
PanelData(da)
@@ -829,8 +859,8 @@ def test_named_index(data):
data.x.index.set_names([None, None], inplace=True)
pdata = PanelData(data.x)
- assert pdata.dataframe.index.levels[0].name == 'entity'
- assert pdata.dataframe.index.levels[1].name == 'time'
+ assert pdata.dataframe.index.levels[0].name == "entity"
+ assert pdata.dataframe.index.levels[1].name == "time"
def test_fake_panel_properties(mi_df):
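The demeaning checks in this file compare PanelData.demean against a plain
groupby transform. A minimal sketch of that identity, assuming the same
synthetic layout the fixtures use (names and sizes are illustrative):

    import numpy as np
    import pandas as pd
    from numpy.testing import assert_allclose
    from linearmodels.panel.data import PanelData

    entities = ["entity.{0}".format(i) for i in range(11)]
    times = pd.date_range("12-31-1999", periods=7)
    idx = pd.MultiIndex.from_product([entities, times])
    df = pd.DataFrame(
        np.random.standard_normal((77, 3)),
        index=idx,
        columns=["var.1", "var.2", "var.3"],
    )

    # demean("entity") is the within transform: subtract entity means.
    fe = PanelData(df).demean("entity")
    expected = df.groupby(level=0).transform(lambda s: s - s.mean())
    assert_allclose(fe.values2d, expected.values)
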
diff --git a/linearmodels/tests/panel/test_fama_macbeth.py b/linearmodels/tests/panel/test_fama_macbeth.py
index e1b99b6fc5..1b0d5f4f09 100644
--- a/linearmodels/tests/panel/test_fama_macbeth.py
+++ b/linearmodels/tests/panel/test_fama_macbeth.py
@@ -15,18 +15,22 @@
from linearmodels.utility import (InferenceUnavailableWarning,
MissingValueWarning)
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
missing = [0.0, 0.20]
has_const = [True, False]
perms = list(product(missing, datatypes, has_const))
-ids = list(map(lambda s: '-'.join(map(str, s)), perms))
+ids = list(map(lambda s: "-".join(map(str, s)), perms))
@pytest.fixture(params=perms, ids=ids)
def data(request):
missing, datatype, const = request.param
- return generate_data(missing, datatype, const=const, other_effects=1, ntk=(25, 200, 5))
+ return generate_data(
+ missing, datatype, const=const, other_effects=1, ntk=(25, 200, 5)
+ )
def test_fama_macbeth(data):
@@ -60,17 +64,17 @@ def test_fama_macbeth(data):
def test_unknown_cov_type(data):
with pytest.raises(ValueError):
- FamaMacBeth(data.y, data.x).fit(cov_type='unknown')
+ FamaMacBeth(data.y, data.x).fit(cov_type="unknown")
def test_fama_macbeth_kernel_smoke(data):
- FamaMacBeth(data.y, data.x).fit(cov_type='kernel')
- FamaMacBeth(data.y, data.x).fit(cov_type='kernel', kernel='bartlett')
- FamaMacBeth(data.y, data.x).fit(cov_type='kernel', kernel='newey-west')
- FamaMacBeth(data.y, data.x).fit(cov_type='kernel', kernel='parzen')
- FamaMacBeth(data.y, data.x).fit(cov_type='kernel', kernel='qs')
- FamaMacBeth(data.y, data.x).fit(cov_type='kernel', bandwidth=3)
- res = FamaMacBeth(data.y, data.x).fit(cov_type='kernel', kernel='andrews')
+ FamaMacBeth(data.y, data.x).fit(cov_type="kernel")
+ FamaMacBeth(data.y, data.x).fit(cov_type="kernel", kernel="bartlett")
+ FamaMacBeth(data.y, data.x).fit(cov_type="kernel", kernel="newey-west")
+ FamaMacBeth(data.y, data.x).fit(cov_type="kernel", kernel="parzen")
+ FamaMacBeth(data.y, data.x).fit(cov_type="kernel", kernel="qs")
+ FamaMacBeth(data.y, data.x).fit(cov_type="kernel", bandwidth=3)
+ res = FamaMacBeth(data.y, data.x).fit(cov_type="kernel", kernel="andrews")
access_attributes(res)
@@ -79,30 +83,32 @@ def test_fitted_effects_residuals(data):
res = mod.fit()
expected = mod.exog.values2d @ res.params.values
- expected = pd.DataFrame(expected, index=mod.exog.index, columns=['fitted_values'])
+ expected = pd.DataFrame(expected, index=mod.exog.index, columns=["fitted_values"])
assert_allclose(res.fitted_values, expected)
assert_frame_similar(res.fitted_values, expected)
expected.iloc[:, 0] = mod.dependent.values2d - expected.values
- expected.columns = ['idiosyncratic']
+ expected.columns = ["idiosyncratic"]
assert_allclose(res.idiosyncratic, expected)
assert_frame_similar(res.idiosyncratic, expected)
expected.iloc[:, 0] = np.nan
- expected.columns = ['estimated_effects']
+ expected.columns = ["estimated_effects"]
assert_allclose(res.estimated_effects, expected)
assert_frame_similar(res.estimated_effects, expected)
-@pytest.mark.filterwarnings('always::linearmodels.utility.MissingValueWarning')
+@pytest.mark.filterwarnings("always::linearmodels.utility.MissingValueWarning")
def test_block_size_warnings():
y = np.arange(12.0)[:, None]
x = np.ones((12, 3))
x[:, 1] = np.arange(12.0)
x[:, 2] = np.arange(12.0) ** 2
- idx = pd.MultiIndex.from_product([['a', 'b', 'c'], pd.date_range('2000-1-1', periods=4)])
- y = pd.DataFrame(y, index=idx, columns=['y'])
- x = pd.DataFrame(x, index=idx, columns=['x1', 'x2', 'x3'])
+ idx = pd.MultiIndex.from_product(
+ [["a", "b", "c"], pd.date_range("2000-1-1", periods=4)]
+ )
+ y = pd.DataFrame(y, index=idx, columns=["y"])
+ x = pd.DataFrame(x, index=idx, columns=["x1", "x2", "x3"])
with pytest.warns(MissingValueWarning):
FamaMacBeth(y.iloc[:11], x.iloc[:11])
with pytest.warns(InferenceUnavailableWarning):
@@ -114,8 +120,10 @@ def test_block_size_error():
x = np.ones((12, 2))
x[1::4, 1] = 2
x[2::4, 1] = 3
- idx = pd.MultiIndex.from_product([['a', 'b', 'c'], pd.date_range('2000-1-1', periods=4)])
- y = pd.DataFrame(y, index=idx, columns=['y'])
- x = pd.DataFrame(x, index=idx, columns=['x1', 'x2'])
+ idx = pd.MultiIndex.from_product(
+ [["a", "b", "c"], pd.date_range("2000-1-1", periods=4)]
+ )
+ y = pd.DataFrame(y, index=idx, columns=["y"])
+ x = pd.DataFrame(x, index=idx, columns=["x1", "x2"])
with pytest.raises(ValueError):
FamaMacBeth(y, x)
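The kernel smoke tests above exercise HAC covariances on the time series of
period-by-period estimates. A hedged usage sketch on invented data (the
dimensions mirror the fixture's ntk=(25, 200, 5) shape, not its values):

    import numpy as np
    import pandas as pd
    from linearmodels.panel import FamaMacBeth

    entities = ["firm.{0}".format(i) for i in range(25)]
    times = pd.date_range("2000-01-01", periods=200)
    idx = pd.MultiIndex.from_product([entities, times])
    nt = len(idx)
    y = pd.DataFrame(np.random.standard_normal((nt, 1)), index=idx, columns=["y"])
    x = pd.DataFrame(np.ones((nt, 2)), index=idx, columns=["const", "x1"])
    x["x1"] = np.random.standard_normal(nt)

    # kernel and bandwidth are optional; omitting them selects defaults.
    res = FamaMacBeth(y, x).fit(cov_type="kernel", kernel="bartlett", bandwidth=12)
    print(res.params)
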
diff --git a/linearmodels/tests/panel/test_firstdifference_ols.py b/linearmodels/tests/panel/test_firstdifference_ols.py
index 3886aeb054..0f45b41845 100644
--- a/linearmodels/tests/panel/test_firstdifference_ols.py
+++ b/linearmodels/tests/panel/test_firstdifference_ols.py
@@ -12,11 +12,13 @@
assert_results_equal, datatypes,
generate_data)
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
missing = [0.0, 0.20]
perms = list(product(missing, datatypes))
-ids = list(map(lambda s: '-'.join(map(str, s)), perms))
+ids = list(map(lambda s: "-".join(map(str, s)), perms))
@pytest.fixture(params=perms, ids=ids)
@@ -32,16 +34,22 @@ def test_firstdifference_ols(data):
y = mod.dependent.values3d
x = mod.exog.values3d
dy = np.array(y[0, 1:] - y[0, :-1])
- dy = pd.DataFrame(dy, index=mod.dependent.panel.major_axis[1:],
- columns=mod.dependent.panel.minor_axis)
+ dy = pd.DataFrame(
+ dy,
+ index=mod.dependent.panel.major_axis[1:],
+ columns=mod.dependent.panel.minor_axis,
+ )
dy = dy.T.stack()
dy = dy.reindex(mod.dependent.index)
dx = x[:, 1:] - x[:, :-1]
_dx = {}
for i, dxi in enumerate(dx):
- temp = pd.DataFrame(dxi, index=mod.dependent.panel.major_axis[1:],
- columns=mod.dependent.panel.minor_axis)
+ temp = pd.DataFrame(
+ dxi,
+ index=mod.dependent.panel.major_axis[1:],
+ columns=mod.dependent.panel.minor_axis,
+ )
temp = temp.T.stack()
temp = temp.reindex(mod.dependent.index)
_dx[mod.exog.vars[i]] = temp
@@ -54,30 +62,32 @@ def test_firstdifference_ols(data):
dx = dx.loc[~drop]
ols_mod = IV2SLS(dy, dx, None, None)
- ols_res = ols_mod.fit(cov_type='unadjusted')
+ ols_res = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, ols_res)
- res = mod.fit(cov_type='robust', debiased=False)
- ols_res = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", debiased=False)
+ ols_res = ols_mod.fit(cov_type="robust")
assert_results_equal(res, ols_res)
clusters = data.vc1
ols_clusters = mod.reformat_clusters(data.vc1)
fd = mod.dependent.first_difference()
ols_clusters = ols_clusters.dataframe.loc[fd.index]
- res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
- ols_res = ols_mod.fit(cov_type='clustered', clusters=ols_clusters)
+ res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
+ ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
assert_results_equal(res, ols_res)
- res = mod.fit(cov_type='clustered', cluster_entity=True, debiased=False)
+ res = mod.fit(cov_type="clustered", cluster_entity=True, debiased=False)
entity_clusters = mod.dependent.first_difference().entity_ids
- ols_res = ols_mod.fit(cov_type='clustered', clusters=entity_clusters)
+ ols_res = ols_mod.fit(cov_type="clustered", clusters=entity_clusters)
assert_results_equal(res, ols_res)
- ols_clusters['entity.clusters'] = entity_clusters
+ ols_clusters["entity.clusters"] = entity_clusters
ols_clusters = ols_clusters.astype(np.int32)
- res = mod.fit(cov_type='clustered', cluster_entity=True, clusters=data.vc1, debiased=False)
- ols_res = ols_mod.fit(cov_type='clustered', clusters=ols_clusters)
+ res = mod.fit(
+ cov_type="clustered", cluster_entity=True, clusters=data.vc1, debiased=False
+ )
+ ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
assert_results_equal(res, ols_res)
@@ -88,16 +98,22 @@ def test_firstdifference_ols_weighted(data):
y = mod.dependent.values3d
x = mod.exog.values3d
dy = np.array(y[0, 1:] - y[0, :-1])
- dy = pd.DataFrame(dy, index=mod.dependent.panel.major_axis[1:],
- columns=mod.dependent.panel.minor_axis)
+ dy = pd.DataFrame(
+ dy,
+ index=mod.dependent.panel.major_axis[1:],
+ columns=mod.dependent.panel.minor_axis,
+ )
dy = dy.T.stack()
dy = dy.reindex(mod.dependent.index)
dx = x[:, 1:] - x[:, :-1]
_dx = {}
for i, dxi in enumerate(dx):
- temp = pd.DataFrame(dxi, index=mod.dependent.panel.major_axis[1:],
- columns=mod.dependent.panel.minor_axis)
+ temp = pd.DataFrame(
+ dxi,
+ index=mod.dependent.panel.major_axis[1:],
+ columns=mod.dependent.panel.minor_axis,
+ )
temp = temp.T.stack()
temp = temp.reindex(mod.dependent.index)
_dx[mod.exog.vars[i]] = temp
@@ -109,8 +125,11 @@ def test_firstdifference_ols_weighted(data):
w = mod.weights.values3d
w = 1.0 / w
sw = w[0, 1:] + w[0, :-1]
- sw = pd.DataFrame(sw, index=mod.dependent.panel.major_axis[1:],
- columns=mod.dependent.panel.minor_axis)
+ sw = pd.DataFrame(
+ sw,
+ index=mod.dependent.panel.major_axis[1:],
+ columns=mod.dependent.panel.minor_axis,
+ )
sw = sw.T.stack()
sw = sw.reindex(mod.dependent.index)
sw = 1.0 / sw
@@ -122,11 +141,11 @@ def test_firstdifference_ols_weighted(data):
sw = sw.loc[~drop]
ols_mod = IV2SLS(dy, dx, None, None, weights=sw)
- ols_res = ols_mod.fit(cov_type='unadjusted')
+ ols_res = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, ols_res)
- res = mod.fit(cov_type='robust', debiased=False)
- ols_res = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", debiased=False)
+ ols_res = ols_mod.fit(cov_type="robust")
assert_results_equal(res, ols_res)
clusters = data.vc1
@@ -134,8 +153,8 @@ def test_firstdifference_ols_weighted(data):
fd = mod.dependent.first_difference()
ols_clusters = ols_clusters.dataframe.loc[fd.index]
- res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
- ols_res = ols_mod.fit(cov_type='clustered', clusters=ols_clusters)
+ res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
+ ols_res = ols_mod.fit(cov_type="clustered", clusters=ols_clusters)
assert_results_equal(res, ols_res)
@@ -153,7 +172,7 @@ def test_first_difference_errors(data):
if not isinstance(data.x, pd.DataFrame):
return
x = data.x.copy()
- x['Intercept'] = 1.0
+ x["Intercept"] = 1.0
with pytest.raises(ValueError):
FirstDifferenceOLS(data.y, x)
@@ -173,7 +192,7 @@ def test_firstdifference_error(data):
clusters.iloc[::3, :] = clusters.iloc[::3, :] + 1
with pytest.raises(ValueError):
- mod.fit(cov_type='clustered', clusters=clusters)
+ mod.fit(cov_type="clustered", clusters=clusters)
def test_fitted_effects_residuals(data):
@@ -181,16 +200,16 @@ def test_fitted_effects_residuals(data):
res = mod.fit()
expected = mod.exog.values2d @ res.params.values
- expected = pd.DataFrame(expected, index=mod.exog.index, columns=['fitted_values'])
+ expected = pd.DataFrame(expected, index=mod.exog.index, columns=["fitted_values"])
assert_allclose(res.fitted_values, expected)
assert_frame_similar(res.fitted_values, expected)
expected.iloc[:, 0] = mod.dependent.values2d - expected.values
- expected.columns = ['idiosyncratic']
+ expected.columns = ["idiosyncratic"]
assert_allclose(res.idiosyncratic, expected)
assert_frame_similar(res.idiosyncratic, expected)
expected.iloc[:, 0] = np.nan
- expected.columns = ['estimated_effects']
+ expected.columns = ["estimated_effects"]
assert_allclose(res.estimated_effects, expected)
assert_frame_similar(res.estimated_effects, expected)
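These tests rebuild the first-difference estimator by hand. A minimal sketch
of the model interface, assuming balanced synthetic data (labels and sizes
invented); note a constant is rejected because differencing removes any
time-invariant column (see test_first_difference_errors above):

    import numpy as np
    import pandas as pd
    from linearmodels.panel import FirstDifferenceOLS

    entities = ["firm.{0}".format(i) for i in range(10)]
    times = pd.date_range("2000-01-01", periods=12)
    idx = pd.MultiIndex.from_product([entities, times])
    nt = len(idx)
    y = pd.DataFrame(np.random.standard_normal((nt, 1)), index=idx, columns=["y"])
    x = pd.DataFrame(np.random.standard_normal((nt, 2)), index=idx, columns=["x1", "x2"])

    # Estimates are computed on within-entity one-period differences.
    res = FirstDifferenceOLS(y, x).fit(cov_type="robust", debiased=False)
    print(res.params)
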
diff --git a/linearmodels/tests/panel/test_formula.py b/linearmodels/tests/panel/test_formula.py
index cdd2a6fc18..4760ad0245 100644
--- a/linearmodels/tests/panel/test_formula.py
+++ b/linearmodels/tests/panel/test_formula.py
@@ -13,23 +13,29 @@
PanelOLS, PooledOLS, RandomEffects)
from linearmodels.tests.panel._utility import datatypes, generate_data
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
PERC_MISSING = [0, 0.02, 0.10, 0.33]
TYPES = datatypes
-@pytest.fixture(params=list(product(PERC_MISSING, TYPES)),
- ids=list(map(lambda x: str(int(100 * x[0])) + '-' + str(x[1]),
- product(PERC_MISSING, TYPES))))
+@pytest.fixture(
+ params=list(product(PERC_MISSING, TYPES)),
+ ids=list(
+ map(
+ lambda x: str(int(100 * x[0])) + "-" + str(x[1]),
+ product(PERC_MISSING, TYPES),
+ )
+ ),
+)
def data(request):
missing, datatype = request.param
return generate_data(missing, datatype, ntk=(91, 7, 5))
-@pytest.fixture(params=['y ~ x1 + x2',
- 'y ~ x0 + x1 + x2 + x3 + x4 '],
- scope='module')
+@pytest.fixture(params=["y ~ x1 + x2", "y ~ x0 + x1 + x2 + x3 + x4 "], scope="module")
def formula(request):
return request.param
@@ -56,7 +62,7 @@ def test_basic_formulas(data, models, formula):
if not isinstance(data.y, DataFrame):
return
joined = data.x
- joined['y'] = data.y
+ joined["y"] = data.y
model, formula_func = models
mod = model.from_formula(formula, joined)
res = mod.fit()
@@ -67,8 +73,8 @@ def test_basic_formulas(data, models, formula):
res2 = mod2.fit()
np.testing.assert_allclose(res.params, res2.params)
- parts = formula.split('~')
- vars = parts[1].replace(' 1 ', ' const ').split('+')
+ parts = formula.split("~")
+ vars = parts[1].replace(" 1 ", " const ").split("+")
vars = list(map(lambda s: s.strip(), vars))
x = data.x
res2 = model(data.y, x[vars]).fit()
@@ -81,9 +87,9 @@ def test_basic_formulas(data, models, formula):
if model is FirstDifferenceOLS:
return
- formula = formula.split('~')
- formula[1] = ' 1 + ' + formula[1]
- formula = '~'.join(formula)
+ formula = formula.split("~")
+ formula[1] = " 1 + " + formula[1]
+ formula = "~".join(formula)
mod = model.from_formula(formula, joined)
res = mod.fit()
@@ -91,8 +97,8 @@ def test_basic_formulas(data, models, formula):
res2 = mod2.fit()
np.testing.assert_allclose(res.params, res2.params)
- x['Intercept'] = 1.0
- vars = ['Intercept'] + vars
+ x["Intercept"] = 1.0
+ vars = ["Intercept"] + vars
mod2 = model(data.y, x[vars])
res2 = mod2.fit()
np.testing.assert_allclose(res.params, res2.params)
@@ -103,9 +109,9 @@ def test_basic_formulas_math_op(data, models, formula):
if not isinstance(data.y, DataFrame):
return
joined = data.x
- joined['y'] = data.y
- formula = formula.replace('x0', 'np.exp(x0)')
- formula = formula.replace('x1', 'sigmoid(x1)')
+ joined["y"] = data.y
+ formula = formula.replace("x0", "np.exp(x0)")
+ formula = formula.replace("x1", "sigmoid(x1)")
model, formula_func = models
res = model.from_formula(formula, joined).fit()
pred = res.predict(data=joined)
@@ -117,8 +123,8 @@ def test_panel_ols_formulas_math_op(data):
if not isinstance(data.y, DataFrame):
return
joined = data.x
- joined['y'] = data.y
- formula = 'y ~ x1 + np.exp(x2)'
+ joined["y"] = data.y
+ formula = "y ~ x1 + np.exp(x2)"
mod = PanelOLS.from_formula(formula, joined)
mod.fit()
@@ -127,24 +133,24 @@ def test_panel_ols_formula(data):
if not isinstance(data.y, DataFrame):
return
joined = data.x
- joined['y'] = data.y
- formula = 'y ~ x1 + x2'
+ joined["y"] = data.y
+ formula = "y ~ x1 + x2"
mod = PanelOLS.from_formula(formula, joined)
assert mod.formula == formula
- formula = 'y ~ x1 + x2 + EntityEffects'
+ formula = "y ~ x1 + x2 + EntityEffects"
mod = PanelOLS.from_formula(formula, joined)
assert mod.formula == formula
assert mod.entity_effects is True
assert mod.time_effects is False
- formula = 'y ~ x1 + x2 + TimeEffects'
+ formula = "y ~ x1 + x2 + TimeEffects"
mod = PanelOLS.from_formula(formula, joined)
assert mod.formula == formula
assert mod.time_effects is True
assert mod.entity_effects is False
- formula = 'y ~ x1 + EntityEffects + TimeEffects + x2 '
+ formula = "y ~ x1 + EntityEffects + TimeEffects + x2 "
mod = PanelOLS.from_formula(formula, joined)
assert mod.formula == formula
assert mod.entity_effects is True
@@ -154,7 +160,7 @@ def test_panel_ols_formula(data):
res2 = mod2.fit()
np.testing.assert_allclose(res.params, res2.params)
- formula = 'y ~ x1 + EntityEffects + FixedEffects + x2 '
+ formula = "y ~ x1 + EntityEffects + FixedEffects + x2 "
with pytest.raises(ValueError):
PanelOLS.from_formula(formula, joined)
@@ -163,7 +169,7 @@ def test_basic_formulas_predict(data, models, formula):
if not isinstance(data.y, DataFrame):
return
joined = data.x
- joined['y'] = data.y
+ joined["y"] = data.y
model, formula_func = models
mod = model.from_formula(formula, joined)
res = mod.fit()
@@ -174,8 +180,8 @@ def test_basic_formulas_predict(data, models, formula):
pred2 = res2.predict(data=joined)
np.testing.assert_allclose(pred.values, pred2.values, atol=1e-8)
- parts = formula.split('~')
- vars = parts[1].replace(' 1 ', ' const ').split('+')
+ parts = formula.split("~")
+ vars = parts[1].replace(" 1 ", " const ").split("+")
vars = list(map(lambda s: s.strip(), vars))
x = data.x
res2 = model(data.y, x[vars]).fit()
@@ -187,15 +193,15 @@ def test_basic_formulas_predict(data, models, formula):
if model is FirstDifferenceOLS:
return
- formula = formula.split('~')
- formula[1] = ' 1 + ' + formula[1]
- formula = '~'.join(formula)
+ formula = formula.split("~")
+ formula[1] = " 1 + " + formula[1]
+ formula = "~".join(formula)
mod = model.from_formula(formula, joined)
res = mod.fit()
pred = res.predict(data=joined)
- x['Intercept'] = 1.0
- vars = ['Intercept'] + vars
+ x["Intercept"] = 1.0
+ vars = ["Intercept"] + vars
mod2 = model(data.y, x[vars])
res2 = mod2.fit()
pred2 = res.predict(x[vars])
@@ -208,7 +214,7 @@ def test_formulas_predict_error(data, models, formula):
if not isinstance(data.y, DataFrame):
return
joined = data.x
- joined['y'] = data.y
+ joined["y"] = data.y
model, formula_func = models
mod = model.from_formula(formula, joined)
res = mod.fit()
@@ -217,8 +223,8 @@ def test_formulas_predict_error(data, models, formula):
with pytest.raises(ValueError):
mod.predict(params=res.params, exog=joined, data=joined)
- parts = formula.split('~')
- vars = parts[1].replace(' 1 ', ' const ').split('+')
+ parts = formula.split("~")
+ vars = parts[1].replace(" 1 ", " const ").split("+")
vars = list(map(lambda s: s.strip(), vars))
x = data.x
res = model(data.y, x[vars]).fit()
@@ -230,9 +236,9 @@ def test_parser(data, formula, effects):
if not isinstance(data.y, DataFrame):
return
if effects:
- formula += ' + EntityEffects + TimeEffects'
+ formula += " + EntityEffects + TimeEffects"
joined = data.x
- joined['y'] = data.y
+ joined["y"] = data.y
parser = PanelFormulaParser(formula, joined)
dep, exog = parser.data
assert_frame_equal(parser.dependent, dep)
@@ -241,10 +247,10 @@ def test_parser(data, formula, effects):
assert parser.eval_env == 3
parser.eval_env = 2
assert parser.eval_env == 2
- assert parser.entity_effect == ('EntityEffects' in formula)
- assert parser.time_effect == ('TimeEffects' in formula)
+ assert parser.entity_effect == ("EntityEffects" in formula)
+ assert parser.time_effect == ("TimeEffects" in formula)
- formula += ' + FixedEffects '
+ formula += " + FixedEffects "
if effects:
with pytest.raises(ValueError):
PanelFormulaParser(formula, joined)
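
For readers skimming the patch, a minimal standalone sketch of the formula behaviour these tests exercise (not part of the patch; the panel frame below is illustrative): EntityEffects and TimeEffects terms in the formula toggle the matching PanelOLS flags, while FixedEffects duplicates EntityEffects and so cannot be combined with it.

import numpy as np
import pandas as pd
from linearmodels.panel.model import PanelOLS

# Build a small balanced panel with a MultiIndex of (entity, time).
index = pd.MultiIndex.from_product([["a", "b", "c", "d"], range(5)])
rng = np.random.RandomState(0)
panel = pd.DataFrame(
    rng.standard_normal((20, 3)), index=index, columns=["y", "x1", "x2"]
)

# Effects terms in the formula map onto the model's effect flags.
mod = PanelOLS.from_formula("y ~ x1 + x2 + EntityEffects + TimeEffects", panel)
assert mod.entity_effects and mod.time_effects

# FixedEffects is redundant with EntityEffects, so combining them is rejected.
try:
    PanelOLS.from_formula("y ~ x1 + EntityEffects + FixedEffects", panel)
except ValueError:
    pass
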
diff --git a/linearmodels/tests/panel/test_model.py b/linearmodels/tests/panel/test_model.py
index db2f40657d..b4094842e1 100644
--- a/linearmodels/tests/panel/test_model.py
+++ b/linearmodels/tests/panel/test_model.py
@@ -10,15 +10,23 @@
from linearmodels.panel.utility import AbsorbingEffectError
from linearmodels.tests.panel._utility import datatypes, generate_data, lsdv
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
PERC_MISSING = [0, 0.02, 0.10, 0.33]
TYPES = datatypes
-@pytest.fixture(params=list(product(PERC_MISSING, TYPES)),
- ids=list(map(lambda x: str(int(100 * x[0])) + '-' + str(x[1]),
- product(PERC_MISSING, TYPES))))
+@pytest.fixture(
+ params=list(product(PERC_MISSING, TYPES)),
+ ids=list(
+ map(
+ lambda x: str(int(100 * x[0])) + "-" + str(x[1]),
+ product(PERC_MISSING, TYPES),
+ )
+ ),
+)
def data(request):
missing, datatype = request.param
rng = np.random.RandomState(12345)
@@ -155,7 +163,7 @@ def test_incorrect_weight_shape(data):
w = data.w
if isinstance(w, pd.DataFrame):
entities = w.index.levels[0][:4]
- w = w.loc[pd.IndexSlice[entities[0]:entities[-1]], :]
+ w = w.loc[pd.IndexSlice[entities[0] : entities[-1]], :]
elif isinstance(w, np.ndarray):
w = w[:3]
w = w[None, :, :]
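
The `entities[0] : entities[-1]` hunks above are not functional changes: Black follows the PEP 8 rule of spacing a slice colon like a binary operator when the endpoints are non-trivial expressions. A plain-pandas illustration of the pattern being reformatted:

import numpy as np
import pandas as pd

index = pd.MultiIndex.from_product([["a", "b", "c"], [0, 1]])
df = pd.DataFrame(np.arange(12).reshape(6, 2), index=index)
entities = df.index.levels[0]

# Both spellings select every row for the first two entities; Black simply
# prefers the spaced form when the slice endpoints are complex expressions.
subset = df.loc[pd.IndexSlice[entities[0] : entities[1]], :]
assert subset.shape[0] == 4
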
@@ -170,7 +178,7 @@ def test_weight_ambiguity(data):
if isinstance(data.x, pd.DataFrame):
t = len(data.y.index.levels[1])
entities = data.x.index.levels[0]
- slice = pd.IndexSlice[entities[0]:entities[t - 1]]
+ slice = pd.IndexSlice[entities[0] : entities[t - 1]]
x = data.x.loc[slice, :]
else:
t = data.x.shape[1]
@@ -181,7 +189,7 @@ def test_weight_ambiguity(data):
PanelOLS(y, x, weights=weights)
-@pytest.mark.parametrize('intercept', [True, False])
+@pytest.mark.parametrize("intercept", [True, False])
def test_absorbing_effect(data, intercept):
x = data.x.copy()
if isinstance(data.x, pd.DataFrame):
@@ -189,15 +197,15 @@ def test_absorbing_effect(data, intercept):
ntime = len(x.index.levels[1])
temp = data.x.iloc[:, 0].copy()
temp.values[:] = 1.0
- temp.values[:(ntime * (nentity // 2))] = 0
+ temp.values[: (ntime * (nentity // 2))] = 0
if intercept:
- x['Intercept'] = 1.0
- x['absorbed'] = temp
+ x["Intercept"] = 1.0
+ x["absorbed"] = temp
else:
intercept_vals = np.ones((1, x.shape[1], x.shape[2]))
absorbed = np.ones((1, x.shape[1], x.shape[2]))
- absorbed[:, :, :x.shape[2] // 2] = 0
+ absorbed[:, :, : x.shape[2] // 2] = 0
if intercept:
extra = [x, intercept_vals, absorbed]
else:
@@ -209,10 +217,10 @@ def test_absorbing_effect(data, intercept):
mod.fit()
var_names = mod.exog.vars
assert var_names[3] in str(exc_info.value)
- assert (' ' * (2 - intercept) + var_names[-1]) in str(exc_info.value)
+ assert (" " * (2 - intercept) + var_names[-1]) in str(exc_info.value)
-@pytest.mark.filterwarnings('ignore::DeprecationWarning')
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_all_missing(data):
y = PanelData(data.y)
x = PanelData(data.x)
@@ -220,6 +228,7 @@ def test_all_missing(data):
y.drop(missing)
x.drop(missing)
import warnings
+
with warnings.catch_warnings(record=True) as w:
PanelOLS(y.dataframe, x.dataframe).fit()
assert len(w) == 0
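
test_all_missing relies on the recorded-warnings idiom: catch_warnings(record=True) collects anything emitted so the test can assert that nothing fired. A self-contained sketch of the idiom (the noisy helper is invented for illustration):

import warnings

def noisy(trigger: bool) -> None:
    if trigger:
        warnings.warn("something happened", UserWarning)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")  # make sure no filter swallows the warning
    noisy(trigger=False)
assert len(caught) == 0  # the same assertion style used in test_all_missing

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    noisy(trigger=True)
assert len(caught) == 1 and issubclass(caught[0].category, UserWarning)
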
diff --git a/linearmodels/tests/panel/test_panel_covariance.py b/linearmodels/tests/panel/test_panel_covariance.py
index dbdd087e19..48822566a2 100644
--- a/linearmodels/tests/panel/test_panel_covariance.py
+++ b/linearmodels/tests/panel/test_panel_covariance.py
@@ -25,96 +25,162 @@ def setup_class(cls):
cls.cluster5 = np.random.randint(0, 10, (cls.n * cls.t, 3))
def test_heteroskedastic_smoke(self):
- cov = HeteroskedasticCovariance(self.y, self.x, self.params, self.entity_ids,
- self.time_ids, extra_df=0).cov
+ cov = HeteroskedasticCovariance(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids, extra_df=0
+ ).cov
assert cov.shape == (self.k, self.k)
- cov = HeteroskedasticCovariance(self.y, self.x, self.params, self.entity_ids,
- self.time_ids, extra_df=0).cov
+ cov = HeteroskedasticCovariance(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids, extra_df=0
+ ).cov
assert cov.shape == (self.k, self.k)
def test_homoskedastic_smoke(self):
- cov = HomoskedasticCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0).cov
+ cov = HomoskedasticCovariance(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids, extra_df=0
+ ).cov
assert cov.shape == (self.k, self.k)
- cov = HomoskedasticCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0).cov
+ cov = HomoskedasticCovariance(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids, extra_df=0
+ ).cov
assert cov.shape == (self.k, self.k)
def test_clustered_covariance_smoke(self):
- cov = ClusteredCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0).cov
- assert cov.shape == (self.k, self.k)
-
- cov = ClusteredCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0,
- clusters=self.cluster1).cov
- assert cov.shape == (self.k, self.k)
-
- cov = ClusteredCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0,
- clusters=self.cluster2, group_debias=True).cov
- assert cov.shape == (self.k, self.k)
-
- cov = ClusteredCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0,
- clusters=self.cluster3).cov
- assert cov.shape == (self.k, self.k)
- cov = ClusteredCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0,
- clusters=self.cluster3, group_debias=True).cov
- assert cov.shape == (self.k, self.k)
-
- cov = ClusteredCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0,
- clusters=self.cluster4).cov
- assert cov.shape == (self.k, self.k)
-
- cov = ClusteredCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0,
- clusters=self.cluster4, group_debias=True).cov
+ cov = ClusteredCovariance(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids, extra_df=0
+ ).cov
+ assert cov.shape == (self.k, self.k)
+
+ cov = ClusteredCovariance(
+ self.y,
+ self.x,
+ self.params,
+ self.entity_ids,
+ self.time_ids,
+ extra_df=0,
+ clusters=self.cluster1,
+ ).cov
+ assert cov.shape == (self.k, self.k)
+
+ cov = ClusteredCovariance(
+ self.y,
+ self.x,
+ self.params,
+ self.entity_ids,
+ self.time_ids,
+ extra_df=0,
+ clusters=self.cluster2,
+ group_debias=True,
+ ).cov
+ assert cov.shape == (self.k, self.k)
+
+ cov = ClusteredCovariance(
+ self.y,
+ self.x,
+ self.params,
+ self.entity_ids,
+ self.time_ids,
+ extra_df=0,
+ clusters=self.cluster3,
+ ).cov
+ assert cov.shape == (self.k, self.k)
+ cov = ClusteredCovariance(
+ self.y,
+ self.x,
+ self.params,
+ self.entity_ids,
+ self.time_ids,
+ extra_df=0,
+ clusters=self.cluster3,
+ group_debias=True,
+ ).cov
+ assert cov.shape == (self.k, self.k)
+
+ cov = ClusteredCovariance(
+ self.y,
+ self.x,
+ self.params,
+ self.entity_ids,
+ self.time_ids,
+ extra_df=0,
+ clusters=self.cluster4,
+ ).cov
+ assert cov.shape == (self.k, self.k)
+
+ cov = ClusteredCovariance(
+ self.y,
+ self.x,
+ self.params,
+ self.entity_ids,
+ self.time_ids,
+ extra_df=0,
+ clusters=self.cluster4,
+ group_debias=True,
+ ).cov
assert cov.shape == (self.k, self.k)
def test_clustered_covariance_error(self):
with pytest.raises(ValueError):
- ClusteredCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0,
- clusters=self.cluster5)
+ ClusteredCovariance(
+ self.y,
+ self.x,
+ self.params,
+ self.entity_ids,
+ self.time_ids,
+ extra_df=0,
+ clusters=self.cluster5,
+ )
with pytest.raises(ValueError):
- ClusteredCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- extra_df=0,
- clusters=self.cluster4[::2])
+ ClusteredCovariance(
+ self.y,
+ self.x,
+ self.params,
+ self.entity_ids,
+ self.time_ids,
+ extra_df=0,
+ clusters=self.cluster4[::2],
+ )
def test_driscoll_kraay_smoke(self):
- cov = DriscollKraay(self.y, self.x, self.params, self.entity_ids, self.time_ids).cov
+ cov = DriscollKraay(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids
+ ).cov
assert cov.shape == (self.k, self.k)
- cov = DriscollKraay(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- kernel='parzen').cov
+ cov = DriscollKraay(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids, kernel="parzen"
+ ).cov
assert cov.shape == (self.k, self.k)
- cov = DriscollKraay(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- bandwidth=12).cov
+ cov = DriscollKraay(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids, bandwidth=12
+ ).cov
assert cov.shape == (self.k, self.k)
def test_ac_covariance_smoke(self):
- cov = ACCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids).cov
+ cov = ACCovariance(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids
+ ).cov
assert cov.shape == (self.k, self.k)
- cov = ACCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- kernel='parzen').cov
+ cov = ACCovariance(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids, kernel="parzen"
+ ).cov
assert cov.shape == (self.k, self.k)
- cov = ACCovariance(self.y, self.x, self.params, self.entity_ids, self.time_ids,
- bandwidth=12).cov
+ cov = ACCovariance(
+ self.y, self.x, self.params, self.entity_ids, self.time_ids, bandwidth=12
+ ).cov
assert cov.shape == (self.k, self.k)
def test_covariance_manager():
- cm = CovarianceManager('made-up-class', HomoskedasticCovariance, HeteroskedasticCovariance)
+ cm = CovarianceManager(
+ "made-up-class", HomoskedasticCovariance, HeteroskedasticCovariance
+ )
with pytest.raises(ValueError):
- cm['clustered']
+ cm["clustered"]
with pytest.raises(KeyError):
- cm['unknown']
+ cm["unknown"]
- assert cm['unadjusted'] is HomoskedasticCovariance
- assert cm['homoskedastic'] is HomoskedasticCovariance
- assert cm['robust'] is HeteroskedasticCovariance
- assert cm['heteroskedastic'] is HeteroskedasticCovariance
+ assert cm["unadjusted"] is HomoskedasticCovariance
+ assert cm["homoskedastic"] is HomoskedasticCovariance
+ assert cm["robust"] is HeteroskedasticCovariance
+ assert cm["heteroskedastic"] is HeteroskedasticCovariance
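
test_covariance_manager pins down a three-way contract: known aliases resolve to a class, an alias the estimator recognises but does not support raises ValueError, and an unknown name raises KeyError. A hypothetical reimplementation of that contract (not the library's actual class; names are stand-ins):

class MiniCovarianceManager:
    # Alias -> canonical class name; "clustered" is known but may be unsupported.
    NAMES = {
        "unadjusted": "HomoskedasticCovariance",
        "homoskedastic": "HomoskedasticCovariance",
        "robust": "HeteroskedasticCovariance",
        "heteroskedastic": "HeteroskedasticCovariance",
        "clustered": "ClusteredCovariance",
    }

    def __init__(self, estimator, *supported):
        self.estimator = estimator
        self.supported = {cls.__name__: cls for cls in supported}

    def __getitem__(self, name):
        canonical = self.NAMES[name]  # unknown alias -> KeyError
        if canonical not in self.supported:
            raise ValueError(f"{name} is not supported for {self.estimator}")
        return self.supported[canonical]

class HomoskedasticCovariance:  # stand-ins for the real covariance classes
    pass

class HeteroskedasticCovariance:
    pass

cm = MiniCovarianceManager(
    "made-up-class", HomoskedasticCovariance, HeteroskedasticCovariance
)
assert cm["robust"] is HeteroskedasticCovariance
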
diff --git a/linearmodels/tests/panel/test_panel_ols.py b/linearmodels/tests/panel/test_panel_ols.py
index 2d2a21f775..5c7f41bcd5 100644
--- a/linearmodels/tests/panel/test_panel_ols.py
+++ b/linearmodels/tests/panel/test_panel_ols.py
@@ -10,29 +10,33 @@
from linearmodels.iv.model import IV2SLS
from linearmodels.panel.data import PanelData
from linearmodels.panel.model import PanelOLS, PooledOLS
+from linearmodels.panel.utility import AbsorbingEffectWarning
from linearmodels.tests.panel._utility import (access_attributes,
assert_frame_similar,
assert_results_equal, datatypes,
generate_data)
-from linearmodels.panel.utility import AbsorbingEffectWarning
from linearmodels.utility import AttrDict, MemoryWarning
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning',
- 'ignore:the matrix subclass:PendingDeprecationWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning",
+ "ignore:the matrix subclass:PendingDeprecationWarning",
+)
missing = [0.0, 0.02, 0.20]
has_const = [True, False]
perms = list(product(missing, datatypes, has_const))
-ids = list(map(lambda s: '-'.join(map(str, s)), perms))
+ids = list(map(lambda s: "-".join(map(str, s)), perms))
@pytest.fixture(params=perms, ids=ids)
def data(request):
missing, datatype, const = request.param
- return generate_data(missing, datatype, const=const, ntk=(91, 15, 5), other_effects=2)
+ return generate_data(
+ missing, datatype, const=const, ntk=(91, 15, 5), other_effects=2
+ )
-@pytest.fixture(params=['numpy', 'pandas'])
+@pytest.fixture(params=["numpy", "pandas"])
def absorbed_data(request):
datatype = request.param
rng = np.random.RandomState(12345)
@@ -49,29 +53,37 @@ def absorbed_data(request):
# pandas < 0.24
codes = data.x.index.labels
absorbed = np.array(codes[0]).astype(np.double)
- data.x['x_absorbed'] = absorbed
+ data.x["x_absorbed"] = absorbed
return data
@pytest.fixture(params=perms, ids=ids)
def large_data(request):
missing, datatype, const = request.param
- return generate_data(missing, datatype, const=const, ntk=(51, 71, 5), other_effects=2)
+ return generate_data(
+ missing, datatype, const=const, ntk=(51, 71, 5), other_effects=2
+ )
-singleton_ids = [i for i, p in zip(ids, perms) if p[1] == 'pandas' and not p[-1]]
-singleton_perms = [p for p in perms if p[1] == 'pandas' and not p[-1]]
+singleton_ids = [i for i, p in zip(ids, perms) if p[1] == "pandas" and not p[-1]]
+singleton_perms = [p for p in perms if p[1] == "pandas" and not p[-1]]
@pytest.fixture(params=singleton_perms, ids=singleton_ids)
def singleton_data(request):
missing, datatype, const = request.param
- return generate_data(missing, datatype, const=const, ntk=(91, 15, 5), other_effects=2,
- num_cats=[5 * 91, 15])
+ return generate_data(
+ missing,
+ datatype,
+ const=const,
+ ntk=(91, 15, 5),
+ other_effects=2,
+ num_cats=[5 * 91, 15],
+ )
perms = list(product(missing, datatypes))
-ids = list(map(lambda s: '-'.join(map(str, s)), perms))
+ids = list(map(lambda s: "-".join(map(str, s)), perms))
@pytest.fixture(params=perms, ids=ids)
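
The fixture pattern Black is reflowing here is worth isolating: parametrize over a cartesian product of settings and derive human-readable test ids from each tuple. A standalone sketch (the datatype values are assumed; the real list lives in linearmodels.tests.panel._utility):

from itertools import product

import pytest

MISSING = [0.0, 0.02, 0.20]
DATATYPES = ["numpy", "pandas"]  # assumed subset of the real datatypes list
PERMS = list(product(MISSING, DATATYPES))
IDS = ["-".join(map(str, p)) for p in PERMS]  # e.g. "0.02-pandas"

@pytest.fixture(params=PERMS, ids=IDS)
def config(request):
    missing, datatype = request.param
    return {"missing": missing, "datatype": datatype}

def test_missing_fraction_is_valid(config):
    assert 0 <= config["missing"] < 1
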
@@ -81,7 +93,7 @@ def const_data(request):
y = PanelData(data.y).dataframe
x = y.copy()
x.iloc[:, :] = 1
- x.columns = ['Const']
+ x.columns = ["Const"]
return AttrDict(y=y, x=x, w=PanelData(data.w).dataframe)
@@ -95,22 +107,29 @@ def time_eff(request):
return request.param
-perms = [p for p in product([True, False], [True, False], [True, False], [0, 1, 2]) if
- sum(p[1:]) <= 2]
+perms = [
+ p
+ for p in product([True, False], [True, False], [True, False], [0, 1, 2])
+ if sum(p[1:]) <= 2
+]
ids = []
for p in perms:
- str_id = 'weighted' if p[0] else 'unweighted'
- str_id += '-entity_effects' if p[1] else ''
- str_id += '-time_effects' if p[2] else ''
- str_id += '-{0}_other_effects'.format(p[3]) if p[3] else ''
+ str_id = "weighted" if p[0] else "unweighted"
+ str_id += "-entity_effects" if p[1] else ""
+ str_id += "-time_effects" if p[2] else ""
+ str_id += "-{0}_other_effects".format(p[3]) if p[3] else ""
ids.append(str_id)
@pytest.fixture(params=perms, ids=ids)
def lsdv_config(request):
weights, entity_effects, time_effects, other_effects = request.param
- return AttrDict(weights=weights, entity_effects=entity_effects, time_effects=time_effects,
- other_effects=other_effects)
+ return AttrDict(
+ weights=weights,
+ entity_effects=entity_effects,
+ time_effects=time_effects,
+ other_effects=other_effects,
+ )
def test_const_data_only(const_data):
@@ -135,7 +154,7 @@ def test_const_data_entity(const_data):
res = mod.fit(debiased=False)
x = mod.exog.dataframe
- d = mod.dependent.dummies('entity', drop_first=True)
+ d = mod.dependent.dummies("entity", drop_first=True)
d.iloc[:, :] = d.values - x.values @ lstsq(x.values, d.values)[0]
xd = np.c_[x.values, d.values]
@@ -153,7 +172,7 @@ def test_const_data_entity_weights(const_data):
y = mod.dependent.dataframe
w = mod.weights.dataframe
x = mod.exog.dataframe
- d = mod.dependent.dummies('entity', drop_first=True)
+ d = mod.dependent.dummies("entity", drop_first=True)
d_columns = list(d.columns)
root_w = np.sqrt(w.values)
@@ -175,7 +194,7 @@ def test_const_data_time(const_data):
res = mod.fit(debiased=False)
x = mod.exog.dataframe
- d = mod.dependent.dummies('time', drop_first=True)
+ d = mod.dependent.dummies("time", drop_first=True)
d.iloc[:, :] = d.values - x.values @ lstsq(x.values, d.values)[0]
xd = np.c_[x.values, d.values]
@@ -193,7 +212,7 @@ def test_const_data_time_weights(const_data):
y = mod.dependent.dataframe
w = mod.weights.dataframe
x = mod.exog.dataframe
- d = mod.dependent.dummies('time', drop_first=True)
+ d = mod.dependent.dummies("time", drop_first=True)
d_columns = list(d.columns)
root_w = np.sqrt(w.values)
@@ -215,10 +234,10 @@ def test_const_data_both(const_data):
res = mod.fit(debiased=False)
x = mod.exog.dataframe
- d1 = mod.dependent.dummies('entity', drop_first=True)
- d1.columns = ['d.entity.{0}'.format(i) for i in d1]
- d2 = mod.dependent.dummies('time', drop_first=True)
- d2.columns = ['d.time.{0}'.format(i) for i in d2]
+ d1 = mod.dependent.dummies("entity", drop_first=True)
+ d1.columns = ["d.entity.{0}".format(i) for i in d1]
+ d2 = mod.dependent.dummies("time", drop_first=True)
+ d2.columns = ["d.time.{0}".format(i) for i in d2]
d = np.c_[d1.values, d2.values]
d = pd.DataFrame(d, index=x.index, columns=list(d1.columns) + list(d2.columns))
d.iloc[:, :] = d.values - x.values @ lstsq(x.values, d.values)[0]
@@ -238,10 +257,10 @@ def test_const_data_both_weights(const_data):
w = mod.weights.dataframe
x = mod.exog.dataframe
- d1 = mod.dependent.dummies('entity', drop_first=True)
- d1.columns = ['d.entity.{0}'.format(i) for i in d1]
- d2 = mod.dependent.dummies('time', drop_first=True)
- d2.columns = ['d.time.{0}'.format(i) for i in d2]
+ d1 = mod.dependent.dummies("entity", drop_first=True)
+ d1.columns = ["d.entity.{0}".format(i) for i in d1]
+ d2 = mod.dependent.dummies("time", drop_first=True)
+ d2.columns = ["d.time.{0}".format(i) for i in d2]
d = np.c_[d1.values, d2.values]
root_w = np.sqrt(w.values)
z = np.ones_like(x)
@@ -276,53 +295,73 @@ def test_panel_entity_lsdv(data):
y = mod.dependent.dataframe
x = mod.exog.dataframe
if mod.has_constant:
- d = mod.dependent.dummies('entity', drop_first=True)
+ d = mod.dependent.dummies("entity", drop_first=True)
z = np.ones_like(y)
d_demean = d.values - z @ lstsq(z, d.values)[0]
else:
- d = mod.dependent.dummies('entity', drop_first=False)
+ d = mod.dependent.dummies("entity", drop_first=False)
d_demean = d.values
xd = np.c_[x.values, d_demean]
xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d.columns))
ols_mod = IV2SLS(y, xd, None, None)
- res2 = ols_mod.fit(cov_type='unadjusted', debiased=False)
+ res2 = ols_mod.fit(cov_type="unadjusted", debiased=False)
assert_results_equal(res, res2, test_fit=False)
assert_allclose(res.rsquared_inclusive, res2.rsquared)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc1
ols_clusters = mod.reformat_clusters(data.vc1)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc2
ols_clusters = mod.reformat_clusters(data.vc2)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.time_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_time=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.entity_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_entity=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
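
What the *_lsdv tests check, reduced to numpy: regressing y on x plus a full set of entity dummies (LSDV) yields the same slope as the within estimator that demeans y and x by entity. A numerical sketch of that equivalence:

import numpy as np

rng = np.random.RandomState(0)
n_ent, n_time = 10, 8
ids = np.repeat(np.arange(n_ent), n_time)
x = rng.standard_normal(n_ent * n_time)
y = 2.0 * x + rng.standard_normal(n_ent)[ids] + 0.1 * rng.standard_normal(n_ent * n_time)

# LSDV: x alongside one dummy per entity (no separate constant).
dummies = (ids[:, None] == np.arange(n_ent)[None, :]).astype(float)
beta_lsdv = np.linalg.lstsq(np.column_stack([x, dummies]), y, rcond=None)[0][0]

# Within: subtract entity means from y and x, then run OLS.
def demean(v):
    means = np.bincount(ids, weights=v) / np.bincount(ids)
    return v - means[ids]

beta_within = np.linalg.lstsq(demean(x)[:, None], demean(y), rcond=None)[0][0]
assert np.allclose(beta_lsdv, beta_within)
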
@@ -333,22 +372,22 @@ def test_panel_entity_fwl(data):
y = mod.dependent.dataframe
x = mod.exog.dataframe
if mod.has_constant:
- d = mod.dependent.dummies('entity', drop_first=True)
+ d = mod.dependent.dummies("entity", drop_first=True)
z = np.ones_like(y)
d_demean = d.values - z @ lstsq(z, d.values)[0]
else:
- d = mod.dependent.dummies('entity', drop_first=False)
+ d = mod.dependent.dummies("entity", drop_first=False)
d_demean = d.values
x = x - d_demean @ lstsq(d_demean, x)[0]
y = y - d_demean @ lstsq(d_demean, y)[0]
ols_mod = IV2SLS(y, x, None, None)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_df=False)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_df=False)
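
The companion *_fwl tests lean on the Frisch-Waugh-Lovell theorem: the coefficient on x from a joint regression on [x, d] equals the coefficient from regressing d-residualized y on d-residualized x. In miniature:

import numpy as np

rng = np.random.RandomState(0)
n = 200
d = np.column_stack([np.ones(n), rng.standard_normal(n)])  # controls, incl. constant
x = rng.standard_normal((n, 1)) + d @ np.array([[0.5], [1.0]])
y = 2.0 * x + d @ np.array([[1.0], [-1.0]]) + 0.1 * rng.standard_normal((n, 1))

beta_joint = np.linalg.lstsq(np.column_stack([x, d]), y, rcond=None)[0][0]

def resid(a, z):
    # Residuals from projecting a on the columns of z.
    return a - z @ np.linalg.lstsq(z, a, rcond=None)[0]

beta_fwl = np.linalg.lstsq(resid(x, d), resid(y, d), rcond=None)[0][0]
assert np.allclose(beta_joint, beta_fwl)
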
@@ -358,7 +397,7 @@ def test_panel_time_lsdv(large_data):
y = mod.dependent.dataframe
x = mod.exog.dataframe
- d = mod.dependent.dummies('time', drop_first=mod.has_constant)
+ d = mod.dependent.dummies("time", drop_first=mod.has_constant)
d_cols = list(d.columns)
d = d.values
if mod.has_constant:
@@ -369,42 +408,62 @@ def test_panel_time_lsdv(large_data):
xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + d_cols)
ols_mod = IV2SLS(y, xd, None, None)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_fit=False)
assert_allclose(res.rsquared_inclusive, res2.rsquared)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_fit=False)
clusters = large_data.vc1
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
clusters = large_data.vc2
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.time_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_time=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.entity_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_entity=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
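
Every cluster block in these hunks repeats one equivalence: cluster_time=True (or cluster_entity=True) is shorthand for passing the time (or entity) ids as an explicit clusters frame. A standalone sketch of that round trip, with illustrative sizes and column name:

import numpy as np
import pandas as pd
from linearmodels.panel.data import PanelData
from linearmodels.panel.model import PanelOLS

rng = np.random.RandomState(0)
index = pd.MultiIndex.from_product([range(20), range(10)])
panel = pd.DataFrame(rng.standard_normal((200, 2)), index=index, columns=["y", "x1"])

mod = PanelOLS(panel[["y"]], panel[["x1"]], entity_effects=True)
res_short = mod.fit(cov_type="clustered", cluster_time=True, debiased=False)

dep = PanelData(panel[["y"]])
clusters = pd.DataFrame(dep.time_ids, index=dep.index, columns=["var.clust"])
res_long = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
np.testing.assert_allclose(res_short.std_errors, res_long.std_errors)
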
@@ -414,7 +473,7 @@ def test_panel_time_fwl(data):
y = mod.dependent.dataframe
x = mod.exog.dataframe
- d = mod.dependent.dummies('time', drop_first=mod.has_constant)
+ d = mod.dependent.dummies("time", drop_first=mod.has_constant)
d = d.values
if mod.has_constant:
z = np.ones_like(y)
@@ -424,11 +483,11 @@ def test_panel_time_fwl(data):
y = y - d @ lstsq(d, y)[0]
ols_mod = IV2SLS(y, x, None, None)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_df=False)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_df=False)
@@ -438,8 +497,8 @@ def test_panel_both_lsdv(data):
y = mod.dependent.dataframe
x = mod.exog.dataframe
- d1 = mod.dependent.dummies('entity', drop_first=mod.has_constant)
- d2 = mod.dependent.dummies('time', drop_first=True)
+ d1 = mod.dependent.dummies("entity", drop_first=mod.has_constant)
+ d2 = mod.dependent.dummies("time", drop_first=True)
d = np.c_[d1.values, d2.values]
if mod.has_constant:
@@ -447,47 +506,67 @@ def test_panel_both_lsdv(data):
d = d - z @ lstsq(z, d)[0]
xd = np.c_[x.values, d]
- xd = pd.DataFrame(xd,
- index=x.index,
- columns=list(x.columns) + list(d1.columns) + list(d2.columns))
+ xd = pd.DataFrame(
+ xd, index=x.index, columns=list(x.columns) + list(d1.columns) + list(d2.columns)
+ )
ols_mod = IV2SLS(y, xd, None, None)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_fit=False)
assert_allclose(res.rsquared_inclusive, res2.rsquared)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc1
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc2
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.time_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_time=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.entity_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_entity=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
@@ -497,8 +576,8 @@ def test_panel_both_fwl(data):
y = mod.dependent.dataframe
x = mod.exog.dataframe
- d1 = mod.dependent.dummies('entity', drop_first=mod.has_constant)
- d2 = mod.dependent.dummies('time', drop_first=True)
+ d1 = mod.dependent.dummies("entity", drop_first=mod.has_constant)
+ d2 = mod.dependent.dummies("time", drop_first=True)
d = np.c_[d1.values, d2.values]
if mod.has_constant:
@@ -509,11 +588,11 @@ def test_panel_both_fwl(data):
y = y - d @ lstsq(d, y)[0]
ols_mod = IV2SLS(y, x, None, None)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_df=False)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_df=False)
@@ -524,7 +603,7 @@ def test_panel_entity_lsdv_weighted(data):
y = mod.dependent.dataframe
x = mod.exog.dataframe
w = mod.weights.dataframe
- d = mod.dependent.dummies('entity', drop_first=mod.has_constant)
+ d = mod.dependent.dummies("entity", drop_first=mod.has_constant)
d_cols = d.columns
d = d.values
if mod.has_constant:
@@ -538,42 +617,62 @@ def test_panel_entity_lsdv_weighted(data):
xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d_cols))
ols_mod = IV2SLS(y, xd, None, None, weights=w)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_fit=False)
assert_allclose(res.rsquared_inclusive, res2.rsquared)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc1
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc2
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.time_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_time=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.entity_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_entity=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
@@ -584,7 +683,7 @@ def test_panel_time_lsdv_weighted(large_data):
y = mod.dependent.dataframe
x = mod.exog.dataframe
w = mod.weights.dataframe
- d = mod.dependent.dummies('time', drop_first=mod.has_constant)
+ d = mod.dependent.dummies("time", drop_first=mod.has_constant)
d_cols = d.columns
d = d.values
if mod.has_constant:
@@ -598,53 +697,75 @@ def test_panel_time_lsdv_weighted(large_data):
xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d_cols))
ols_mod = IV2SLS(y, xd, None, None, weights=w)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_fit=False)
clusters = large_data.vc1
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
clusters = large_data.vc2
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.time_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_time=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.entity_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_entity=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
def test_panel_both_lsdv_weighted(data):
- mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True, weights=data.w)
+ mod = PanelOLS(
+ data.y, data.x, entity_effects=True, time_effects=True, weights=data.w
+ )
res = mod.fit(auto_df=False, count_effects=False, debiased=False)
y = mod.dependent.dataframe
x = mod.exog.dataframe
w = mod.weights.dataframe
- d1 = mod.dependent.dummies('entity', drop_first=mod.has_constant)
- d2 = mod.dependent.dummies('time', drop_first=True)
+ d1 = mod.dependent.dummies("entity", drop_first=mod.has_constant)
+ d2 = mod.dependent.dummies("time", drop_first=True)
d = np.c_[d1.values, d2.values]
if mod.has_constant:
@@ -655,47 +776,67 @@ def test_panel_both_lsdv_weighted(data):
d = d - z @ lstsq(wz, wd)[0]
xd = np.c_[x.values, d]
- xd = pd.DataFrame(xd,
- index=x.index,
- columns=list(x.columns) + list(d1.columns) + list(d2.columns))
+ xd = pd.DataFrame(
+ xd, index=x.index, columns=list(x.columns) + list(d1.columns) + list(d2.columns)
+ )
ols_mod = IV2SLS(y, xd, None, None, weights=w)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_fit=False)
assert_allclose(res.rsquared_inclusive, res2.rsquared)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc1
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc2
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.time_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_time=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False, count_effects=False,
- debiased=False)
- clusters = pd.DataFrame(mod.dependent.entity_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_entity=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
@@ -740,7 +881,7 @@ def test_panel_entity_time_other_equivalence(data):
def test_panel_other_lsdv(data):
mod = PanelOLS(data.y, data.x, other_effects=data.c)
- assert 'Num Other Effects: 2' in str(mod)
+ assert "Num Other Effects: 2" in str(mod)
res = mod.fit(auto_df=False, count_effects=False, debiased=False)
y = mod.dependent.dataframe.copy()
@@ -750,8 +891,10 @@ def test_panel_other_lsdv(data):
d_columns = []
for i, col in enumerate(c):
s = c[col].copy()
- dummies = pd.get_dummies(s.astype(np.int64), drop_first=(mod.has_constant or i > 0))
- dummies.columns = [s.name + '_val_' + str(c) for c in dummies.columns]
+ dummies = pd.get_dummies(
+ s.astype(np.int64), drop_first=(mod.has_constant or i > 0)
+ )
+ dummies.columns = [s.name + "_val_" + str(c) for c in dummies.columns]
d_columns.extend(list(dummies.columns))
d.append(dummies.values)
d = np.column_stack(d)
@@ -764,44 +907,66 @@ def test_panel_other_lsdv(data):
xd = pd.DataFrame(xd, index=x.index, columns=list(x.columns) + list(d_columns))
ols_mod = IV2SLS(y, xd, None, None)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_fit=False)
- res3 = mod.fit(cov_type='unadjusted', auto_df=False, count_effects=False, debiased=False)
+ res3 = mod.fit(
+ cov_type="unadjusted", auto_df=False, count_effects=False, debiased=False
+ )
assert_results_equal(res, res3)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc1
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False, count_effects=False,
- debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
clusters = data.vc2
ols_clusters = mod.reformat_clusters(clusters)
- res = mod.fit(cov_type='clustered', clusters=clusters, auto_df=False,
- count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='clustered', clusters=ols_clusters.dataframe)
+ res = mod.fit(
+ cov_type="clustered",
+ clusters=clusters,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=ols_clusters.dataframe)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_time=True, auto_df=False,
- count_effects=False, debiased=False)
- clusters = pd.DataFrame(mod.dependent.time_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_time=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.time_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
- res = mod.fit(cov_type='clustered', cluster_entity=True, auto_df=False,
- count_effects=False, debiased=False)
- clusters = pd.DataFrame(mod.dependent.entity_ids,
- index=mod.dependent.index,
- columns=['var.clust'])
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters)
+ res = mod.fit(
+ cov_type="clustered",
+ cluster_entity=True,
+ auto_df=False,
+ count_effects=False,
+ debiased=False,
+ )
+ clusters = pd.DataFrame(
+ mod.dependent.entity_ids, index=mod.dependent.index, columns=["var.clust"]
+ )
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters)
assert_results_equal(res, res2, test_fit=False)
@@ -816,8 +981,10 @@ def test_panel_other_fwl(data):
d_columns = []
for i, col in enumerate(c):
s = c[col].copy()
- dummies = pd.get_dummies(s.astype(np.int64), drop_first=(mod.has_constant or i > 0))
- dummies.columns = [s.name + '_val_' + str(c) for c in dummies.columns]
+ dummies = pd.get_dummies(
+ s.astype(np.int64), drop_first=(mod.has_constant or i > 0)
+ )
+ dummies.columns = [s.name + "_val_" + str(c) for c in dummies.columns]
d_columns.extend(list(dummies.columns))
d.append(dummies.values)
d = np.column_stack(d)
@@ -830,11 +997,11 @@ def test_panel_other_fwl(data):
y = y - d @ lstsq(d, y)[0]
ols_mod = IV2SLS(y, x, None, None)
- res2 = ols_mod.fit(cov_type='unadjusted')
+ res2 = ols_mod.fit(cov_type="unadjusted")
assert_results_equal(res, res2, test_df=False)
- res = mod.fit(cov_type='robust', auto_df=False, count_effects=False, debiased=False)
- res2 = ols_mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust", auto_df=False, count_effects=False, debiased=False)
+ res2 = ols_mod.fit(cov_type="robust")
assert_results_equal(res, res2, test_df=False)
@@ -844,7 +1011,7 @@ def test_panel_other_incorrect_size(data):
x = mod.exog.dataframe
cats = pd.DataFrame(mod.dependent.entity_ids, index=mod.dependent.index)
cats = PanelData(cats)
- cats = cats.dataframe.iloc[:cats.dataframe.shape[0] // 2, :]
+ cats = cats.dataframe.iloc[: cats.dataframe.shape[0] // 2, :]
with pytest.raises(ValueError):
PanelOLS(y, x, other_effects=cats)
@@ -869,7 +1036,7 @@ def test_results_access(data):
const = PanelData(data.y).copy()
const.dataframe.iloc[:, :] = 1
- const.dataframe.columns = ['const']
+ const.dataframe.columns = ["const"]
mod = PanelOLS(data.y, const)
res = mod.fit()
access_attributes(res)
@@ -889,50 +1056,57 @@ def test_alt_rsquared_weighted(data):
def test_too_many_effects(data):
with pytest.raises(ValueError):
- PanelOLS(data.y, data.x, entity_effects=True, time_effects=True, other_effects=data.c)
+ PanelOLS(
+ data.y, data.x, entity_effects=True, time_effects=True, other_effects=data.c
+ )
def test_cov_equiv_cluster(data):
mod = PanelOLS(data.y, data.x, entity_effects=True)
- res = mod.fit(cov_type='clustered', cluster_entity=True, debiased=False)
+ res = mod.fit(cov_type="clustered", cluster_entity=True, debiased=False)
y = PanelData(data.y)
clusters = pd.DataFrame(y.entity_ids, index=y.index)
- res2 = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
+ res2 = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
assert_results_equal(res, res2)
mod = PanelOLS(data.y, data.x, time_effects=True)
- res = mod.fit(cov_type='clustered', cluster_time=True, debiased=False)
+ res = mod.fit(cov_type="clustered", cluster_time=True, debiased=False)
y = PanelData(data.y)
clusters = pd.DataFrame(y.time_ids, index=y.index)
- res2 = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
+ res2 = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
assert_results_equal(res, res2)
- res = mod.fit(cov_type='clustered', debiased=False)
- res2 = mod.fit(cov_type='clustered', clusters=None, debiased=False)
+ res = mod.fit(cov_type="clustered", debiased=False)
+ res2 = mod.fit(cov_type="clustered", clusters=None, debiased=False)
assert_results_equal(res, res2)
def test_cluster_smoke(data):
mod = PanelOLS(data.y, data.x, entity_effects=True)
- mod.fit(cov_type='clustered', cluster_time=True, debiased=False)
- mod.fit(cov_type='clustered', cluster_entity=True, debiased=False)
+ mod.fit(cov_type="clustered", cluster_time=True, debiased=False)
+ mod.fit(cov_type="clustered", cluster_entity=True, debiased=False)
c2 = PanelData(data.vc2)
c1 = PanelData(data.vc1)
- mod.fit(cov_type='clustered', clusters=c2, debiased=False)
- mod.fit(cov_type='clustered', cluster_entity=True, clusters=c1, debiased=False)
- mod.fit(cov_type='clustered', cluster_time=True, clusters=c1, debiased=False)
+ mod.fit(cov_type="clustered", clusters=c2, debiased=False)
+ mod.fit(cov_type="clustered", cluster_entity=True, clusters=c1, debiased=False)
+ mod.fit(cov_type="clustered", cluster_time=True, clusters=c1, debiased=False)
with pytest.raises(ValueError):
- mod.fit(cov_type='clustered', cluster_time=True, clusters=c2, debiased=False)
+ mod.fit(cov_type="clustered", cluster_time=True, clusters=c2, debiased=False)
with pytest.raises(ValueError):
- mod.fit(cov_type='clustered', cluster_entity=True, clusters=c2, debiased=False)
+ mod.fit(cov_type="clustered", cluster_entity=True, clusters=c2, debiased=False)
with pytest.raises(ValueError):
- mod.fit(cov_type='clustered', cluster_entity=True, cluster_time=True, clusters=c1,
- debiased=False)
+ mod.fit(
+ cov_type="clustered",
+ cluster_entity=True,
+ cluster_time=True,
+ clusters=c1,
+ debiased=False,
+ )
with pytest.raises(ValueError):
- clusters = c1.dataframe.iloc[:c1.dataframe.shape[0] // 2]
- mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
+ clusters = c1.dataframe.iloc[: c1.dataframe.shape[0] // 2]
+ mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
def test_f_pooled(data):
@@ -943,7 +1117,7 @@ def test_f_pooled(data):
mod2 = PooledOLS(data.y, data.x)
else:
exog = mod.exog.dataframe.copy()
- exog['Intercept'] = 1.0
+ exog["Intercept"] = 1.0
mod2 = PooledOLS(mod.dependent.dataframe, exog)
res2 = mod2.fit(debiased=False)
@@ -953,7 +1127,7 @@ def test_f_pooled(data):
v1 = res.df_model - res2.df_model
v2 = res.df_resid
f_pool = (eps2.T @ eps2 - eps.T @ eps) / v1
- f_pool /= ((eps.T @ eps) / v2)
+ f_pool /= (eps.T @ eps) / v2
f_pool = float(f_pool)
assert_allclose(res.f_pooled.stat, f_pool)
assert res.f_pooled.df == v1
@@ -966,7 +1140,7 @@ def test_f_pooled(data):
v1 = res.df_model - res2.df_model
v2 = res.df_resid
f_pool = (eps2.T @ eps2 - eps.T @ eps) / v1
- f_pool /= ((eps.T @ eps) / v2)
+ f_pool /= (eps.T @ eps) / v2
f_pool = float(f_pool)
assert_allclose(res.f_pooled.stat, f_pool)
assert res.f_pooled.df == v1
@@ -979,7 +1153,7 @@ def test_f_pooled(data):
v1 = res.df_model - res2.df_model
v2 = res.df_resid
f_pool = (eps2.T @ eps2 - eps.T @ eps) / v1
- f_pool /= ((eps.T @ eps) / v2)
+ f_pool /= (eps.T @ eps) / v2
f_pool = float(f_pool)
assert_allclose(res.f_pooled.stat, f_pool)
assert res.f_pooled.df == v1
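
The only change Black makes in the three f_pool hunks is dropping redundant parentheses; the statistic itself is the classic test of pooling. With restricted (pooled) residuals e_r, unrestricted residuals e_u, v1 extra parameters in the unrestricted model, and v2 unrestricted residual degrees of freedom, the arithmetic in the test bodies is:

import numpy as np

def f_pooled(eps_restricted, eps_unrestricted, v1, v2):
    # F = ((e_r'e_r - e_u'e_u) / v1) / (e_u'e_u / v2)
    ssr_r = float(eps_restricted.T @ eps_restricted)
    ssr_u = float(eps_unrestricted.T @ eps_unrestricted)
    return ((ssr_r - ssr_u) / v1) / (ssr_u / v2)
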
@@ -1019,10 +1193,14 @@ def test_methods_equivalent(data, lsdv_config):
elif lsdv_config.other_effects == 2:
other_effects = data.c
weights = data.w if lsdv_config.weights else None
- mod = PanelOLS(data.y, data.x, weights=weights,
- entity_effects=lsdv_config.entity_effects,
- time_effects=lsdv_config.time_effects,
- other_effects=other_effects)
+ mod = PanelOLS(
+ data.y,
+ data.x,
+ weights=weights,
+ entity_effects=lsdv_config.entity_effects,
+ time_effects=lsdv_config.time_effects,
+ other_effects=other_effects,
+ )
res1 = mod.fit()
res2 = mod.fit(use_lsdv=True)
res3 = mod.fit(use_lsmr=True)
@@ -1065,7 +1243,9 @@ def test_panel_effects_sanity(data):
expected += res.estimated_effects.values
assert_allclose(mod.dependent.values2d, expected)
- mod = PanelOLS(data.y, data.x, weights=data.w, entity_effects=True, time_effects=True)
+ mod = PanelOLS(
+ data.y, data.x, weights=data.w, entity_effects=True, time_effects=True
+ )
res = mod.fit(auto_df=False, count_effects=False)
fitted = mod.exog.values2d @ res.params.values[:, None]
expected = fitted
@@ -1075,32 +1255,32 @@ def test_panel_effects_sanity(data):
def test_fitted_effects_residuals(data, entity_eff, time_eff):
- mod = PanelOLS(data.y, data.x,
- entity_effects=entity_eff,
- time_effects=time_eff)
+ mod = PanelOLS(data.y, data.x, entity_effects=entity_eff, time_effects=time_eff)
res = mod.fit()
expected = mod.exog.values2d @ res.params.values
- expected = pd.DataFrame(expected, index=mod.exog.index, columns=['fitted_values'])
+ expected = pd.DataFrame(expected, index=mod.exog.index, columns=["fitted_values"])
assert_allclose(res.fitted_values, expected)
assert_frame_similar(res.fitted_values, expected)
expected.iloc[:, 0] = res.resids
- expected.columns = ['idiosyncratic']
+ expected.columns = ["idiosyncratic"]
assert_allclose(res.idiosyncratic, expected)
assert_frame_similar(res.idiosyncratic, expected)
fitted_error = res.fitted_values + res.idiosyncratic.values
expected.iloc[:, 0] = mod.dependent.values2d - fitted_error
- expected.columns = ['estimated_effects']
+ expected.columns = ["estimated_effects"]
assert_allclose(res.estimated_effects, expected, atol=1e-8)
assert_frame_similar(res.estimated_effects, expected)
-@pytest.mark.parametrize('weighted', [True, False])
+@pytest.mark.parametrize("weighted", [True, False])
def test_low_memory(data, weighted):
if weighted:
- mod = PanelOLS(data.y, data.x, weights=data.w, entity_effects=True, time_effects=True)
+ mod = PanelOLS(
+ data.y, data.x, weights=data.w, entity_effects=True, time_effects=True
+ )
else:
mod = PanelOLS(data.y, data.x, entity_effects=True, time_effects=True)
res = mod.fit()
@@ -1124,15 +1304,15 @@ def test_low_memory_auto():
mod.fit()
-@pytest.mark.filterwarnings('ignore::linearmodels.utility.SingletonWarning')
+@pytest.mark.filterwarnings("ignore::linearmodels.utility.SingletonWarning")
def test_singleton_removal():
entities = []
for i in range(6):
- entities.extend(['entity.{j}'.format(j=j) for j in range(6 - i)])
+ entities.extend(["entity.{j}".format(j=j) for j in range(6 - i)])
nobs = len(entities)
times = np.arange(nobs) % 6
index = pd.MultiIndex.from_arrays((entities, times))
- cols = ['x{0}'.format(i) for i in range(3)]
+ cols = ["x{0}".format(i) for i in range(3)]
x = pd.DataFrame(np.random.randn(nobs, 3), index=index, columns=cols)
y = pd.DataFrame(np.random.randn(nobs, 1), index=index)
mod = PanelOLS(y, x, singletons=False, entity_effects=True, time_effects=True)
@@ -1143,13 +1323,13 @@ def test_singleton_removal():
assert_allclose(res.params, res_with.params)
-@pytest.mark.filterwarnings('ignore::linearmodels.utility.SingletonWarning')
+@pytest.mark.filterwarnings("ignore::linearmodels.utility.SingletonWarning")
def test_masked_singleton_removal():
nobs = 8
- entities = ['A', 'B', 'C', 'D'] * 2
+ entities = ["A", "B", "C", "D"] * 2
times = [0, 1, 1, 1, 1, 2, 2, 2]
index = pd.MultiIndex.from_arrays((entities, times))
- x = pd.DataFrame(np.random.randn(nobs, 1), index=index, columns=['x'])
+ x = pd.DataFrame(np.random.randn(nobs, 1), index=index, columns=["x"])
y = pd.DataFrame(np.random.randn(nobs, 1), index=index)
mod = PanelOLS(y, x, singletons=False, entity_effects=True, time_effects=True)
res = mod.fit()
@@ -1157,30 +1337,37 @@ def test_masked_singleton_removal():
def test_singleton_removal_other_effects(data):
- mod_keep = PanelOLS(data.y, data.x, weights=data.w, other_effects=data.c, singletons=True)
+ mod_keep = PanelOLS(
+ data.y, data.x, weights=data.w, other_effects=data.c, singletons=True
+ )
res_keep = mod_keep.fit()
- mod = PanelOLS(data.y, data.x, weights=data.w, other_effects=data.c, singletons=False)
- res = mod.fit(cov_type='clustered', clusters=data.vc1)
+ mod = PanelOLS(
+ data.y, data.x, weights=data.w, other_effects=data.c, singletons=False
+ )
+ res = mod.fit(cov_type="clustered", clusters=data.vc1)
assert res.nobs <= res_keep.nobs
@pytest.mark.slow
-@pytest.mark.filterwarnings('ignore::linearmodels.utility.SingletonWarning')
-@pytest.mark.parametrize('other_effects', [1, 2])
+@pytest.mark.filterwarnings("ignore::linearmodels.utility.SingletonWarning")
+@pytest.mark.parametrize("other_effects", [1, 2])
def test_singleton_removal_mixed(singleton_data, other_effects):
if other_effects == 1:
other_effects = PanelData(singleton_data.c).dataframe.iloc[:, [0]]
elif other_effects == 2:
other_effects = singleton_data.c
- mod = PanelOLS(singleton_data.y, singleton_data.x,
- other_effects=other_effects)
+ mod = PanelOLS(singleton_data.y, singleton_data.x, other_effects=other_effects)
res_keep = mod.fit(use_lsmr=True)
- mod = PanelOLS(singleton_data.y, singleton_data.x,
- other_effects=other_effects, singletons=False)
- res = mod.fit(cov_type='clustered', clusters=singleton_data.vc2, use_lsmr=True)
+ mod = PanelOLS(
+ singleton_data.y,
+ singleton_data.x,
+ other_effects=other_effects,
+ singletons=False,
+ )
+ res = mod.fit(cov_type="clustered", clusters=singleton_data.vc2, use_lsmr=True)
assert_allclose(res_keep.params, res.params)
assert res.nobs <= res_keep.nobs
@@ -1189,27 +1376,29 @@ def test_repeated_measures_weight():
# Issue reported by email
rs = np.random.RandomState(0)
w = rs.chisquare(5, 300) / 5
- idx1 = ['a']*100 + ['b']*100 + ['c']*100
+ idx1 = ["a"] * 100 + ["b"] * 100 + ["c"] * 100
idx2 = np.arange(300) % 25
mi = pd.MultiIndex.from_arrays([idx1, idx2])
- df = pd.DataFrame(rs.standard_normal((300, 2)),
- index=mi, columns=['y', 'x'])
- w = pd.Series(w, index=mi, name='weight')
- df['weight'] = w
- mod = PanelOLS.from_formula('y ~ x + EntityEffects + TimeEffects', df,
- weights=df['weight'])
+ df = pd.DataFrame(rs.standard_normal((300, 2)), index=mi, columns=["y", "x"])
+ w = pd.Series(w, index=mi, name="weight")
+ df["weight"] = w
+ mod = PanelOLS.from_formula(
+ "y ~ x + EntityEffects + TimeEffects", df, weights=df["weight"]
+ )
res = mod.fit()
- mod = PanelOLS.from_formula('y ~ x + EntityEffects + TimeEffects', df)
+ mod = PanelOLS.from_formula("y ~ x + EntityEffects + TimeEffects", df)
res_un = mod.fit()
assert res.params[0] != res_un.params[0]
def test_absorbed(absorbed_data):
- mod = PanelOLS(absorbed_data.y, absorbed_data.x, drop_absorbed=True, entity_effects=True)
+ mod = PanelOLS(
+ absorbed_data.y, absorbed_data.x, drop_absorbed=True, entity_effects=True
+ )
if isinstance(absorbed_data.y, pd.DataFrame):
- match = 'x_absorbed'
+ match = "x_absorbed"
else:
- match = 'Exog.3'
+ match = "Exog.3"
with pytest.warns(AbsorbingEffectWarning, match=match):
res = mod.fit()
if isinstance(absorbed_data.x, np.ndarray):
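
The reflowed constructor calls throughout this file follow Black's line-splitting rule: a call that fits within the default 88-column limit after a single wrap keeps its arguments grouped, while a longer call is exploded to one argument per line with a trailing comma that pins the exploded layout on later runs (the "magic trailing comma"). A minimal sketch with placeholder names, not library code:

    def fit_model(y, x, weights=None, entity_effects=False, time_effects=False):
        # Placeholder standing in for PanelOLS; only the layout matters here.
        return y, x, weights, entity_effects, time_effects

    # Fits after one wrap: the arguments stay together on a single line.
    res = fit_model(
        [1.0], [[1.0]], weights=None, entity_effects=True, time_effects=True
    )

    # Trailing comma present: Black keeps one argument per line.
    res = fit_model(
        [1.0],
        [[1.0]],
        weights=None,
        entity_effects=True,
        time_effects=True,
    )
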
diff --git a/linearmodels/tests/panel/test_pooled_ols.py b/linearmodels/tests/panel/test_pooled_ols.py
index e71d13401b..61f63d4a4a 100644
--- a/linearmodels/tests/panel/test_pooled_ols.py
+++ b/linearmodels/tests/panel/test_pooled_ols.py
@@ -13,12 +13,14 @@
assert_results_equal, datatypes,
generate_data)
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
missing = [0.0, 0.20]
has_const = [True, False]
perms = list(product(missing, datatypes, has_const))
-ids = list(map(lambda s: '-'.join(map(str, s)), perms))
+ids = list(map(lambda s: "-".join(map(str, s)), perms))
@pytest.fixture(params=perms, ids=ids)
@@ -36,10 +38,10 @@ def test_pooled_ols(data):
y.index = np.arange(len(y))
x.index = y.index
- res2 = IV2SLS(y, x, None, None).fit(cov_type='unadjusted')
+ res2 = IV2SLS(y, x, None, None).fit(cov_type="unadjusted")
assert_results_equal(res, res2)
- res3 = mod.fit(cov_type='homoskedastic', debiased=False)
+ res3 = mod.fit(cov_type="homoskedastic", debiased=False)
assert_results_equal(res, res3)
@@ -53,14 +55,14 @@ def test_pooled_ols_weighted(data):
y.index = np.arange(len(y))
w.index = x.index = y.index
- res2 = IV2SLS(y, x, None, None, weights=w).fit(cov_type='unadjusted')
+ res2 = IV2SLS(y, x, None, None, weights=w).fit(cov_type="unadjusted")
assert_results_equal(res, res2)
def test_diff_data_size(data):
if isinstance(data.x, pd.DataFrame):
entities = data.x.index.levels[0]
- x = data.x.loc[pd.IndexSlice[entities[0]:entities[-2]]]
+ x = data.x.loc[pd.IndexSlice[entities[0] : entities[-2]]]
y = data.y
elif isinstance(data.x, np.ndarray):
x = data.x
@@ -115,62 +117,63 @@ def test_alt_rsquared_weighted(data):
def test_cov_equiv(data):
mod = PooledOLS(data.y, data.x)
- res = mod.fit(cov_type='robust', debiased=False)
+ res = mod.fit(cov_type="robust", debiased=False)
y = mod.dependent.dataframe.copy()
x = mod.exog.dataframe.copy()
y.index = np.arange(len(y))
x.index = y.index
- res2 = IV2SLS(y, x, None, None).fit(cov_type='robust')
+ res2 = IV2SLS(y, x, None, None).fit(cov_type="robust")
assert_results_equal(res, res2)
- res3 = mod.fit(cov_type='heteroskedastic', debiased=False)
+ res3 = mod.fit(cov_type="heteroskedastic", debiased=False)
assert_results_equal(res, res3)
def test_cov_equiv_weighted(data):
mod = PooledOLS(data.y, data.x, weights=data.w)
- res = mod.fit(cov_type='robust', debiased=False)
+ res = mod.fit(cov_type="robust", debiased=False)
y = mod.dependent.dataframe.copy()
x = mod.exog.dataframe.copy()
w = mod.weights.dataframe.copy()
y.index = np.arange(len(y))
w.index = x.index = y.index
- res2 = IV2SLS(y, x, None, None, weights=w).fit(cov_type='robust')
+ res2 = IV2SLS(y, x, None, None, weights=w).fit(cov_type="robust")
assert_results_equal(res, res2)
- res3 = mod.fit(cov_type='heteroskedastic', debiased=False)
+ res3 = mod.fit(cov_type="heteroskedastic", debiased=False)
assert_results_equal(res, res3)
def test_cov_equiv_cluster(data):
mod = PooledOLS(data.y, data.x)
- res = mod.fit(cov_type='clustered', cluster_entity=True, debiased=False)
+ res = mod.fit(cov_type="clustered", cluster_entity=True, debiased=False)
y = PanelData(data.y)
clusters = pd.DataFrame(y.entity_ids, index=y.index)
- res2 = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
+ res2 = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
assert_results_equal(res, res2)
- res = mod.fit(cov_type='clustered', cluster_time=True, debiased=False)
+ res = mod.fit(cov_type="clustered", cluster_time=True, debiased=False)
clusters = pd.DataFrame(y.time_ids, index=y.index)
- res2 = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
+ res2 = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
assert_results_equal(res, res2)
- res = mod.fit(cov_type='clustered', clusters=data.vc1, debiased=False)
+ res = mod.fit(cov_type="clustered", clusters=data.vc1, debiased=False)
y = mod.dependent.dataframe.copy()
x = mod.exog.dataframe.copy()
y.index = np.arange(len(y))
x.index = y.index
clusters = mod.reformat_clusters(data.vc1)
ols_mod = IV2SLS(y, x, None, None)
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters.dataframe,
- debiased=False)
+ res2 = ols_mod.fit(
+ cov_type="clustered", clusters=clusters.dataframe, debiased=False
+ )
assert_results_equal(res, res2)
def test_cov_equiv_cluster_weighted(data):
mod = PooledOLS(data.y, data.x, weights=data.w)
- res = mod.fit(cov_type='clustered', clusters=data.vc1, debiased=False)
+ res = mod.fit(cov_type="clustered", clusters=data.vc1, debiased=False)
y = mod.dependent.dataframe.copy()
x = mod.exog.dataframe.copy()
@@ -179,7 +182,7 @@ def test_cov_equiv_cluster_weighted(data):
w.index = x.index = y.index
clusters = mod.reformat_clusters(data.vc1)
ols_mod = IV2SLS(y, x, None, None, weights=w)
- res2 = ols_mod.fit(cov_type='clustered', clusters=clusters.dataframe)
+ res2 = ols_mod.fit(cov_type="clustered", clusters=clusters.dataframe)
assert_results_equal(res, res2)
@@ -190,9 +193,9 @@ def test_two_way_clustering(data):
entity_clusters = pd.DataFrame(y.entity_ids, index=y.index)
vc1 = PanelData(data.vc1)
clusters = vc1.copy()
- clusters.dataframe['var.cluster.entity'] = entity_clusters
+ clusters.dataframe["var.cluster.entity"] = entity_clusters
clusters._frame = clusters._frame.astype(np.int64)
- res = mod.fit(cov_type='clustered', clusters=clusters, debiased=False)
+ res = mod.fit(cov_type="clustered", clusters=clusters, debiased=False)
y = mod.dependent.dataframe.copy()
x = mod.exog.dataframe.copy()
@@ -201,7 +204,7 @@ def test_two_way_clustering(data):
clusters = mod.reformat_clusters(clusters)
ols_mod = IV2SLS(y, x, None, None)
- ols_res = ols_mod.fit(cov_type='clustered', clusters=clusters.dataframe)
+ ols_res = ols_mod.fit(cov_type="clustered", clusters=clusters.dataframe)
assert_results_equal(res, ols_res)
@@ -209,16 +212,16 @@ def test_fitted_effects_residuals(data):
mod = PooledOLS(data.y, data.x)
res = mod.fit()
expected = pd.DataFrame(res.resids.copy())
- expected.columns = ['idiosyncratic']
+ expected.columns = ["idiosyncratic"]
assert_allclose(res.idiosyncratic, expected)
assert_frame_similar(res.idiosyncratic, expected)
expected = mod.dependent.values2d - res.resids.values[:, None]
- expected = pd.DataFrame(expected, index=res.resids.index, columns=['fitted_values'])
+ expected = pd.DataFrame(expected, index=res.resids.index, columns=["fitted_values"])
assert_allclose(res.fitted_values, expected)
assert_frame_similar(res.fitted_values, expected)
expected.iloc[:, 0] = np.nan
- expected.columns = ['estimated_effects']
+ expected.columns = ["estimated_effects"]
assert_allclose(res.estimated_effects, expected)
assert_frame_similar(res.estimated_effects, expected)
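
One non-obvious change in this file is the slice rewrite from `entities[0]:entities[-2]` to `entities[0] : entities[-2]`: following PEP 8, Black treats the slice colon as a binary operator and spaces both sides whenever a bound is more than a plain name or number. Since flake8's default E203 check flags exactly this whitespace, projects that run flake8 alongside Black commonly add `extend-ignore = E203` to their flake8 configuration. The rule on plain lists:

    entities = list("abcdef")

    tight = entities[1:4]                     # simple bounds: no spaces
    spaced = entities[0 : len(entities) - 2]  # complex bound: spaced colon
    assert tight == ["b", "c", "d"]
    assert spaced == ["a", "b", "c", "d"]
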
diff --git a/linearmodels/tests/panel/test_random_effects.py b/linearmodels/tests/panel/test_random_effects.py
index bd8160c1ab..7d6d6e6238 100644
--- a/linearmodels/tests/panel/test_random_effects.py
+++ b/linearmodels/tests/panel/test_random_effects.py
@@ -10,12 +10,14 @@
assert_frame_similar, datatypes,
generate_data)
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
missing = [0.0, 0.20]
has_const = [True, False]
perms = list(product(missing, datatypes, has_const))
-ids = list(map(lambda s: '-'.join(map(str, s)), perms))
+ids = list(map(lambda s: "-".join(map(str, s)), perms))
@pytest.fixture(params=perms, ids=ids)
@@ -30,17 +32,17 @@ def test_random_effects_small_sample(data):
no_ss = mod.fit()
ss = mod.fit(small_sample=True)
if y.dataframe.shape[0] == mod.dependent.dataframe.shape[0]:
- assert (ss.variance_decomposition.Effects == no_ss.variance_decomposition.Effects)
+ assert ss.variance_decomposition.Effects == no_ss.variance_decomposition.Effects
else:
- assert (ss.variance_decomposition.Effects != no_ss.variance_decomposition.Effects)
+ assert ss.variance_decomposition.Effects != no_ss.variance_decomposition.Effects
mod = RandomEffects(data.y, data.x, weights=data.w)
no_ss = mod.fit()
ss = mod.fit(small_sample=True)
if y.dataframe.shape[0] == mod.dependent.dataframe.shape[0]:
- assert (ss.variance_decomposition.Effects == no_ss.variance_decomposition.Effects)
+ assert ss.variance_decomposition.Effects == no_ss.variance_decomposition.Effects
else:
- assert (ss.variance_decomposition.Effects != no_ss.variance_decomposition.Effects)
+ assert ss.variance_decomposition.Effects != no_ss.variance_decomposition.Effects
def test_results_access(data):
@@ -54,17 +56,17 @@ def test_fitted_effects_residuals(data):
res = mod.fit()
expected = mod.exog.values2d @ res.params.values
- expected = pd.DataFrame(expected, index=mod.exog.index, columns=['fitted_values'])
+ expected = pd.DataFrame(expected, index=mod.exog.index, columns=["fitted_values"])
assert_allclose(res.fitted_values, expected)
assert_frame_similar(res.fitted_values, expected)
expected.iloc[:, 0] = res.resids
- expected.columns = ['idiosyncratic']
+ expected.columns = ["idiosyncratic"]
assert_allclose(res.idiosyncratic, expected)
assert_frame_similar(res.idiosyncratic, expected)
fitted_error = res.fitted_values + res.idiosyncratic.values
expected.iloc[:, 0] = mod.dependent.values2d - fitted_error
- expected.columns = ['estimated_effects']
+ expected.columns = ["estimated_effects"]
assert_allclose(res.estimated_effects, expected)
assert_frame_similar(res.estimated_effects, expected)
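
Black also dropped the redundant parentheses around the `assert` expressions above. Beyond style, the parenthesized form is a known hazard once a message gets added later, because `assert (cond, msg)` tests a two-element tuple, which is always truthy:

    x = y = 1
    assert (x == y)                      # legal, but the parentheses are noise
    assert x == y                        # the blackened form
    # assert (x == y, "values differ")   # BUG: a non-empty tuple is always true
    assert x == y, "values differ"       # correct way to attach a message
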
diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py
index 71663bc71b..04cabb7a2e 100644
--- a/linearmodels/tests/panel/test_results.py
+++ b/linearmodels/tests/panel/test_results.py
@@ -23,72 +23,75 @@ def data(request):
missing = [0.0, 0.02, 0.20]
has_const = [True, False]
perms = list(product(missing, datatypes, has_const))
-ids = list(map(lambda s: '-'.join(map(str, s)), perms))
+ids = list(map(lambda s: "-".join(map(str, s)), perms))
@pytest.fixture(params=perms, ids=ids)
def generated_data(request):
missing, datatype, const = request.param
- return generate_data(missing, datatype, const=const, ntk=(91, 7, 5), other_effects=2)
+ return generate_data(
+ missing, datatype, const=const, ntk=(91, 7, 5), other_effects=2
+ )
-@pytest.mark.parametrize('precision', ('tstats', 'std_errors', 'pvalues'))
+@pytest.mark.parametrize("precision", ("tstats", "std_errors", "pvalues"))
def test_single(data, precision):
- dependent = data.set_index(['nr', 'year']).lwage
- exog = add_constant(data.set_index(['nr', 'year'])[['expersq', 'married', 'union']])
+ dependent = data.set_index(["nr", "year"]).lwage
+ exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]])
res = PanelOLS(dependent, exog, entity_effects=True).fit()
comp = compare([res])
assert len(comp.rsquared) == 1
d = dir(comp)
for value in d:
- if value.startswith('_'):
+ if value.startswith("_"):
continue
getattr(comp, value)
-@pytest.mark.parametrize('precision', ('tstats', 'std_errors', 'pvalues'))
+@pytest.mark.parametrize("precision", ("tstats", "std_errors", "pvalues"))
def test_multiple(data, precision):
- dependent = data.set_index(['nr', 'year']).lwage
- exog = add_constant(data.set_index(['nr', 'year'])[['expersq', 'married', 'union']])
+ dependent = data.set_index(["nr", "year"]).lwage
+ exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]])
res = PanelOLS(dependent, exog, entity_effects=True, time_effects=True).fit()
- res2 = PanelOLS(dependent, exog, entity_effects=True).fit(cov_type='clustered',
- cluster_entity=True)
- exog = add_constant(data.set_index(['nr', 'year'])[['married', 'union']])
+ res2 = PanelOLS(dependent, exog, entity_effects=True).fit(
+ cov_type="clustered", cluster_entity=True
+ )
+ exog = add_constant(data.set_index(["nr", "year"])[["married", "union"]])
res3 = PooledOLS(dependent, exog).fit()
- exog = data.set_index(['nr', 'year'])[['exper']]
+ exog = data.set_index(["nr", "year"])[["exper"]]
res4 = RandomEffects(dependent, exog).fit()
comp = compare([res, res2, res3, res4], precision=precision)
assert len(comp.rsquared) == 4
d = dir(comp)
for value in d:
- if value.startswith('_'):
+ if value.startswith("_"):
continue
getattr(comp, value)
with pytest.raises(ValueError):
- compare([res, res2, res3, res4], precision='unknown')
+ compare([res, res2, res3, res4], precision="unknown")
def test_multiple_no_effects(data):
- dependent = data.set_index(['nr', 'year']).lwage
- exog = add_constant(data.set_index(['nr', 'year'])[['expersq', 'married', 'union']])
+ dependent = data.set_index(["nr", "year"]).lwage
+ exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]])
res = PanelOLS(dependent, exog).fit()
- exog = add_constant(data.set_index(['nr', 'year'])[['married', 'union']])
+ exog = add_constant(data.set_index(["nr", "year"])[["married", "union"]])
res3 = PooledOLS(dependent, exog).fit()
- exog = data.set_index(['nr', 'year'])[['exper']]
+ exog = data.set_index(["nr", "year"])[["exper"]]
res4 = RandomEffects(dependent, exog).fit()
comp = compare(dict(a=res, model2=res3, model3=res4))
assert len(comp.rsquared) == 3
d = dir(comp)
for value in d:
- if value.startswith('_'):
+ if value.startswith("_"):
continue
getattr(comp, value)
compare(OrderedDict(a=res, model2=res3, model3=res4))
def test_incorrect_type(data):
- dependent = data.set_index(['nr', 'year']).lwage
- exog = add_constant(data.set_index(['nr', 'year'])[['expersq', 'married', 'union']])
+ dependent = data.set_index(["nr", "year"]).lwage
+ exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]])
mod = PanelOLS(dependent, exog)
res = mod.fit()
mod2 = IV2SLS(mod.dependent.dataframe, mod.exog.dataframe, None, None)
@@ -97,41 +100,41 @@ def test_incorrect_type(data):
compare(dict(model1=res, model2=res2))
-@pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+@pytest.mark.filterwarnings("ignore::linearmodels.utility.MissingValueWarning")
def test_predict(generated_data):
mod = PanelOLS(generated_data.y, generated_data.x, entity_effects=True)
res = mod.fit()
pred = res.predict()
nobs = mod.dependent.dataframe.shape[0]
- assert list(pred.columns) == ['fitted_values']
+ assert list(pred.columns) == ["fitted_values"]
assert pred.shape == (nobs, 1)
pred = res.predict(effects=True, idiosyncratic=True)
- assert list(pred.columns) == ['fitted_values', 'estimated_effects', 'idiosyncratic']
+ assert list(pred.columns) == ["fitted_values", "estimated_effects", "idiosyncratic"]
assert pred.shape == (nobs, 3)
assert_series_equal(pred.fitted_values, res.fitted_values.iloc[:, 0])
assert_series_equal(pred.estimated_effects, res.estimated_effects.iloc[:, 0])
assert_series_equal(pred.idiosyncratic, res.idiosyncratic.iloc[:, 0])
pred = res.predict(effects=True, idiosyncratic=True, missing=True)
- assert list(pred.columns) == ['fitted_values', 'estimated_effects', 'idiosyncratic']
+ assert list(pred.columns) == ["fitted_values", "estimated_effects", "idiosyncratic"]
assert pred.shape == (PanelData(generated_data.y).dataframe.shape[0], 3)
mod = PanelOLS(generated_data.y, generated_data.x)
res = mod.fit()
pred = res.predict()
- assert list(pred.columns) == ['fitted_values']
+ assert list(pred.columns) == ["fitted_values"]
assert pred.shape == (nobs, 1)
pred = res.predict(effects=True, idiosyncratic=True)
- assert list(pred.columns) == ['fitted_values', 'estimated_effects', 'idiosyncratic']
+ assert list(pred.columns) == ["fitted_values", "estimated_effects", "idiosyncratic"]
assert pred.shape == (nobs, 3)
assert_series_equal(pred.fitted_values, res.fitted_values.iloc[:, 0])
assert_series_equal(pred.estimated_effects, res.estimated_effects.iloc[:, 0])
assert_series_equal(pred.idiosyncratic, res.idiosyncratic.iloc[:, 0])
pred = res.predict(effects=True, idiosyncratic=True, missing=True)
- assert list(pred.columns) == ['fitted_values', 'estimated_effects', 'idiosyncratic']
+ assert list(pred.columns) == ["fitted_values", "estimated_effects", "idiosyncratic"]
assert pred.shape == (PanelData(generated_data.y).dataframe.shape[0], 3)
-@pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+@pytest.mark.filterwarnings("ignore::linearmodels.utility.MissingValueWarning")
def test_predict_no_selection(generated_data):
mod = PanelOLS(generated_data.y, generated_data.x, entity_effects=True)
res = mod.fit()
@@ -142,8 +145,8 @@ def test_predict_no_selection(generated_data):
def test_wald_test(data):
- dependent = data.set_index(['nr', 'year']).lwage
- exog = add_constant(data.set_index(['nr', 'year'])[['expersq', 'married', 'union']])
+ dependent = data.set_index(["nr", "year"]).lwage
+ exog = add_constant(data.set_index(["nr", "year"])[["expersq", "married", "union"]])
res = PanelOLS(dependent, exog, entity_effects=True, time_effects=True).fit()
restriction = np.zeros((2, 4))
@@ -151,7 +154,7 @@ def test_wald_test(data):
restriction[1, 3] = 1
t1 = res.wald_test(restriction)
t2 = res.wald_test(restriction, np.zeros(2))
- formula = 'married = union = 0'
+ formula = "married = union = 0"
t3 = res.wald_test(formula=formula)
p = res.params.values[:, None]
c = np.asarray(res.cov)
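
The `for value in dir(comp)` loops in this file are smoke tests: every public attribute of the comparison object is touched so that any broken accessor raises and fails the test. The pattern in isolation, with a stand-in class:

    class Comparison:
        rsquared = 0.75

        @property
        def tstats(self):
            return [2.1, -0.3]

    comp = Comparison()
    for name in dir(comp):
        if name.startswith("_"):
            continue
        getattr(comp, name)  # a property that raises fails the test here
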
diff --git a/linearmodels/tests/panel/test_simulated_against_stata.py b/linearmodels/tests/panel/test_simulated_against_stata.py
index 7d54701e57..10d87d6db8 100644
--- a/linearmodels/tests/panel/test_simulated_against_stata.py
+++ b/linearmodels/tests/panel/test_simulated_against_stata.py
@@ -12,23 +12,28 @@
from linearmodels.utility import AttrDict
pytestmark = pytest.mark.filterwarnings(
- 'ignore::linearmodels.utility.MissingValueWarning')
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
STATA_RESULTS = parse_stata_results.data()
-MODELS = {'between': BetweenOLS, 'fixed_effect': PanelOLS, 'pooled': PooledOLS,
- 'random_effect': RandomEffects}
+MODELS = {
+ "between": BetweenOLS,
+ "fixed_effect": PanelOLS,
+ "pooled": PooledOLS,
+ "random_effect": RandomEffects,
+}
cwd = os.path.split(os.path.abspath(__file__))[0]
-sim_data = pd.read_stata(os.path.join(cwd, 'results', 'simulated-panel.dta'))
-sim_data = sim_data.set_index(['firm', 'time'])
+sim_data = pd.read_stata(os.path.join(cwd, "results", "simulated-panel.dta"))
+sim_data = sim_data.set_index(["firm", "time"])
valid = sorted(list(filter(lambda x: True, list(STATA_RESULTS.keys()))))
-@pytest.fixture(params=valid, scope='module')
+@pytest.fixture(params=valid, scope="module")
def data(request):
- model, vcv, weights, missing = request.param.split('-')
- y_vars = ['y']
- x_vars = ['x1', 'x2', 'x3', 'x4', 'x5']
+ model, vcv, weights, missing = request.param.split("-")
+ y_vars = ["y"]
+ x_vars = ["x1", "x2", "x3", "x4", "x5"]
vars = y_vars + x_vars
if missing:
for i, v in enumerate(vars):
@@ -36,45 +41,53 @@ def data(request):
y_vars = vars[:1]
x_vars = vars[1:]
y = sim_data[y_vars]
- x = sim_data[['intercept'] + x_vars]
+ x = sim_data[["intercept"] + x_vars]
mod = MODELS[model]
mod_options = {}
- if model == 'fixed_effect':
- mod_options = {'entity_effects': True}
- if weights == 'weighted':
- mod_options.update({'weights': sim_data['w']})
- fit_options = {'debiased': True}
- if weights == 'wls':
- fit_options.update({'reweight': True})
- if vcv == 'robust' and model not in ('fixed_effect', 'random_effect'):
- fit_options.update({'cov_type': 'robust'})
- elif vcv in ('cluster', 'robust'):
+ if model == "fixed_effect":
+ mod_options = {"entity_effects": True}
+ if weights == "weighted":
+ mod_options.update({"weights": sim_data["w"]})
+ fit_options = {"debiased": True}
+ if weights == "wls":
+ fit_options.update({"reweight": True})
+ if vcv == "robust" and model not in ("fixed_effect", "random_effect"):
+ fit_options.update({"cov_type": "robust"})
+ elif vcv in ("cluster", "robust"):
y_data = PanelData(y)
eid = y_data.entity_ids
- entities = pd.DataFrame(eid, index=y_data.index, columns=['firm_ids'])
- fit_options.update({'cov_type': 'clustered', 'clusters': entities})
+ entities = pd.DataFrame(eid, index=y_data.index, columns=["firm_ids"])
+ fit_options.update({"cov_type": "clustered", "clusters": entities})
else:
- fit_options.update({'cov_type': 'unadjusted'})
+ fit_options.update({"cov_type": "unadjusted"})
- if vcv == 'cluster' or (
- model in ('fixed_effect', 'random_effect') and vcv == 'robust'):
- fit_options.update({'group_debias': True})
+ if vcv == "cluster" or (
+ model in ("fixed_effect", "random_effect") and vcv == "robust"
+ ):
+ fit_options.update({"group_debias": True})
spec_mod = mod(y, x, **mod_options)
fit = spec_mod.fit(**fit_options)
- return AttrDict(fit=fit, model=spec_mod, model_options=mod_options, y=y,
- x=x,
- stata=STATA_RESULTS[request.param],
- fit_options=fit_options,
- model_name=model, vcv=vcv, weights=weights,
- missing=missing)
+ return AttrDict(
+ fit=fit,
+ model=spec_mod,
+ model_options=mod_options,
+ y=y,
+ x=x,
+ stata=STATA_RESULTS[request.param],
+ fit_options=fit_options,
+ model_name=model,
+ vcv=vcv,
+ weights=weights,
+ missing=missing,
+ )
# TODO: pvals, r2o, r2
def correct_order(stata, lm):
repl = []
for c in stata.index:
- if c == '_cons':
- repl.append('intercept')
+ if c == "_cons":
+ repl.append("intercept")
else:
repl.append(c)
stata = stata.copy()
@@ -92,9 +105,8 @@ def test_params(data):
def test_rsquared_between(data):
- if (data.weights in ('weighted', 'wls') or
- data.missing in ('_heavy', '_light')):
- pytest.xfail(reason='Respect weights in calculation')
+ if data.weights in ("weighted", "wls") or data.missing in ("_heavy", "_light"):
+ pytest.xfail(reason="Respect weights in calculation")
r2_between = data.fit.rsquared_between
if np.isnan(data.stata.r2_b):
return
@@ -102,8 +114,8 @@ def test_rsquared_between(data):
def test_rsquared_within(data):
- if data.model_name == 'between':
- pytest.xfail(reason='Use stricter definition of rsquared within')
+ if data.model_name == "between":
+ pytest.xfail(reason="Use stricter definition of rsquared within")
r2_within = data.fit.rsquared_within
if np.isnan(data.stata.r2_w):
return
@@ -115,8 +127,8 @@ def test_cov(data):
stata = data.stata
repl = []
for c in stata.variance.columns:
- if c == '_cons':
- repl.append('intercept')
+ if c == "_cons":
+ repl.append("intercept")
else:
repl.append(c)
var = stata.variance.copy()
@@ -127,10 +139,10 @@ def test_cov(data):
def test_f_pooled(data):
- f_pool = getattr(data.fit, 'f_pooled', None)
+ f_pool = getattr(data.fit, "f_pooled", None)
stata_f_pool = data.stata.F_f
if not f_pool or np.isnan(stata_f_pool):
- pytest.skip('Result not available for testing')
+ pytest.skip("Result not available for testing")
assert_allclose(f_pool.stat, stata_f_pool)
@@ -146,8 +158,8 @@ def test_t_stat(data):
stata_t = data.stata.params.tstat
repl = []
for c in stata_t.index:
- if c == '_cons':
- repl.append('intercept')
+ if c == "_cons":
+ repl.append("intercept")
else:
repl.append(c)
stata_t.index = repl
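
The fixture above drives every model/covariance/weighting combination through two plain dictionaries, `mod_options` and `fit_options`, that are filled conditionally and then unpacked with `**` at the call site. Reduced to its essentials, with a stand-in `fit` function:

    def fit(cov_type="unadjusted", debiased=False, group_debias=False):
        return cov_type, debiased, group_debias

    fit_options = {"debiased": True}
    vcv = "cluster"
    if vcv == "cluster":
        fit_options.update({"cov_type": "clustered", "group_debias": True})

    assert fit(**fit_options) == ("clustered", True, True)
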
diff --git a/linearmodels/tests/panel/test_utility.py b/linearmodels/tests/panel/test_utility.py
index 9b0d3770f8..15b84561c4 100644
--- a/linearmodels/tests/panel/test_utility.py
+++ b/linearmodels/tests/panel/test_utility.py
@@ -9,13 +9,19 @@
from linearmodels.panel.utility import (dummy_matrix, in_2core_graph,
in_2core_graph_slow, preconditioner)
-formats = {'csc': scipy.sparse.csc.csc_matrix, 'csr': scipy.sparse.csr.csr_matrix,
- 'coo': scipy.sparse.coo.coo_matrix, 'array': np.ndarray}
+formats = {
+ "csc": scipy.sparse.csc.csc_matrix,
+ "csr": scipy.sparse.csr.csr_matrix,
+ "coo": scipy.sparse.coo.coo_matrix,
+ "array": np.ndarray,
+}
-pytestmark = pytest.mark.filterwarnings('ignore:the matrix subclass:PendingDeprecationWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore:the matrix subclass:PendingDeprecationWarning"
+)
-@pytest.fixture(scope='module', params=formats)
+@pytest.fixture(scope="module", params=formats)
def format(request):
return request.param, formats[request.param]
@@ -40,7 +46,7 @@ def test_dummy_last():
cats[10:, 0] = 2
cats[:, 1] = np.arange(15) % 5
cats[-1, 1] = 0
- out, _ = dummy_matrix(cats, drop='last', precondition=False)
+ out, _ = dummy_matrix(cats, drop="last", precondition=False)
assert isinstance(out, scipy.sparse.csc.csc_matrix)
assert out.shape == (15, 3 + 5 - 1)
expected = np.array([5, 5, 5, 4, 3, 3, 3], dtype=np.int32)
@@ -52,14 +58,14 @@ def test_invalid_format():
cats = np.zeros([10, 1], dtype=np.int8)
cats[5:, 0] = 1
with pytest.raises(ValueError):
- dummy_matrix(cats, format='unknown', precondition=False)
+ dummy_matrix(cats, format="unknown", precondition=False)
def test_dummy_pandas():
- c1 = pd.Series(pd.Categorical(['a'] * 5 + ['b'] * 5 + ['c'] * 5))
- c2 = pd.Series(pd.Categorical(['A', 'B', 'C', 'D', 'E'] * 3))
+ c1 = pd.Series(pd.Categorical(["a"] * 5 + ["b"] * 5 + ["c"] * 5))
+ c2 = pd.Series(pd.Categorical(["A", "B", "C", "D", "E"] * 3))
cats = pd.concat([c1, c2], 1)
- out, _ = dummy_matrix(cats, drop='last', precondition=False)
+ out, _ = dummy_matrix(cats, drop="last", precondition=False)
assert isinstance(out, scipy.sparse.csc.csc_matrix)
assert out.shape == (15, 3 + 5 - 1)
expected = np.array([5, 5, 5, 3, 3, 3, 3], dtype=np.int32)
@@ -67,12 +73,14 @@ def test_dummy_pandas():
def test_dummy_precondition():
- c1 = pd.Series(pd.Categorical(['a'] * 5 + ['b'] * 5 + ['c'] * 5))
- c2 = pd.Series(pd.Categorical(['A', 'B', 'C', 'D', 'E'] * 3))
+ c1 = pd.Series(pd.Categorical(["a"] * 5 + ["b"] * 5 + ["c"] * 5))
+ c2 = pd.Series(pd.Categorical(["A", "B", "C", "D", "E"] * 3))
cats = pd.concat([c1, c2], 1)
- out_arr, cond_arr = dummy_matrix(cats, format='array', drop='last', precondition=True)
- out_csc, cond_csc = dummy_matrix(cats, format='csc', drop='last', precondition=True)
- out_csr, cond_csr = dummy_matrix(cats, format='csr', drop='last', precondition=True)
+ out_arr, cond_arr = dummy_matrix(
+ cats, format="array", drop="last", precondition=True
+ )
+ out_csc, cond_csc = dummy_matrix(cats, format="csc", drop="last", precondition=True)
+ out_csr, cond_csr = dummy_matrix(cats, format="csr", drop="last", precondition=True)
assert_allclose((out_arr ** 2).sum(0), np.ones(out_arr.shape[1]))
assert_allclose((out_csc.multiply(out_csc)).sum(0).A1, np.ones(out_arr.shape[1]))
assert_allclose(cond_arr, cond_csc)
@@ -108,23 +116,23 @@ def test_drop_singletons_slow():
idx = np.arange(40000)
- cols = {'c1': c1.copy(), 'c2': c2.copy()}
+ cols = {"c1": c1.copy(), "c2": c2.copy()}
for i in range(40000):
- last = cols['c1'].shape[0]
+ last = cols["c1"].shape[0]
for col in cols:
keep = in_2core_graph_slow(cols[col])
for col2 in cols:
cols[col2] = cols[col2][keep]
idx = idx[keep]
- if cols['c1'].shape[0] == last:
+ if cols["c1"].shape[0] == last:
break
expected = np.concatenate([c1[idx], c2[idx]], 1)
assert_array_equal(nonsingletons, expected)
- expected = np.concatenate([cols['c1'], cols['c2']], 1)
+ expected = np.concatenate([cols["c1"], cols["c2"]], 1)
assert_array_equal(nonsingletons, expected)
- dummies, _ = dummy_matrix(cats, format='csr', precondition=False)
+ dummies, _ = dummy_matrix(cats, format="csr", precondition=False)
to_drop = dummies[~retain]
assert to_drop.sum() == 2 * (~retain).sum()
@@ -154,10 +162,12 @@ def test_drop_singletons_pandas():
rs = np.random.RandomState(0)
c1 = rs.randint(0, 10000, (40000, 1))
c2 = rs.randint(0, 20000, (40000, 1))
- df = [pd.Series(['{0}{1}'.format(let, c) for c in cat.ravel()], dtype='category')
- for let, cat in zip('AB', (c1, c2))]
+ df = [
+ pd.Series(["{0}{1}".format(let, c) for c in cat.ravel()], dtype="category")
+ for let, cat in zip("AB", (c1, c2))
+ ]
df = pd.concat(df, 1)
- df.columns = ['cat1', 'cat2']
+ df.columns = ["cat1", "cat2"]
cats = df
remain = in_2core_graph(cats)
expected = in_2core_graph_slow(cats)
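
Two spellings in this file predate later API changes and are worth flagging (an observation about newer pandas/SciPy releases, not part of this diff): `pd.concat([c1, c2], 1)` passes `axis` positionally, which pandas later deprecated and then removed, and `scipy.sparse.csc.csc_matrix` reaches into a submodule that SciPy later made private. The forward-compatible forms:

    import numpy as np
    import pandas as pd
    import scipy.sparse

    c1 = pd.Series(pd.Categorical(["a"] * 5 + ["b"] * 5 + ["c"] * 5))
    c2 = pd.Series(pd.Categorical(["A", "B", "C", "D", "E"] * 3))

    cats = pd.concat([c1, c2], axis=1)        # keyword axis; positional is gone
    mat = scipy.sparse.csc_matrix(np.eye(3))  # public name, not scipy.sparse.csc.*
    assert cats.shape == (15, 2) and mat.shape == (3, 3)
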
diff --git a/linearmodels/tests/system/_utility.py b/linearmodels/tests/system/_utility.py
index 1d4e17e1a1..90d2190068 100644
--- a/linearmodels/tests/system/_utility.py
+++ b/linearmodels/tests/system/_utility.py
@@ -6,8 +6,17 @@
from linearmodels.utility import AttrDict
-def generate_data(n=500, k=10, p=3, const=True, rho=0.8, common_exog=False,
- included_weights=False, output_dict=True, seed=1234):
+def generate_data(
+ n=500,
+ k=10,
+ p=3,
+ const=True,
+ rho=0.8,
+ common_exog=False,
+ included_weights=False,
+ output_dict=True,
+ seed=1234,
+):
np.random.seed(seed)
p = np.array(p)
if p.ndim == 0:
@@ -34,13 +43,13 @@ def generate_data(n=500, k=10, p=3, const=True, rho=0.8, common_exog=False,
if included_weights:
w = np.random.chisquare(5, (n, 1)) / 5
if output_dict:
- data['equ.{0}'.format(i)] = {'dependent': y, 'exog': x}
+ data["equ.{0}".format(i)] = {"dependent": y, "exog": x}
if included_weights:
- data['equ.{0}'.format(i)]['weights'] = w
+ data["equ.{0}".format(i)]["weights"] = w
else:
- data['equ.{0}'.format(i)] = (y, x)
+ data["equ.{0}".format(i)] = (y, x)
if included_weights:
- data['equ.{0}'.format(i)] = tuple(list(data['equ.{0}'.format(i)]) + [w])
+ data["equ.{0}".format(i)] = tuple(list(data["equ.{0}".format(i)]) + [w])
return data
@@ -52,9 +61,21 @@ def atleast_k_elem(x, k):
return x
-def generate_3sls_data(n=500, k=10, p=3, en=2, instr=3, const=True, rho=0.8, kappa=0.5,
- beta=0.5, common_exog=False, included_weights=False, output_dict=True,
- seed=1234):
+def generate_3sls_data(
+ n=500,
+ k=10,
+ p=3,
+ en=2,
+ instr=3,
+ const=True,
+ rho=0.8,
+ kappa=0.5,
+ beta=0.5,
+ common_exog=False,
+ included_weights=False,
+ output_dict=True,
+ seed=1234,
+):
np.random.seed(seed)
p = atleast_k_elem(p, k)
en = atleast_k_elem(en, k)
@@ -79,25 +100,27 @@ def generate_3sls_data(n=500, k=10, p=3, en=2, instr=3, const=True, rho=0.8, kap
for i, _p, _en, _instr in zip(range(k), p, en, instr):
total = _p + _en + _instr
corr = np.eye(_p + _en + _instr + 1)
- corr[_p:_p + _en, _p:_p + _en] = kappa * np.eye(_en)
- corr[_p:_p + _en, -1] = np.sqrt(1 - kappa ** 2) * np.ones(_en)
- corr[_p + _en:_p + _en + _instr, _p:_p + _en] = beta * np.ones((_instr, _en))
+ corr[_p : _p + _en, _p : _p + _en] = kappa * np.eye(_en)
+ corr[_p : _p + _en, -1] = np.sqrt(1 - kappa ** 2) * np.ones(_en)
+ corr[_p + _en : _p + _en + _instr, _p : _p + _en] = beta * np.ones(
+ (_instr, _en)
+ )
if _instr > 0:
val = np.sqrt(1 - beta ** 2) / _instr * np.eye(_instr)
- corr[_p + _en:_p + _en + _instr, _p + _en:_p + _en + _instr] = val
+ corr[_p + _en : _p + _en + _instr, _p + _en : _p + _en + _instr] = val
if common_exog:
shocks = np.random.standard_normal((n, total))
common_shocks = common_shocks if common_shocks is not None else shocks
else:
shocks = np.random.standard_normal((n, total))
- shocks = np.concatenate([shocks, eps[:, count:count + 1]], 1)
+ shocks = np.concatenate([shocks, eps[:, count : count + 1]], 1)
variables = shocks @ corr.T
- x = variables[:, :_p + _en]
+ x = variables[:, : _p + _en]
exog = variables[:, :_p]
- endog = variables[:, _p:_p + _en]
- instr = variables[:, _p + _en:total]
- e = variables[:, total:total + 1]
+ endog = variables[:, _p : _p + _en]
+ instr = variables[:, _p + _en : total]
+ e = variables[:, total : total + 1]
if const:
x = np.c_[np.ones((n, 1)), x]
exog = np.c_[np.ones((n, 1)), exog]
@@ -111,15 +134,19 @@ def generate_3sls_data(n=500, k=10, p=3, en=2, instr=3, const=True, rho=0.8, kap
if _instr == 0:
instr = None
if output_dict:
- data['equ.{0}'.format(count)] = {'dependent': dep, 'exog': exog,
- 'endog': endog, 'instruments': instr}
+ data["equ.{0}".format(count)] = {
+ "dependent": dep,
+ "exog": exog,
+ "endog": endog,
+ "instruments": instr,
+ }
if included_weights:
- data['equ.{0}'.format(count)]['weights'] = w
+ data["equ.{0}".format(count)]["weights"] = w
else:
if included_weights:
- data['equ.{0}'.format(count)] = (dep, exog, endog, instr, w)
+ data["equ.{0}".format(count)] = (dep, exog, endog, instr, w)
else:
- data['equ.{0}'.format(count)] = (dep, exog, endog, instr)
+ data["equ.{0}".format(count)] = (dep, exog, endog, instr)
count += 1
return data
@@ -134,8 +161,8 @@ def simple_sur(y, x):
b.append(lstsq(x[i], y[i])[0])
eps.append(y[i] - x[i] @ b[-1])
b = np.vstack(b)
- out['beta0'] = b
- out['eps0'] = eps
+ out["beta0"] = b
+ out["eps0"] = eps
eps = np.hstack(eps)
nobs = eps.shape[0]
sigma = eps.T @ eps / nobs
@@ -153,12 +180,12 @@ def simple_sur(y, x):
row = np.hstack(row)
bx.append(row)
bx = np.vstack(bx)
- xpx = (bx.T @ omegainv @ bx)
- xpy = (bx.T @ omegainv @ by)
+ xpx = bx.T @ omegainv @ bx
+ xpy = bx.T @ omegainv @ by
beta1 = np.linalg.solve(xpx, xpy)
- out['beta1'] = beta1
- out['xpx'] = xpx
- out['xpy'] = xpy
+ out["beta1"] = beta1
+ out["xpx"] = xpx
+ out["xpy"] = xpy
return out
@@ -174,12 +201,12 @@ def simple_3sls(y, x, z):
b.append(lstsq(xhat[i], y[i])[0])
eps.append(y[i] - x[i] @ b[-1])
b = np.vstack(b)
- out['beta0'] = b
- out['eps0'] = eps
+ out["beta0"] = b
+ out["eps0"] = eps
eps = np.hstack(eps)
nobs = eps.shape[0]
sigma = eps.T @ eps / nobs
- out['sigma'] = sigma
+ out["sigma"] = sigma
omega = np.kron(sigma, np.eye(nobs))
omegainv = np.linalg.inv(omega)
by = np.vstack([y[i] for i in range(k)])
@@ -194,46 +221,48 @@ def simple_3sls(y, x, z):
row = np.hstack(row)
bx.append(row)
bx = np.vstack(bx)
- xpx = (bx.T @ omegainv @ bx)
- xpy = (bx.T @ omegainv @ by)
+ xpx = bx.T @ omegainv @ bx
+ xpy = bx.T @ omegainv @ by
beta1 = np.linalg.solve(xpx, xpy)
- out['beta1'] = beta1
- out['xpx'] = xpx
- out['xpy'] = xpy
+ out["beta1"] = beta1
+ out["xpx"] = xpx
+ out["xpy"] = xpy
idx = 0
eps = []
for i in range(k):
k = x[i].shape[1]
- b = beta1[idx:idx + k]
+ b = beta1[idx : idx + k]
eps.append(y[i] - x[i] @ b)
idx += k
eps = np.hstack(eps)
nobs = eps.shape[0]
sigma = eps.T @ eps / nobs
- out['eps'] = eps
- out['cov'] = np.linalg.inv(bx.T @ omegainv @ bx)
+ out["eps"] = eps
+ out["cov"] = np.linalg.inv(bx.T @ omegainv @ bx)
return out
def convert_to_pandas(a, base):
k = a.shape[1]
- cols = [base + '_{0}'.format(i) for i in range(k)]
+ cols = [base + "_{0}".format(i) for i in range(k)]
return pd.DataFrame(a, columns=cols)
-def generate_simultaneous_data(n=500, nsystem=3, nexog=3, ninstr=2, const=True, seed=1234):
+def generate_simultaneous_data(
+ n=500, nsystem=3, nexog=3, ninstr=2, const=True, seed=1234
+):
np.random.seed(seed)
k = nexog + nsystem * ninstr
beta = np.random.chisquare(3, (k, nsystem)) / 3
gam = np.random.standard_normal((nsystem, nsystem)) / np.sqrt(3)
- gam.flat[::nsystem + 1] = 1.0
+ gam.flat[:: nsystem + 1] = 1.0
x = np.random.standard_normal((n, k))
for i in range(nsystem):
mask = np.zeros(k)
mask[:nexog] = 1.0
- mask[nexog + i * ninstr: nexog + (i + 1) * ninstr] = 1.0
+ mask[nexog + i * ninstr : nexog + (i + 1) * ninstr] = 1.0
beta[:, i] *= mask
if const:
x = np.concatenate([np.ones((n, 1)), x], 1)
@@ -243,10 +272,10 @@ def generate_simultaneous_data(n=500, nsystem=3, nexog=3, ninstr=2, const=True,
gaminv = np.linalg.inv(gam)
y = x @ beta @ gaminv + eps @ gaminv
eqns = {}
- deps = convert_to_pandas(np.squeeze(y), 'dependent')
- exogs = convert_to_pandas(x, 'exog')
+ deps = convert_to_pandas(np.squeeze(y), "dependent")
+ exogs = convert_to_pandas(x, "exog")
if const:
- exogs.columns = ['const'] + list(exogs.columns[1:])
+ exogs.columns = ["const"] + list(exogs.columns[1:])
for i in range(nsystem):
dep = deps.iloc[:, i]
idx = sorted(set(range(nsystem)).difference([i]))
@@ -258,14 +287,24 @@ def generate_simultaneous_data(n=500, nsystem=3, nexog=3, ninstr=2, const=True,
ex_idx = list(range(const + nexog)) + list(drop)
exog = exogs.iloc[:, ex_idx]
idx = set(range(const + nexog, x.shape[1]))
- instr = convert_to_pandas(x[:, sorted(idx.difference(drop))], 'instruments')
+ instr = convert_to_pandas(x[:, sorted(idx.difference(drop))], "instruments")
eqn = dict(dependent=dep, exog=exog, endog=endog, instruments=instr)
eqns[dep.name] = eqn
return eqns
-def generate_3sls_data_v2(n=500, k=3, nexog=3, nendog=2, ninstr=3, const=True, rho=0.5,
- output_dict=True, seed=1234, omitted='none'):
+def generate_3sls_data_v2(
+ n=500,
+ k=3,
+ nexog=3,
+ nendog=2,
+ ninstr=3,
+ const=True,
+ rho=0.5,
+ output_dict=True,
+ seed=1234,
+ omitted="none",
+):
np.random.seed(seed)
eqns = AttrDict()
for i in range(k):
@@ -288,25 +327,26 @@ def generate_3sls_data_v2(n=500, k=3, nexog=3, nendog=2, ninstr=3, const=True, r
x = np.hstack([np.ones((n, 1)), x])
exog = np.hstack([np.ones((n, 1)), exog])
dep = x @ params + eps + nendog * np.random.standard_normal((n, 1))
- if omitted == 'none' or omitted == 'drop':
+ if omitted == "none" or omitted == "drop":
if exog.shape[1] == 0:
exog = None
if endog.shape[1] == 0:
endog = None
if instr.shape[1] == 0:
instr = None
- eqn = AttrDict(dependent=dep, exog=exog, endog=endog, instruments=instr,
- params=params)
- eqns['eqn.{0}'.format(i)] = eqn
+ eqn = AttrDict(
+ dependent=dep, exog=exog, endog=endog, instruments=instr, params=params
+ )
+ eqns["eqn.{0}".format(i)] = eqn
if not output_dict:
for key in eqns:
eq = eqns[key]
eqns[key] = (eq.dependent, eq.exog, eq.endog, eq.instruments)
else:
- if omitted == 'drop':
+ if omitted == "drop":
for key in eqns:
eq = eqns[key]
- for key2 in ('exog', 'endog', 'instruments'):
+ for key2 in ("exog", "endog", "instruments"):
if eq[key2] is None:
del eq[key2]
@@ -323,7 +363,7 @@ def simple_gmm(y, x, z, robust=True, steps=2):
idx = 0
for i in range(len(x)):
_k = x[i].shape[1]
- _x[nobs * i:nobs * (i + 1), idx:idx + _k] = x[i]
+ _x[nobs * i : nobs * (i + 1), idx : idx + _k] = x[i]
idx += _k
x = _x
@@ -331,7 +371,7 @@ def simple_gmm(y, x, z, robust=True, steps=2):
_z = np.zeros((k * nobs, kz))
for i in range(len(z)):
_k = z[i].shape[1]
- _z[nobs * i:nobs * (i + 1), idx:idx + _k] = z[i]
+ _z[nobs * i : nobs * (i + 1), idx : idx + _k] = z[i]
idx += _k
z = _z
@@ -384,5 +424,12 @@ def simple_gmm(y, x, z, robust=True, steps=2):
ze = z * eps
g_bar = ze.sum(0) / nobs
j_stat = nobs * g_bar @ wi @ g_bar
- return AttrDict(beta0=beta0.ravel(), beta1=beta1.ravel(), w0=w0,
- w1=w, sigma=sigma, cov=cov, j_stat=j_stat)
+ return AttrDict(
+ beta0=beta0.ravel(),
+ beta1=beta1.ravel(),
+ w0=w0,
+ w1=w,
+ sigma=sigma,
+ cov=cov,
+ j_stat=j_stat,
+ )
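
For reference, the `simple_sur` and `simple_3sls` helpers above implement textbook feasible GLS: estimate each equation by least squares, form the cross-equation covariance from the stacked residuals, then solve the weighted normal equations the code stores as `xpx` and `xpy`:

    \hat{\beta}_{\mathrm{GLS}} = \left(X'\Omega^{-1}X\right)^{-1} X'\Omega^{-1}y,
    \qquad \Omega = \hat{\Sigma} \otimes I_n,
    \qquad \hat{\Sigma} = \frac{1}{n}\,\hat{\varepsilon}'\hat{\varepsilon}

For 3SLS the regressors are first replaced by their projections onto the instruments (`xhat`), and `cov` is the usual sandwich-free form (X'Ω⁻¹X)⁻¹. Since Ω⁻¹ = Σ̂⁻¹ ⊗ Iₙ, the nk-square Kronecker product never has to be materialized in production code; the helper builds it explicitly only because clarity beats speed in a test oracle.
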
diff --git a/linearmodels/tests/system/results/execute-stata-3sls.py b/linearmodels/tests/system/results/execute-stata-3sls.py
index d65ef491c7..1ab9fd10b8 100644
--- a/linearmodels/tests/system/results/execute-stata-3sls.py
+++ b/linearmodels/tests/system/results/execute-stata-3sls.py
@@ -14,7 +14,7 @@
out = []
for key in data:
eqn = data[key]
- for key in ('exog', 'endog'):
+ for key in ("exog", "endog"):
vals = eqn[key]
for col in vals:
if col in all_cols:
@@ -23,9 +23,9 @@
out.append(vals[col])
all_cols.append(col)
out = concat(out, 1)
-if 'const' in out:
- out.pop('const')
-out.to_stata('simulated-3sls.dta', write_index=False)
+if "const" in out:
+ out.pop("const")
+out.to_stata("simulated-3sls.dta", write_index=False)
SEP = """
file open myfile using {outfile}, write append
@@ -41,17 +41,22 @@
(dependent_2 dependent_0 dependent_1 exog_1 exog_2 exog_3 exog_8 exog_9), {method}
"""
-STATA_PATH = os.path.join('C:\\', 'Program Files (x86)', 'Stata13', 'StataMP-64.exe')
-OUTFILE = os.path.join(os.getcwd(), 'stata-3sls-results.txt')
+STATA_PATH = os.path.join("C:\\", "Program Files (x86)", "Stata15", "StataMP-64.exe")
+OUTFILE = os.path.join(os.getcwd(), "stata-3sls-results.txt")
-header = [r'use "C:\git\linearmodels\linearmodels\tests\system\results\simulated-3sls.dta", clear']
+header = [
+ r'use "C:\git\linearmodels\linearmodels\tests\system\results\simulated-3sls.dta", clear'
+]
-all_stats = 'estout using {outfile}, cells(b(fmt(%13.12g)) t(fmt(%13.12g)) p(fmt(%13.12g))) stats('
-stats = ['chi2_{0}', 'F_{0}', 'p_{0}', 'df_m{0}', 'mss_{0}', 'r2_{0}', 'rss_{0}']
+all_stats = "estout using {outfile}, cells(b(fmt(%13.12g)) t(fmt(%13.12g)) p(fmt(%13.12g))) stats("
+stats = ["chi2_{0}", "F_{0}", "p_{0}", "df_m{0}", "mss_{0}", "r2_{0}", "rss_{0}"]
for i in range(1, 4):
- all_stats += ' '.join(map(lambda s: s.format(i), stats)) + ' '
-all_stats += ') append'
-output = all_stats + '\n' + """
+ all_stats += " ".join(map(lambda s: s.format(i), stats)) + " "
+all_stats += ") append"
+output = (
+ all_stats
+ + "\n"
+ + """
file open myfile using {outfile}, write append
file write myfile "*********** Variance ****************" _n
@@ -69,21 +74,22 @@
estout matrix(Sigma, fmt(%13.12g)) using {outfile}, append
"""
+)
output = output.format(outfile=OUTFILE)
-methods = ('3sls', '2sls', 'ols', 'sur', '3sls ireg3')
+methods = ("3sls", "2sls", "ols", "sur", "3sls ireg3")
-with open('three-sls.do', 'w') as stata_file:
- stata_file.write('\n\n'.join(header))
+with open("three-sls.do", "w") as stata_file:
+ stata_file.write("\n\n".join(header))
for method in methods:
stata_file.write(SEP.format(method=method, outfile=OUTFILE))
- stata_file.write('\n\n'.join([CMD.format(method=method), output]))
+ stata_file.write("\n\n".join([CMD.format(method=method), output]))
if os.path.exists(OUTFILE):
os.unlink(OUTFILE)
-do_file = os.path.join(os.getcwd(), 'three-sls.do')
-cmd = [STATA_PATH, '/e', 'do', do_file]
-print(' '.join(cmd))
+do_file = os.path.join(os.getcwd(), "three-sls.do")
+cmd = [STATA_PATH, "/e", "do", do_file]
+print(" ".join(cmd))
subprocess.call(cmd)
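
The script ends by launching Stata in batch mode through `subprocess.call` with a list-form command, which sidesteps shell quoting around the spaces in the install path. The shape of that call, with a harmless stand-in command:

    import subprocess
    import sys

    # stand-in for: cmd = [STATA_PATH, "/e", "do", do_file]
    cmd = [sys.executable, "-c", "print('batch run')"]
    print(" ".join(cmd))       # echo the command line before launching, as above
    rc = subprocess.call(cmd)  # blocks until the child exits; returns its code
    assert rc == 0
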
diff --git a/linearmodels/tests/system/results/execute-stata.py b/linearmodels/tests/system/results/execute-stata.py
index 2360e3a89b..d526468e0c 100644
--- a/linearmodels/tests/system/results/execute-stata.py
+++ b/linearmodels/tests/system/results/execute-stata.py
@@ -12,17 +12,22 @@
from linearmodels.tests.system._utility import generate_data
-STATA_PATH = os.path.join('C:\\', 'Program Files (x86)', 'Stata13', 'StataMP-64.exe')
-OUTFILE = os.path.join(os.getcwd(), 'stata-sur-results.txt')
+STATA_PATH = os.path.join("C:\\", "Program Files (x86)", "Stata13", "StataMP-64.exe")
+OUTFILE = os.path.join(os.getcwd(), "stata-sur-results.txt")
-header = [r'use "C:\git\linearmodels\linearmodels\tests\system\results\simulated-sur.dta", clear']
+header = [
+ r'use "C:\git\linearmodels\linearmodels\tests\system\results\simulated-sur.dta", clear'
+]
-all_stats = 'estout using {outfile}, cells(b(fmt(%13.12g)) t(fmt(%13.12g)) p(fmt(%13.12g))) stats('
-stats = ['chi2_{0}', 'F_{0}', 'p_{0}', 'df_m{0}', 'mss_{0}', 'r2_{0}', 'rss_{0}']
+all_stats = "estout using {outfile}, cells(b(fmt(%13.12g)) t(fmt(%13.12g)) p(fmt(%13.12g))) stats("
+stats = ["chi2_{0}", "F_{0}", "p_{0}", "df_m{0}", "mss_{0}", "r2_{0}", "rss_{0}"]
for i in range(1, 4):
- all_stats += ' '.join(map(lambda s: s.format(i), stats)) + ' '
-all_stats += ') append'
-output = all_stats + '\n' + """
+ all_stats += " ".join(map(lambda s: s.format(i), stats)) + " "
+all_stats += ") append"
+output = (
+ all_stats
+ + "\n"
+ + """
file open myfile using {outfile}, write append
file write myfile "*********** Variance ****************" _n
@@ -40,6 +45,7 @@
estout matrix(Sigma, fmt(%13.12g)) using {outfile}, append
"""
+)
output = output.format(outfile=OUTFILE)
data = generate_data(n=200, k=3, p=[2, 3, 4], const=True, seed=0)
@@ -48,63 +54,63 @@
cmds = []
for i, dataset in enumerate((data, common_data, missing_data)):
- base = 'mod_{0}'.format(i)
- cmd = ''
+ base = "mod_{0}".format(i)
+ cmd = ""
for j, key in enumerate(dataset):
- dep = dataset[key]['dependent']
- dep = pd.DataFrame(dep, columns=[base + '_y_{0}'.format(j)])
- exog = dataset[key]['exog'][:, 1:]
- exog_cols = [base + '_x_{0}{1}'.format(j, k) for k in range(exog.shape[1])]
+ dep = dataset[key]["dependent"]
+ dep = pd.DataFrame(dep, columns=[base + "_y_{0}".format(j)])
+ exog = dataset[key]["exog"][:, 1:]
+ exog_cols = [base + "_x_{0}{1}".format(j, k) for k in range(exog.shape[1])]
exog = pd.DataFrame(exog, columns=exog_cols)
if i != 1 or j == 0:
- cmd += ' ( ' + ' '.join(list(dep.columns) + list(exog.columns)) + ' ) '
+ cmd += " ( " + " ".join(list(dep.columns) + list(exog.columns)) + " ) "
else:
- new_cmd = cmd[:cmd.find(')') + 1]
- new_cmd = new_cmd.replace('mod_1_y_0', 'mod_1_y_{0}'.format(j))
+ new_cmd = cmd[: cmd.find(")") + 1]
+ new_cmd = new_cmd.replace("mod_1_y_0", "mod_1_y_{0}".format(j))
cmd += new_cmd
cmds.append(cmd)
outcmds = {}
-key_bases = ['basic', 'common', 'missing']
+key_bases = ["basic", "common", "missing"]
for key_base, cmd in zip(key_bases, cmds):
- base = 'sureg ' + cmd
- ss = base + ', small dfk'
- comp = cmd.replace('(', '').strip().split(')')[:-1]
+ base = "sureg " + cmd
+ ss = base + ", small dfk"
+ comp = cmd.replace("(", "").strip().split(")")[:-1]
comp = list(map(lambda s: s.strip(), comp))
- deps = [c.split(' ')[0] for c in comp]
- first = [c.split(' ')[1] for c in comp]
+ deps = [c.split(" ")[0] for c in comp]
+ first = [c.split(" ")[1] for c in comp]
vals = {}
i = 0
for d, f in zip(deps, first):
- vals['y' + str(i)] = d
- vals['x' + str(i)] = f
+ vals["y" + str(i)] = d
+ vals["x" + str(i)] = f
i += 1
constraint = """
constraint 1 [{y0}]{x0} = [{y1}]{x1}
constraint 2 [{y0}]{x0} = [{y2}]{x2}
"""
- cons = constraint.format(**vals) + base + ', const (1 2)'
- outcmds[key_base + '-base'] = base
- outcmds[key_base + '-ss'] = ss
- outcmds[key_base + '-constrained'] = cons
+ cons = constraint.format(**vals) + base + ", const (1 2)"
+ outcmds[key_base + "-base"] = base
+ outcmds[key_base + "-ss"] = ss
+ outcmds[key_base + "-constrained"] = cons
sep = """
file open myfile using {outfile}, write append \n
file write myfile "#################!{key}!####################" _n \n
file close myfile\n
"""
-with open('sur.do', 'w') as stata_file:
- stata_file.write('\n'.join(header) + '\n')
+with open("sur.do", "w") as stata_file:
+ stata_file.write("\n".join(header) + "\n")
for outcmd in outcmds:
stata_file.write(sep.format(outfile=OUTFILE, key=outcmd))
- stata_file.write(outcmds[outcmd] + '\n')
- stata_file.write('\n{0}\n\n'.format(output))
- stata_file.write('\n' * 5)
+ stata_file.write(outcmds[outcmd] + "\n")
+ stata_file.write("\n{0}\n\n".format(output))
+ stata_file.write("\n" * 5)
if os.path.exists(OUTFILE):
os.unlink(OUTFILE)
-do_file = os.path.join(os.getcwd(), 'sur.do')
-cmd = [STATA_PATH, '/e', 'do', do_file]
-print(' '.join(cmd))
+do_file = os.path.join(os.getcwd(), "sur.do")
+cmd = [STATA_PATH, "/e", "do", do_file]
+print(" ".join(cmd))
subprocess.call(cmd)
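
The `output = (...)` rewrite above shows how Black handles an expression that spans a multi-line string: rather than a backslash continuation or one overlong line, the whole concatenation is wrapped in parentheses with each operand on its own line and the `+` leading. In miniature:

    all_stats = "estout using results.txt, stats(r2_1 r2_2) append"
    output = (
        all_stats
        + "\n"
        + """
    file open myfile using results.txt, write append
    """
    )
    print(output)
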
diff --git a/linearmodels/tests/system/results/generate_data.py b/linearmodels/tests/system/results/generate_data.py
index 838525fb9b..55beda8d0b 100644
--- a/linearmodels/tests/system/results/generate_data.py
+++ b/linearmodels/tests/system/results/generate_data.py
@@ -19,33 +19,33 @@
np.random.seed(1234)
for key in missing_data:
- dep = missing_data[key]['dependent']
+ dep = missing_data[key]["dependent"]
locs = np.where(np.random.random_sample(dep.shape[0]) < 0.02)[0]
if np.any(locs):
dep.flat[locs] = np.nan
- exog = missing_data[key]['exog']
+ exog = missing_data[key]["exog"]
locs = np.where(np.random.random_sample(np.prod(exog.shape)) < 0.02)[0]
if np.any(locs):
exog.flat[locs] = np.nan
out = []
for i, dataset in enumerate((basic_data, common_data, missing_data)):
- base = 'mod_{0}'.format(i)
+ base = "mod_{0}".format(i)
for j, key in enumerate(dataset):
- dep = dataset[key]['dependent']
- dep = pd.DataFrame(dep, columns=[base + '_y_{0}'.format(j)])
- dataset[key]['dependent'] = dep
- exog = dataset[key]['exog'][:, 1:]
- exog_cols = [base + '_x_{0}{1}'.format(j, k) for k in range(exog.shape[1])]
+ dep = dataset[key]["dependent"]
+ dep = pd.DataFrame(dep, columns=[base + "_y_{0}".format(j)])
+ dataset[key]["dependent"] = dep
+ exog = dataset[key]["exog"][:, 1:]
+ exog_cols = [base + "_x_{0}{1}".format(j, k) for k in range(exog.shape[1])]
exog = pd.DataFrame(exog, columns=exog_cols)
exog = exog.copy()
- exog['cons'] = 1.0
- dataset[key]['exog'] = exog
+ exog["cons"] = 1.0
+ dataset[key]["exog"] = exog
if i != 1 or j == 0:
out.extend([dep, exog])
else:
out.extend([dep])
-if __name__ == '__main__':
+if __name__ == "__main__":
df = concat(out, 1)
- df.to_stata('simulated-sur.dta')
+ df.to_stata("simulated-sur.dta")
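
generate_data.py knocks out roughly 2% of the observations by drawing a uniform sample and writing NaN through `.flat`, which indexes the flattened view of the array. The same idea in isolation (using a local RandomState rather than the global seed):

    import numpy as np

    rng = np.random.RandomState(1234)
    x = rng.standard_normal((100, 3))
    locs = np.where(rng.random_sample(x.size) < 0.02)[0]
    x.flat[locs] = np.nan                  # writes through the flattened view
    assert np.isnan(x).sum() == len(locs)
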
diff --git a/linearmodels/tests/system/results/parse_stata_3sls_results.py b/linearmodels/tests/system/results/parse_stata_3sls_results.py
index 135abae1fb..bb6640413d 100644
--- a/linearmodels/tests/system/results/parse_stata_3sls_results.py
+++ b/linearmodels/tests/system/results/parse_stata_3sls_results.py
@@ -11,75 +11,77 @@
def process_block(results):
for i, line in enumerate(results):
- if line.startswith('chi2_1'):
+ if line.startswith("chi2_1"):
stat_start = i
- elif '* Variance' in line:
+ elif "* Variance" in line:
variance_start = i + 2
- elif '* Sigma' in line:
+ elif "* Sigma" in line:
sigma_start = i + 2
param_results = results[:stat_start]
- stats = results[stat_start:variance_start - 2]
- variance = results[variance_start:sigma_start - 2]
+ stats = results[stat_start : variance_start - 2]
+ variance = results[variance_start : sigma_start - 2]
sigma = results[sigma_start:]
def parse_block(block):
- values = pd.read_csv(StringIO('\n'.join(block)), header=None)
+ values = pd.read_csv(StringIO("\n".join(block)), header=None)
nums = np.asarray(values.iloc[:, -1])
nums = np.reshape(nums, (len(nums) // 3, 3))
- values = pd.DataFrame(nums, index=values.iloc[::3, 0], columns=['param', 'tstat', 'pval'])
- values.index.name = ''
+ values = pd.DataFrame(
+ nums, index=values.iloc[::3, 0], columns=["param", "tstat", "pval"]
+ )
+ values.index.name = ""
return values
params = {}
block = []
key = None
for line in param_results[2:]:
- contents = list(map(lambda s: s.strip(), line.split('\t')))
- if contents[0] != '' and contents[1] == '':
+ contents = list(map(lambda s: s.strip(), line.split("\t")))
+ if contents[0] != "" and contents[1] == "":
if key is not None:
params[key] = parse_block(block)
key = contents[0]
block = []
else:
- block.append(','.join(contents))
+ block.append(",".join(contents))
params[key] = parse_block(block)
stat_values = AttrDict()
for line in stats:
- contents = line.strip().split('\t')
+ contents = line.strip().split("\t")
if len(contents) > 1 and contents[0] and contents[1]:
stat_values[contents[0]] = float(contents[1])
stats = stat_values
- variance = list(map(lambda s: s.replace('\t', ','), variance))
+ variance = list(map(lambda s: s.replace("\t", ","), variance))
header = variance[0]
block = []
for line in variance[1:]:
- if ',,,' in line:
+ if ",,," in line:
continue
else:
block.append(line)
- out = pd.read_csv(StringIO(''.join([header] + block)))
+ out = pd.read_csv(StringIO("".join([header] + block)))
out = out.iloc[:, 1:]
- out.index = header.strip().split(',')[1:]
+ out.index = header.strip().split(",")[1:]
vcv = out
- sigma = list(map(lambda s: s.replace('\t', ','), sigma))
- sigma = pd.read_csv(StringIO(''.join(sigma)), index_col=0)
+ sigma = list(map(lambda s: s.replace("\t", ","), sigma))
+ sigma = pd.read_csv(StringIO("".join(sigma)), index_col=0)
return AttrDict(sigma=sigma, params=params, variance=vcv, stats=stats)
-with open(os.path.join(base, 'stata-3sls-results.txt'), 'r') as stata_results:
+with open(os.path.join(base, "stata-3sls-results.txt"), "r") as stata_results:
stata_results = stata_results.readlines()
block = []
results = {}
key = None
for line in stata_results:
- if '!!!!' in line:
+ if "!!!!" in line:
if key is not None:
results[key] = process_block(block)
- key = line.replace('!', '').strip()
+ key = line.replace("!", "").strip()
block = []
else:
block.append(line)
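
parse_stata_3sls_results.py walks the Stata output once, buffering lines until a `!!!!`-delimited marker names the next block. One detail the loop relies on, handled after the loop in the full script but outside this hunk, is flushing the final buffered block. A self-contained version of the same parser:

    lines = ["!!!!sur!!!!", "b0\t1.0", "b1\t2.0", "!!!!ols!!!!", "b0\t0.5"]
    results, block, key = {}, [], None
    for line in lines:
        if "!!!!" in line:
            if key is not None:
                results[key] = block
            key = line.replace("!", "").strip()
            block = []
        else:
            block.append(line)
    if key is not None:
        results[key] = block  # flush the last block once input is exhausted
    assert results == {"sur": ["b0\t1.0", "b1\t2.0"], "ols": ["b0\t0.5"]}
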
diff --git a/linearmodels/tests/system/results/parse_stata_results.py b/linearmodels/tests/system/results/parse_stata_results.py
index cd634601af..a072cd81c2 100644
--- a/linearmodels/tests/system/results/parse_stata_results.py
+++ b/linearmodels/tests/system/results/parse_stata_results.py
@@ -5,22 +5,22 @@
from linearmodels.utility import AttrDict
-filename = 'stata-sur-results.txt'
+filename = "stata-sur-results.txt"
cwd = os.path.split(os.path.abspath(__file__))[0]
-with open(os.path.join(cwd, filename), 'r') as results_file:
+with open(os.path.join(cwd, filename), "r") as results_file:
results = results_file.readlines()
blocks = {}
block = []
-key = ''
+key = ""
for line in results:
- if '###!' in line:
+ if "###!" in line:
if block:
blocks[key] = block
block = []
- key = line.strip().split('!')[1]
+ key = line.strip().split("!")[1]
block = []
block.append(line)
blocks[key] = block
@@ -31,84 +31,86 @@
def split_block(block):
block = block[:]
for i, line in enumerate(block):
- if '** Sigma **' in line:
- sigma = block[i + 2:]
+ if "** Sigma **" in line:
+ sigma = block[i + 2 :]
block = block[:i]
for i, line in enumerate(block):
- if '** Variance **' in line:
- variance = block[i + 2:]
+ if "** Variance **" in line:
+ variance = block[i + 2 :]
block = block[:i]
for i, line in enumerate(block):
- if 'chi2_' in line or 'F_' in line:
+ if "chi2_" in line or "F_" in line:
stats = block[i:]
params = block[:i]
break
- return AttrDict(sigma=process_sigma(sigma),
- variance=process_variance(variance),
- stats=process_stats(stats),
- params=process_params(params))
+ return AttrDict(
+ sigma=process_sigma(sigma),
+ variance=process_variance(variance),
+ stats=process_stats(stats),
+ params=process_params(params),
+ )
def process_stats(stats):
- sio = StringIO(''.join(stats))
- values = pd.read_csv(sio, sep='\t', header=None, index_col=0, engine='c')
- values.columns = ['value']
- values.index.name = 'stat'
- values = values.astype('float64')
+ sio = StringIO("".join(stats))
+ values = pd.read_csv(sio, sep="\t", header=None, index_col=0, engine="c")
+ values.columns = ["value"]
+ values.index.name = "stat"
+ values = values.astype("float64")
return values
def process_sigma(sigma):
- sio = StringIO(''.join(sigma))
- values = pd.read_csv(sio, sep='\t', index_col=0)
+ sio = StringIO("".join(sigma))
+ values = pd.read_csv(sio, sep="\t", index_col=0)
return values
def process_variance(variance):
- key = ''
+ key = ""
new = [variance[0]]
for line in variance[1:]:
- if '\t\t' in line:
- key = line.split('\t')[0]
+ if "\t\t" in line:
+ key = line.split("\t")[0]
continue
- new.append(key + '_' + line)
- sio = StringIO(''.join(new))
- values = pd.read_csv(sio, sep='\t', index_col=0)
- values.index = [i.replace('__', '_') for i in values.index]
- values.columns = [c.replace(':', '_').replace('__', '_') for c in values.columns]
+ new.append(key + "_" + line)
+ sio = StringIO("".join(new))
+ values = pd.read_csv(sio, sep="\t", index_col=0)
+ values.index = [i.replace("__", "_") for i in values.index]
+ values.columns = [c.replace(":", "_").replace("__", "_") for c in values.columns]
return values
def process_params(params):
reformatted = []
values = []
- key = var_name = ''
+ key = var_name = ""
for line in params[3:]:
- if '\t\n' in line:
+ if "\t\n" in line:
if values:
- new_line = key + '_' + var_name + '\t' + '\t'.join(values)
+ new_line = key + "_" + var_name + "\t" + "\t".join(values)
reformatted.append(new_line)
values = []
- key = line.split('\t')[0]
+ key = line.split("\t")[0]
continue
- if line.split('\t')[0].strip():
+ if line.split("\t")[0].strip():
if values:
- new_line = key + '_' + var_name + '\t' + '\t'.join(values)
+ new_line = key + "_" + var_name + "\t" + "\t".join(values)
reformatted.append(new_line)
values = []
- var_name = line.split('\t')[0].strip()
- values.append(line.split('\t')[1].strip())
- new_line = key + '_' + var_name + '\t' + '\t'.join(values)
+ var_name = line.split("\t")[0].strip()
+ values.append(line.split("\t")[1].strip())
+ new_line = key + "_" + var_name + "\t" + "\t".join(values)
reformatted.append(new_line)
- sio = StringIO('\n'.join(reformatted))
- values = pd.read_csv(sio, sep='\t', index_col=0, header=None)
+ sio = StringIO("\n".join(reformatted))
+ values = pd.read_csv(sio, sep="\t", index_col=0, header=None)
new_index = []
for idx in list(values.index):
- new_index.append(idx.replace('__', '_'))
+ new_index.append(idx.replace("__", "_"))
values.index = new_index
- values.index.name = 'param'
- values.columns = ['param', 'tstat', 'pval']
+ values.index.name = "param"
+ values.columns = ["param", "tstat", "pval"]
return values
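
The least obvious step in parse_stata_results.py is process_variance: Stata nests variable rows under equation header rows, which the code detects via the double tab and flattens by prefixing the current equation key. A tiny runnable sketch of just that pass, on synthetic rows:

rows = ["\tx1\tx2\n", "eq1\t\t\n", "x1\t1.0\t0.2\n", "eq2\t\t\n", "x1\t0.2\t1.0\n"]
key, flat = "", [rows[0]]  # keep the column header as-is
for line in rows[1:]:
    if "\t\t" in line:            # equation header row: remember the key
        key = line.split("\t")[0]
        continue
    flat.append(key + "_" + line)  # data row: prefix with its equation
assert flat[1].startswith("eq1_x1") and flat[2].startswith("eq2_x1")
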
diff --git a/linearmodels/tests/system/test_3sls.py b/linearmodels/tests/system/test_3sls.py
index f5b54f4b14..6a9196fd33 100644
--- a/linearmodels/tests/system/test_3sls.py
+++ b/linearmodels/tests/system/test_3sls.py
@@ -20,29 +20,35 @@
common_exog = [True, False]
included_weights = [True, False]
output_dict = [True, False]
-params = list(product(nexog, nendog, ninstr, const, rho, common_exog,
- included_weights, output_dict))
+params = list(
+ product(
+ nexog, nendog, ninstr, const, rho, common_exog, included_weights, output_dict
+ )
+)
nexog = [[0, 1, 2]]
nendog = [[1, 0, 1]]
ninstr = [[2, 0, 1]]
# Explicitly test variables that have no columns
-add_params = list(product(nexog, nendog, ninstr, const, rho, common_exog,
- included_weights, output_dict))
+add_params = list(
+ product(
+ nexog, nendog, ninstr, const, rho, common_exog, included_weights, output_dict
+ )
+)
params += add_params
def gen_id(param):
- idstr = 'homo' if isinstance(param[0], list) else 'hetero'
- idstr += '-homo_endog' if isinstance(param[1], list) else '-hetero_endog'
- idstr += '-homo_instr' if isinstance(param[2], list) else '-hetero_instr'
- idstr += '-const' if param[3] else ''
- idstr += '-correl' if param[4] != 0 else ''
- idstr += '-common' if param[5] else ''
- idstr += '-weights' if param[6] else ''
- idstr += '-dict' if param[7] else '-tuple'
+ idstr = "homo" if isinstance(param[0], list) else "hetero"
+ idstr += "-homo_endog" if isinstance(param[1], list) else "-hetero_endog"
+ idstr += "-homo_instr" if isinstance(param[2], list) else "-hetero_instr"
+ idstr += "-const" if param[3] else ""
+ idstr += "-correl" if param[4] != 0 else ""
+ idstr += "-common" if param[5] else ""
+ idstr += "-weights" if param[6] else ""
+ idstr += "-dict" if param[7] else "-tuple"
return idstr
@@ -59,6 +65,7 @@ def data(request):
en = 2
instr = 3
elif list_like:
+
def safe_len(a):
a = np.array(a)
if a.ndim == 0:
@@ -67,14 +74,23 @@ def safe_len(a):
k = max(map(safe_len, [p, en, instr]))
- return generate_3sls_data(n=250, k=k, p=p, en=en, instr=instr, const=const, rho=rho,
- common_exog=common_exog, included_weights=included_weights,
- output_dict=output_dict)
+ return generate_3sls_data(
+ n=250,
+ k=k,
+ p=p,
+ en=en,
+ instr=instr,
+ const=const,
+ rho=rho,
+ common_exog=common_exog,
+ included_weights=included_weights,
+ output_dict=output_dict,
+ )
def test_direct_simple(data):
mod = IV3SLS(data)
- res = mod.fit(cov_type='unadjusted')
+ res = mod.fit(cov_type="unadjusted")
y = []
x = []
@@ -92,14 +108,18 @@ def test_direct_simple(data):
if len(val) == 5:
return # weighted
else:
- y.append(val['dependent'])
- nobs = val['dependent'].shape[0]
- vexog = val['exog'] if val['exog'] is not None else np.empty((nobs, 0))
- vendog = val['endog'] if val['endog'] is not None else np.empty((nobs, 0))
- vinstr = val['instruments'] if val['instruments'] is not None else np.empty((nobs, 0))
+ y.append(val["dependent"])
+ nobs = val["dependent"].shape[0]
+ vexog = val["exog"] if val["exog"] is not None else np.empty((nobs, 0))
+ vendog = val["endog"] if val["endog"] is not None else np.empty((nobs, 0))
+ vinstr = (
+ val["instruments"]
+ if val["instruments"] is not None
+ else np.empty((nobs, 0))
+ )
x.append(np.concatenate([vexog, vendog], 1))
z.append(np.concatenate([vexog, vinstr], 1))
- if 'weights' in val:
+ if "weights" in val:
return # weighted
out = simple_3sls(y, x, z)
assert_allclose(res.params.values, out.beta1.squeeze())
@@ -113,7 +133,7 @@ def test_single_equation(data):
data = {key: data[key]}
mod = IV3SLS(data)
- res = mod.fit(cov_type='unadjusted')
+ res = mod.fit(cov_type="unadjusted")
y = []
x = []
@@ -127,10 +147,10 @@ def test_single_equation(data):
if len(val) == 5:
return # weighted
else:
- y.append(val['dependent'])
- x.append(np.concatenate([val['exog'], val['endog']], 1))
- z.append(np.concatenate([val['exog'], val['instruments']], 1))
- if 'weights' in val:
+ y.append(val["dependent"])
+ x.append(np.concatenate([val["exog"], val["endog"]], 1))
+ z.append(np.concatenate([val["exog"], val["instruments"]], 1))
+ if "weights" in val:
return # weighted
out = simple_3sls(y, x, z)
assert_allclose(res.params.values, out.beta1.squeeze())
@@ -147,7 +167,7 @@ def test_too_few_instruments():
instr = np.random.standard_normal((n, 1))
eqns = {}
for i in range(2):
- eqns['eqn.{0}'.format(i)] = (dep[:, i], exog, endog, instr)
+ eqns["eqn.{0}".format(i)] = (dep[:, i], exog, endog, instr)
with pytest.raises(ValueError):
IV3SLS(eqns)
@@ -161,7 +181,7 @@ def test_redundant_instruments():
instr = np.concatenate([exog, instr], 1)
eqns = {}
for i in range(2):
- eqns['eqn.{0}'.format(i)] = (dep[:, i], exog, endog, instr)
+ eqns["eqn.{0}".format(i)] = (dep[:, i], exog, endog, instr)
with pytest.raises(ValueError):
IV3SLS(eqns)
@@ -174,7 +194,7 @@ def test_too_many_instruments():
instr = np.random.standard_normal((n, n + 1))
eqns = {}
for i in range(2):
- eqns['eqn.{0}'.format(i)] = (dep[:, i], exog, endog, instr)
+ eqns["eqn.{0}".format(i)] = (dep[:, i], exog, endog, instr)
with pytest.raises(ValueError):
IV3SLS(eqns)
@@ -203,14 +223,14 @@ def test_multivariate_iv():
n = 250
dep = np.random.standard_normal((n, 2))
exog = np.random.standard_normal((n, 3))
- exog = DataFrame(exog, columns=['exog.{0}'.format(i) for i in range(3)])
+ exog = DataFrame(exog, columns=["exog.{0}".format(i) for i in range(3)])
endog = np.random.standard_normal((n, 2))
- endog = DataFrame(endog, columns=['endog.{0}'.format(i) for i in range(2)])
+ endog = DataFrame(endog, columns=["endog.{0}".format(i) for i in range(2)])
instr = np.random.standard_normal((n, 3))
- instr = DataFrame(instr, columns=['instr.{0}'.format(i) for i in range(3)])
+ instr = DataFrame(instr, columns=["instr.{0}".format(i) for i in range(3)])
eqns = {}
for i in range(2):
- eqns['dependent.{0}'.format(i)] = (dep[:, i], exog, endog, instr)
+ eqns["dependent.{0}".format(i)] = (dep[:, i], exog, endog, instr)
mod = IV3SLS(eqns)
res = mod.fit()
@@ -224,7 +244,7 @@ def test_multivariate_iv_bad_data():
n = 250
dep = np.random.standard_normal((n, 2))
instr = np.random.standard_normal((n, 3))
- instr = DataFrame(instr, columns=['instr.{0}'.format(i) for i in range(3)])
+ instr = DataFrame(instr, columns=["instr.{0}".format(i) for i in range(3)])
with pytest.raises(ValueError):
IV3SLS.multivariate_ls(dep, None, None, instr)
@@ -237,15 +257,18 @@ def test_fitted(data):
for i, key in enumerate(res.equations):
eq = res.equations[key]
fv = res.fitted_values[key].copy()
- fv.name = 'fitted_values'
+ fv.name = "fitted_values"
assert_series_equal(eq.fitted_values, fv)
b = eq.params.values
direct = mod._x[i] @ b
expected.append(direct[:, None])
assert_allclose(eq.fitted_values, direct, atol=1e-8)
expected = np.concatenate(expected, 1)
- expected = DataFrame(expected, index=mod._dependent[i].pandas.index,
- columns=[key for key in res.equations])
+ expected = DataFrame(
+ expected,
+ index=mod._dependent[i].pandas.index,
+ columns=[key for key in res.equations],
+ )
assert_frame_equal(expected, res.fitted_values)
@@ -254,11 +277,11 @@ def test_no_exog():
mod = IV3SLS(data)
res = mod.fit()
- data = generate_3sls_data_v2(nexog=0, const=False, omitted='drop')
+ data = generate_3sls_data_v2(nexog=0, const=False, omitted="drop")
mod = IV3SLS(data)
res2 = mod.fit()
- data = generate_3sls_data_v2(nexog=0, const=False, omitted='empty')
+ data = generate_3sls_data_v2(nexog=0, const=False, omitted="empty")
mod = IV3SLS(data)
res3 = mod.fit()
@@ -266,7 +289,9 @@ def test_no_exog():
mod = IV3SLS(data)
res4 = mod.fit()
- data = generate_3sls_data_v2(nexog=0, const=False, output_dict=False, omitted='empty')
+ data = generate_3sls_data_v2(
+ nexog=0, const=False, output_dict=False, omitted="empty"
+ )
mod = IV3SLS(data)
res5 = mod.fit()
assert_series_equal(res.params, res2.params)
@@ -280,11 +305,11 @@ def test_no_endog():
mod = IV3SLS(data)
res = mod.fit()
- data = generate_3sls_data_v2(nendog=0, ninstr=0, omitted='drop')
+ data = generate_3sls_data_v2(nendog=0, ninstr=0, omitted="drop")
mod = IV3SLS(data)
res2 = mod.fit()
- data = generate_3sls_data_v2(nendog=0, ninstr=0, omitted='empty')
+ data = generate_3sls_data_v2(nendog=0, ninstr=0, omitted="empty")
mod = IV3SLS(data)
res3 = mod.fit()
@@ -292,7 +317,7 @@ def test_no_endog():
mod = IV3SLS(data)
res4 = mod.fit()
- data = generate_3sls_data_v2(nendog=0, ninstr=0, output_dict=False, omitted='empty')
+ data = generate_3sls_data_v2(nendog=0, ninstr=0, output_dict=False, omitted="empty")
mod = IV3SLS(data)
res5 = mod.fit()
assert_series_equal(res.params, res2.params)
@@ -304,6 +329,6 @@ def test_no_endog():
def test_uneven_shapes():
data = generate_3sls_data_v2()
eq = data[list(data.keys())[0]]
- eq['weights'] = np.ones(eq.dependent.shape[0] // 2)
+ eq["weights"] = np.ones(eq.dependent.shape[0] // 2)
with pytest.raises(ValueError):
IV3SLS(data)
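
The tests above compare IV3SLS against simple_3sls. For orientation, a hedged sketch of the textbook estimator that helper checks — standard 3SLS algebra, not a copy of the test utility; beta1 plays the role of out.beta1:

import numpy as np

def sketch_3sls(y, x, z):
    # y, x, z: lists with one (nobs, .) array per equation; y entries are (nobs, 1).
    k, nobs = len(y), y[0].shape[0]
    # Stage 1: per-equation 2SLS using the projected regressors xhat = P_z x.
    xhat = [zi @ np.linalg.lstsq(zi, xi, rcond=None)[0] for xi, zi in zip(x, z)]
    b2sls = [np.linalg.lstsq(xh, yi, rcond=None)[0] for xh, yi in zip(xhat, y)]
    eps = np.hstack([yi - xi @ bi for yi, xi, bi in zip(y, x, b2sls)])
    sigma = eps.T @ eps / nobs  # residual covariance across equations
    # Stage 2: GLS on the stacked system with weight inv(sigma) kron I.
    omega_inv = np.kron(np.linalg.inv(sigma), np.eye(nobs))
    xhat_b = np.zeros((k * nobs, sum(xh.shape[1] for xh in xhat)))
    loc = 0
    for i, xh in enumerate(xhat):  # block-diagonal stacked regressors
        xhat_b[i * nobs:(i + 1) * nobs, loc:loc + xh.shape[1]] = xh
        loc += xh.shape[1]
    ys = np.vstack(y)
    return np.linalg.solve(xhat_b.T @ omega_inv @ xhat_b,
                           xhat_b.T @ omega_inv @ ys)
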
diff --git a/linearmodels/tests/system/test_3sls_against_stata.py b/linearmodels/tests/system/test_3sls_against_stata.py
index 744ef7153b..881f562743 100644
--- a/linearmodels/tests/system/test_3sls_against_stata.py
+++ b/linearmodels/tests/system/test_3sls_against_stata.py
@@ -8,32 +8,37 @@
from linearmodels.tests.system.results.parse_stata_3sls_results import results
-@pytest.fixture(scope='module', params=list(results.keys()))
+@pytest.fixture(scope="module", params=list(results.keys()))
def fit(request):
method = request.param
data = generate_simultaneous_data()
- if 'ols' in method or 'sur' in method:
+ if "ols" in method or "sur" in method:
mod = SUR
for key in data:
temp = data[key]
- temp['exog'] = concat([temp['exog'], temp['endog']], 1)
- del temp['endog']
- del temp['instruments']
+ temp["exog"] = concat([temp["exog"], temp["endog"]], 1)
+ del temp["endog"]
+ del temp["instruments"]
else:
mod = IV3SLS
- if 'ols' in method or '2sls' in method:
- fit_method = 'ols'
+ if "ols" in method or "2sls" in method:
+ fit_method = "ols"
else:
- fit_method = 'gls'
+ fit_method = "gls"
mod = mod(data)
- iterate = 'ireg3' in method
+ iterate = "ireg3" in method
stata = results[method]
- debiased = method in ('ols', '2sls')
+ debiased = method in ("ols", "2sls")
kwargs = {}
- decimal = 2 if 'ireg3' in method else 5
+ decimal = 3 if "ireg3" in method else 5
rtol = 10 ** -decimal
- res = mod.fit(cov_type='unadjusted', method=fit_method,
- debiased=debiased, iterate=iterate, **kwargs)
+ res = mod.fit(
+ cov_type="unadjusted",
+ method=fit_method,
+ debiased=debiased,
+ iterate=iterate,
+ **kwargs
+ )
return stata, res, rtol
@@ -42,10 +47,10 @@ def test_params(fit):
for idx in result.params.index:
val = result.params[idx]
- dep = '_'.join(idx.split('_')[:2])
- variable = '_'.join(idx.split('_')[2:])
- variable = '_cons' if variable == 'const' else variable
- stata_val = stata.params[dep].loc[variable, 'param']
+ dep = "_".join(idx.split("_")[:2])
+ variable = "_".join(idx.split("_")[2:])
+ variable = "_cons" if variable == "const" else variable
+ stata_val = stata.params[dep].loc[variable, "param"]
assert_allclose(stata_val, val, rtol=rtol)
@@ -55,10 +60,10 @@ def test_tstats(fit):
for idx in result.tstats.index:
val = result.tstats[idx]
- dep = '_'.join(idx.split('_')[:2])
- variable = '_'.join(idx.split('_')[2:])
- variable = '_cons' if variable == 'const' else variable
- stata_val = stata.params[dep].loc[variable, 'tstat']
+ dep = "_".join(idx.split("_")[:2])
+ variable = "_".join(idx.split("_")[2:])
+ variable = "_cons" if variable == "const" else variable
+ stata_val = stata.params[dep].loc[variable, "tstat"]
assert_allclose(stata_val, val, rtol=rtol)
@@ -67,10 +72,10 @@ def test_pval(fit):
for idx in result.pvalues.index:
val = result.pvalues[idx]
- dep = '_'.join(idx.split('_')[:2])
- variable = '_'.join(idx.split('_')[2:])
- variable = '_cons' if variable == 'const' else variable
- stata_val = stata.params[dep].loc[variable, 'pval']
+ dep = "_".join(idx.split("_")[:2])
+ variable = "_".join(idx.split("_")[2:])
+ variable = "_cons" if variable == "const" else variable
+ stata_val = stata.params[dep].loc[variable, "pval"]
assert_allclose(1 + stata_val, 1 + val, rtol=rtol)
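
The three tests above share one mapping from linearmodels' stacked parameter labels to Stata's row names. An illustrative sketch (the example labels are hypothetical; equation labels here contain exactly one underscore, and Stata names the intercept _cons):

def to_stata_key(idx):
    # Split "<equation>_<variable>" where the equation label has one underscore.
    parts = idx.split("_")
    dep, variable = "_".join(parts[:2]), "_".join(parts[2:])
    return dep, "_cons" if variable == "const" else variable

assert to_stata_key("dependent_0_const") == ("dependent_0", "_cons")
assert to_stata_key("dependent_0_exog_1") == ("dependent_0", "exog_1")
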
diff --git a/linearmodels/tests/system/test_covariance.py b/linearmodels/tests/system/test_covariance.py
index b7a3d5d6cf..c3706005c4 100644
--- a/linearmodels/tests/system/test_covariance.py
+++ b/linearmodels/tests/system/test_covariance.py
@@ -17,7 +17,7 @@
from linearmodels.tests.system._utility import generate_3sls_data_v2
covs = [HeteroskedasticCovariance, HomoskedasticCovariance]
-names = ['Heteroskedastic', 'Homoskedastic']
+names = ["Heteroskedastic", "Homoskedastic"]
@pytest.fixture(params=list(zip(covs, names)))
@@ -50,11 +50,11 @@ def gmm_cov(request):
return est(x, z, eps, w, sigma=sigma), name
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def cov_data():
data = generate_3sls_data_v2(k=2)
mod = IV3SLS(data)
- res = mod.fit(cov_type='unadjusted')
+ res = mod.fit(cov_type="unadjusted")
x = mod._x
z = mod._z
eps = res.resids.values
@@ -78,7 +78,7 @@ def _xpxi(x):
for j in range(k):
if i == j:
kx = x[i].shape[1]
- xpx[loc:loc + kx, loc:loc + kx] = x[i].T @ x[i] / nobs
+ xpx[loc : loc + kx, loc : loc + kx] = x[i].T @ x[i] / nobs
loc += kx
return np.linalg.inv(xpx)
@@ -112,7 +112,7 @@ def test_str_repr(cov):
assert name in str(est)
assert name in est.__repr__()
assert str(hex(id(est))) in est.__repr__()
- assert 'Debiased: True' in str(est)
+ assert "Debiased: True" in str(est)
def test_gmm_str_repr(gmm_cov):
@@ -120,7 +120,7 @@ def test_gmm_str_repr(gmm_cov):
assert name in str(est)
assert name in est.__repr__()
assert str(hex(id(est))) in est.__repr__()
- assert 'GMM' in str(est)
+ assert "GMM" in str(est)
def test_homoskedastic_direct(cov_data, debias):
@@ -154,7 +154,7 @@ def test_heteroskedastic_direct(cov_data, debias):
x, z, eps, sigma = cov_data
cov = HeteroskedasticCovariance(x, eps, sigma, sigma, debiased=debias)
k = len(x)
- xe = [x[i] * eps[:, i:i + 1] for i in range(k)]
+ xe = [x[i] * eps[:, i : i + 1] for i in range(k)]
xe = np.concatenate(xe, 1)
nobs = xe.shape[0]
xeex = np.zeros((xe.shape[1], xe.shape[1]))
@@ -180,10 +180,18 @@ def test_kernel_direct(cov_data, debias):
x, z, eps, sigma = cov_data
k = len(x)
bandwidth = 12
- cov = KernelCovariance(x, eps, sigma, sigma, gls=False, debiased=debias,
- kernel='parzen', bandwidth=bandwidth)
+ cov = KernelCovariance(
+ x,
+ eps,
+ sigma,
+ sigma,
+ gls=False,
+ debiased=debias,
+ kernel="parzen",
+ bandwidth=bandwidth,
+ )
assert cov.bandwidth == 12
- xe = [x[i] * eps[:, i:i + 1] for i in range(k)]
+ xe = [x[i] * eps[:, i : i + 1] for i in range(k)]
xe = np.concatenate(xe, 1)
w = kernel_weight_parzen(12)
nobs = xe.shape[0]
@@ -257,7 +265,7 @@ def test_gmm_heterosedastic_direct(cov_data, debias):
xpz = _xpz(x, z)
wi = np.linalg.inv(w)
xpz_wi = xpz @ wi
- ze = [z[i] * eps[:, i:i + 1] for i in range(k)]
+ ze = [z[i] * eps[:, i : i + 1] for i in range(k)]
ze = np.concatenate(ze, 1)
zeez = ze.T @ ze / nobs
assert_allclose(zeez, cov_est._omega())
@@ -279,16 +287,24 @@ def test_gmm_kernel_direct(cov_data):
bandwidth = 12
k = len(x)
nobs = x[0].shape[0]
- wm = KernelWeightMatrix(kernel='bartlett', bandwidth=bandwidth)
+ wm = KernelWeightMatrix(kernel="bartlett", bandwidth=bandwidth)
w = wm.weight_matrix(x, z, eps, sigma=sigma)
- cov_est = GMMKernelCovariance(x, z, eps, w, sigma=sigma, debiased=debias, kernel='bartlett',
- bandwidth=bandwidth)
+ cov_est = GMMKernelCovariance(
+ x,
+ z,
+ eps,
+ w,
+ sigma=sigma,
+ debiased=debias,
+ kernel="bartlett",
+ bandwidth=bandwidth,
+ )
xpz_wi_zpxi = _xpz_wi_zpxi(x, z, w)
xpz = _xpz(x, z)
wi = np.linalg.inv(w)
xpz_wi = xpz @ wi
- ze = [z[i] * eps[:, i:i + 1] for i in range(k)]
+ ze = [z[i] * eps[:, i : i + 1] for i in range(k)]
ze = np.concatenate(ze, 1)
zeez = ze.T @ ze / nobs
w = kernel_weight_bartlett(bandwidth)
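
The kernel tests rebuild a HAC long-run covariance from first principles. A hedged sketch of that computation with Bartlett weights — the standard w_j = 1 - j/(bw + 1); the library's kernel_weight_bartlett should agree, but this is an independent illustration, not its source:

import numpy as np

def bartlett_weights(bandwidth):
    j = np.arange(1, bandwidth + 1)
    return 1.0 - j / (bandwidth + 1.0)

def hac_covariance(scores, bandwidth):
    # scores: (nobs, m) array, e.g. the stacked x[i] * eps[:, i:i+1] moments.
    nobs = scores.shape[0]
    cov = scores.T @ scores / nobs  # Gamma_0
    for j, w in enumerate(bartlett_weights(bandwidth), start=1):
        gamma = scores[j:].T @ scores[:-j] / nobs
        cov += w * (gamma + gamma.T)  # add weighted Gamma_j + Gamma_j'
    return cov

rng = np.random.default_rng(0)
s = hac_covariance(rng.standard_normal((250, 4)), bandwidth=12)
assert np.allclose(s, s.T)  # a long-run covariance is symmetric
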
diff --git a/linearmodels/tests/system/test_equivalence.py b/linearmodels/tests/system/test_equivalence.py
index db1c5bcbfe..dd442678fb 100644
--- a/linearmodels/tests/system/test_equivalence.py
+++ b/linearmodels/tests/system/test_equivalence.py
@@ -7,27 +7,27 @@
def test_gmm_3sls_equiv():
eqns = generate_3sls_data_v2(k=3)
gmm = IVSystemGMM(eqns).fit(iter_limit=1)
- tsls = IV3SLS(eqns).fit(method='ols')
+ tsls = IV3SLS(eqns).fit(method="ols")
assert_allclose(gmm.params, tsls.params)
def test_3sls_2sls_equiv():
eqns = generate_3sls_data_v2(k=1)
tsls_mod = IV3SLS(eqns)
- tsls = tsls_mod.fit(method='ols', cov_type='unadjusted', debiased=False)
+ tsls = tsls_mod.fit(method="ols", cov_type="unadjusted", debiased=False)
eqn = eqns[list(eqns.keys())[0]]
ivmod = IV2SLS(eqn.dependent, eqn.exog, eqn.endog, eqn.instruments)
- iv = ivmod.fit(cov_type='unadjusted', debiased=False)
+ iv = ivmod.fit(cov_type="unadjusted", debiased=False)
assert_allclose(iv.params, tsls.params)
assert_allclose(iv.tstats, tsls.tstats)
assert_allclose(iv.rsquared, tsls.rsquared)
- tsls = tsls_mod.fit(method='ols', cov_type='unadjusted', debiased=True)
- iv = ivmod.fit(cov_type='unadjusted', debiased=True)
+ tsls = tsls_mod.fit(method="ols", cov_type="unadjusted", debiased=True)
+ iv = ivmod.fit(cov_type="unadjusted", debiased=True)
assert_allclose(iv.tstats, tsls.tstats)
- tsls = tsls_mod.fit(method='ols', cov_type='robust', debiased=False)
- iv = ivmod.fit(cov_type='robust', debiased=False)
+ tsls = tsls_mod.fit(method="ols", cov_type="robust", debiased=False)
+ iv = ivmod.fit(cov_type="robust", debiased=False)
assert_allclose(iv.tstats, tsls.tstats)
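
The single-equation benchmark behind test_3sls_2sls_equiv is plain 2SLS, which in closed form is just two least-squares passes; a minimal sketch under the usual full-rank assumptions:

import numpy as np

def two_sls(y, x, z):
    # First stage: project the regressors on the instruments (xhat = P_z x).
    xhat = z @ np.linalg.lstsq(z, x, rcond=None)[0]
    # Second stage: regress y on the projected regressors.
    return np.linalg.lstsq(xhat, y, rcond=None)[0]
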
diff --git a/linearmodels/tests/system/test_formulas.py b/linearmodels/tests/system/test_formulas.py
index 1cc89dce46..b2b4d7b9a3 100644
--- a/linearmodels/tests/system/test_formulas.py
+++ b/linearmodels/tests/system/test_formulas.py
@@ -16,23 +16,28 @@
joined = []
for i, key in enumerate(data):
eq = data[key]
- joined.append(Series(eq.dependent[:, 0], name='y{0}'.format(i + 1)))
+ joined.append(Series(eq.dependent[:, 0], name="y{0}".format(i + 1)))
for j, col in enumerate(eq.exog.T):
- joined.append(Series(col, name='x{0}{1}'.format(i + 1, j + 1)))
+ joined.append(Series(col, name="x{0}{1}".format(i + 1, j + 1)))
k = len(eq.exog.T)
for j, col in enumerate(eq.endog.T):
- joined.append(Series(col, name='x{0}{1}'.format(i + 1, j + k + 1)))
+ joined.append(Series(col, name="x{0}{1}".format(i + 1, j + k + 1)))
for j, col in enumerate(eq.instruments.T):
- joined.append(Series(col, name='z{0}{1}'.format(i + 1, j + 1)))
+ joined.append(Series(col, name="z{0}{1}".format(i + 1, j + 1)))
joined = concat(joined, 1)
fmlas = [
- {'eq1': 'y1 ~ x11 + x12', 'eq2': 'y2 ~ x21 + x22'},
- {'eq1': 'y1 ~ 1 + x11 + x12', 'eq2': 'y2 ~ 1 + x21 + x22'},
- {'eq1': 'y1 ~ 1 + x11 + np.exp(x12)', 'eq2': 'y2 ~ 1 + x21 + sigmoid(x22)'},
- {'eq1': 'y1 ~ 1 + x11 + [x14 + x15 ~ z11 + z12 + z13]', 'eq2': 'y2 ~ 1 + x21 + x22'},
- {'eq1': 'y1 ~ [x14 + x15 ~ 1 + x11 + x12 + x13 + z11 + z12 + z13]',
- 'eq2': 'y2 ~ x21 + [x24 ~ 1 + z21 + z22 + z23]'}
+ {"eq1": "y1 ~ x11 + x12", "eq2": "y2 ~ x21 + x22"},
+ {"eq1": "y1 ~ 1 + x11 + x12", "eq2": "y2 ~ 1 + x21 + x22"},
+ {"eq1": "y1 ~ 1 + x11 + np.exp(x12)", "eq2": "y2 ~ 1 + x21 + sigmoid(x22)"},
+ {
+ "eq1": "y1 ~ 1 + x11 + [x14 + x15 ~ z11 + z12 + z13]",
+ "eq2": "y2 ~ 1 + x21 + x22",
+ },
+ {
+ "eq1": "y1 ~ [x14 + x15 ~ 1 + x11 + x12 + x13 + z11 + z12 + z13]",
+ "eq2": "y2 ~ x21 + [x24 ~ 1 + z21 + z22 + z23]",
+ },
]
models = ((SUR, sur), (IVSystemGMM, iv_system_gmm), (IV3SLS, iv_3sls))
@@ -41,8 +46,8 @@
ids = []
for f, m in params:
- key = '--'.join([value for value in f.values()])
- key += ' : ' + str(m[0].__name__)
+ key = "--".join([value for value in f.values()])
+ key += " : " + str(m[0].__name__)
ids.append(key)
@@ -50,7 +55,7 @@ def sigmoid(v):
return np.exp(v) / (1 + np.exp(v))
-@pytest.fixture(scope='module', params=params, ids=ids)
+@pytest.fixture(scope="module", params=params, ids=ids)
def config(request):
fmla, model_interace = request.param
model, interface = model_interace
@@ -60,7 +65,7 @@ def config(request):
def test_fromula(config):
fmla, model, interface = config
for key in fmla:
- if '[' in fmla[key] and model not in (IVSystemGMM, IV3SLS):
+ if "[" in fmla[key] and model not in (IVSystemGMM, IV3SLS):
return
mod = model.from_formula(fmla, joined)
mod_fmla = interface(fmla, joined)
@@ -72,7 +77,7 @@ def test_fromula(config):
def test_predict(config):
fmla, model, interface = config
for key in fmla:
- if '[' in fmla[key] and model not in (IVSystemGMM, IV3SLS):
+ if "[" in fmla[key] and model not in (IVSystemGMM, IV3SLS):
return
mod = model.from_formula(fmla, joined)
res = mod.fit()
@@ -86,7 +91,7 @@ def test_predict(config):
def test_predict_partial(config):
fmla, model, interface = config
for key in fmla:
- if '[' in fmla[key] and model not in (IVSystemGMM, IV3SLS):
+ if "[" in fmla[key] and model not in (IVSystemGMM, IV3SLS):
return
mod = model.from_formula(fmla, joined)
res = mod.fit()
@@ -106,7 +111,7 @@ def test_predict_partial(config):
for key in list(mod._equations.keys())[1:]:
eqns[key] = mod._equations[key]
final = list(mod._equations.keys())[0]
- eqns[final] = {'exog': None, 'endog': None}
+ eqns[final] = {"exog": None, "endog": None}
pred3 = res.predict(equations=eqns, dataframe=True)
assert_frame_equal(pred2[pred3.columns], pred3)
@@ -120,7 +125,7 @@ def test_predict_partial(config):
def test_invalid_predict(config):
fmla, model, interface = config
for key in fmla:
- if '[' in fmla[key] and model not in (IVSystemGMM, IV3SLS):
+ if "[" in fmla[key] and model not in (IVSystemGMM, IV3SLS):
return
mod = model.from_formula(fmla, joined)
res = mod.fit()
@@ -144,18 +149,18 @@ def test_parser(config):
for key in orig_data:
eq = orig_data[key]
if exog[key] is None:
- assert eq['exog'] is None
+ assert eq["exog"] is None
else:
- assert_frame_equal(exog[key], eq['exog'])
- assert_frame_equal(dep[key], eq['dependent'])
+ assert_frame_equal(exog[key], eq["exog"])
+ assert_frame_equal(dep[key], eq["dependent"])
if endog[key] is None:
- assert eq['endog'] is None
+ assert eq["endog"] is None
else:
- assert_frame_equal(endog[key], eq['endog'])
+ assert_frame_equal(endog[key], eq["endog"])
if instr[key] is None:
- assert eq['instruments'] is None
+ assert eq["instruments"] is None
else:
- assert_frame_equal(instr[key], eq['instruments'])
+ assert_frame_equal(instr[key], eq["instruments"])
labels = parser.equation_labels
for label in labels:
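
A usage sketch of the system formula syntax these tests exercise: one formula per equation, with endogenous regressors and their instruments inside [ ~ ]. Column names and data are synthetic; only the bracket syntax and the from_formula/fit calls mirror the tests:

import numpy as np
from pandas import DataFrame
from linearmodels.system import IV3SLS

rng = np.random.RandomState(0)
cols = ["y1", "y2", "x11", "x14", "z11", "z12", "x21"]
df = DataFrame(rng.standard_normal((500, len(cols))), columns=cols)
fmla = {
    "eq1": "y1 ~ 1 + x11 + [x14 ~ z11 + z12]",  # x14 endogenous, z11/z12 instruments
    "eq2": "y2 ~ 1 + x21",                      # plain SUR-style equation
}
res = IV3SLS.from_formula(fmla, df).fit(cov_type="unadjusted")
print(res.params)
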
diff --git a/linearmodels/tests/system/test_gmm.py b/linearmodels/tests/system/test_gmm.py
index 44eb6d2ea0..1907d2b729 100644
--- a/linearmodels/tests/system/test_gmm.py
+++ b/linearmodels/tests/system/test_gmm.py
@@ -22,37 +22,38 @@
def gen_id(r):
- id = 'steps:{0}'.format(r[0])
+ id = "steps:{0}".format(r[0])
if r[1]:
- id += ',robust'
+ id += ",robust"
else:
- id += ',unadjusted'
+ id += ",unadjusted"
return id
ids = list(map(gen_id, params))
-@pytest.fixture(scope='module', params=params, ids=ids)
+@pytest.fixture(scope="module", params=params, ids=ids)
def data(request):
steps, robust = request.param
- weight_type = 'robust' if robust else 'unadjusted'
+ weight_type = "robust" if robust else "unadjusted"
eqns = generate_3sls_data_v2(k=3)
y = [eqns[key].dependent for key in eqns]
x = [np.concatenate([eqns[key].exog, eqns[key].endog], 1) for key in eqns]
z = [np.concatenate([eqns[key].exog, eqns[key].instruments], 1) for key in eqns]
- return AttrDict(eqns=eqns, x=x, y=y, z=z, steps=steps,
- robust=robust, weight_type=weight_type)
+ return AttrDict(
+ eqns=eqns, x=x, y=y, z=z, steps=steps, robust=robust, weight_type=weight_type
+ )
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def weight_data():
eqns = generate_3sls_data_v2(k=2)
mod = IV3SLS(eqns)
x = mod._x
z = mod._z
- res = mod.fit(cov_type='unadjusted')
+ res = mod.fit(cov_type="unadjusted")
eps = res.resids.values
sigma = res.sigma
return x, z, eps, sigma
@@ -95,7 +96,7 @@ def test_cov(data):
def test_formula_equivalence(data):
- mod = IVSystemGMM(data.eqns, weight_type='unadjusted')
+ mod = IVSystemGMM(data.eqns, weight_type="unadjusted")
formula = []
df = []
for i, key in enumerate(data.eqns):
@@ -104,31 +105,37 @@ def test_formula_equivalence(data):
ex = eqn.exog
en = eqn.endog
instr = eqn.instruments
- dep = DataFrame(dep, columns=['dep_{0}'.format(i)])
+ dep = DataFrame(dep, columns=["dep_{0}".format(i)])
has_const = False
if np.any(np.all(ex == 1, 0)):
ex = ex[:, 1:]
has_const = True
- ex = DataFrame(ex, columns=['ex_{0}_{1}'.format(i, j) for j in range(ex.shape[1])])
- en = DataFrame(en, columns=['en_{0}_{1}'.format(i, j) for j in range(en.shape[1])])
- instr = DataFrame(instr, columns=['instr_{0}_{1}'.format(i, j)
- for j in range(ex.shape[1])])
- fmla = ''.join(dep.columns) + ' ~ '
+ ex = DataFrame(
+ ex, columns=["ex_{0}_{1}".format(i, j) for j in range(ex.shape[1])]
+ )
+ en = DataFrame(
+ en, columns=["en_{0}_{1}".format(i, j) for j in range(en.shape[1])]
+ )
+ instr = DataFrame(
+ instr, columns=["instr_{0}_{1}".format(i, j) for j in range(ex.shape[1])]
+ )
+ fmla = "".join(dep.columns) + " ~ "
if has_const:
- fmla += ' 1 + '
- fmla += ' + '.join(ex.columns) + ' + ['
- fmla += ' + '.join(en.columns) + ' ~ '
- fmla += ' + '.join(instr.columns) + ' ] '
+ fmla += " 1 + "
+ fmla += " + ".join(ex.columns) + " + ["
+ fmla += " + ".join(en.columns) + " ~ "
+ fmla += " + ".join(instr.columns) + " ] "
formula.append(fmla)
df.extend([dep, ex, en, instr])
from collections import OrderedDict
+
formulas = OrderedDict()
for i, f in enumerate(formula):
- formulas['eq{0}'.format(i)] = f
+ formulas["eq{0}".format(i)] = f
df = concat(df, 1)
- formula_mod = IVSystemGMM.from_formula(formulas, df, weight_type='unadjusted')
- res = mod.fit(cov_type='unadjusted')
- formula_res = formula_mod.fit(cov_type='unadjusted')
+ formula_mod = IVSystemGMM.from_formula(formulas, df, weight_type="unadjusted")
+ res = mod.fit(cov_type="unadjusted")
+ formula_res = formula_mod.fit(cov_type="unadjusted")
assert_allclose(res.params, formula_res.params)
@@ -137,13 +144,13 @@ def test_formula_equivalence_weights(data):
eqn_copy = AttrDict()
for key in data.eqns:
eqn = {k: v for k, v in data.eqns[key].items()}
- nobs = eqn['dependent'].shape[0]
+ nobs = eqn["dependent"].shape[0]
w = np.random.chisquare(2, (nobs, 1)) / 2
weights[key] = w
- eqn['weights'] = w
+ eqn["weights"] = w
eqn_copy[key] = eqn
- mod = IVSystemGMM(eqn_copy, weight_type='unadjusted')
+ mod = IVSystemGMM(eqn_copy, weight_type="unadjusted")
df = []
formulas = OrderedDict()
for i, key in enumerate(data.eqns):
@@ -152,43 +159,52 @@ def test_formula_equivalence_weights(data):
ex = eqn.exog
en = eqn.endog
instr = eqn.instruments
- dep = DataFrame(dep, columns=['dep_{0}'.format(i)])
+ dep = DataFrame(dep, columns=["dep_{0}".format(i)])
has_const = False
if np.any(np.all(ex == 1, 0)):
ex = ex[:, 1:]
has_const = True
- ex = DataFrame(ex, columns=['ex_{0}_{1}'.format(i, j) for j in range(ex.shape[1])])
- en = DataFrame(en, columns=['en_{0}_{1}'.format(i, j) for j in range(en.shape[1])])
- instr = DataFrame(instr, columns=['instr_{0}_{1}'.format(i, j)
- for j in range(ex.shape[1])])
- fmla = ''.join(dep.columns) + ' ~ '
+ ex = DataFrame(
+ ex, columns=["ex_{0}_{1}".format(i, j) for j in range(ex.shape[1])]
+ )
+ en = DataFrame(
+ en, columns=["en_{0}_{1}".format(i, j) for j in range(en.shape[1])]
+ )
+ instr = DataFrame(
+ instr, columns=["instr_{0}_{1}".format(i, j) for j in range(ex.shape[1])]
+ )
+ fmla = "".join(dep.columns) + " ~ "
if has_const:
- fmla += ' 1 + '
- fmla += ' + '.join(ex.columns) + ' + ['
- fmla += ' + '.join(en.columns) + ' ~ '
- fmla += ' + '.join(instr.columns) + ' ] '
+ fmla += " 1 + "
+ fmla += " + ".join(ex.columns) + " + ["
+ fmla += " + ".join(en.columns) + " ~ "
+ fmla += " + ".join(instr.columns) + " ] "
formulas[key] = fmla
df.extend([dep, ex, en, instr])
df = concat(df, 1)
- formula_mod = IVSystemGMM.from_formula(formulas, df, weights=weights, weight_type='unadjusted')
- res = mod.fit(cov_type='unadjusted')
- formula_res = formula_mod.fit(cov_type='unadjusted')
+ formula_mod = IVSystemGMM.from_formula(
+ formulas, df, weights=weights, weight_type="unadjusted"
+ )
+ res = mod.fit(cov_type="unadjusted")
+ formula_res = formula_mod.fit(cov_type="unadjusted")
assert_allclose(res.params, formula_res.params)
def test_weight_options(data):
- mod = IVSystemGMM(data.eqns, weight_type='unadjusted', debiased=True, center=True)
- res = mod.fit(cov_type='unadjusted')
- assert res.weight_config == {'debiased': True, 'center': True}
- assert res.weight_type == 'unadjusted'
- assert 'Debiased: True' in str(res.summary)
+ mod = IVSystemGMM(data.eqns, weight_type="unadjusted", debiased=True, center=True)
+ res = mod.fit(cov_type="unadjusted")
+ assert res.weight_config == {"debiased": True, "center": True}
+ assert res.weight_type == "unadjusted"
+ assert "Debiased: True" in str(res.summary)
assert str(hex(id(res._weight_estimtor))) in res._weight_estimtor.__repr__()
- assert res._weight_estimtor.config == {'debiased': True, 'center': True}
- base_res = IVSystemGMM(data.eqns, weight_type='unadjusted').fit(cov_type='unadjusted')
+ assert res._weight_estimtor.config == {"debiased": True, "center": True}
+ base_res = IVSystemGMM(data.eqns, weight_type="unadjusted").fit(
+ cov_type="unadjusted"
+ )
assert np.all(np.diag(res.w) >= np.diag(base_res.w))
- mod = IVSystemGMM(data.eqns, weight_type='robust', debiased=True)
- res = mod.fit(cov_type='robust')
+ mod = IVSystemGMM(data.eqns, weight_type="robust", debiased=True)
+ res = mod.fit(cov_type="robust")
def test_no_constant_smoke():
@@ -199,21 +215,23 @@ def test_no_constant_smoke():
def test_unknown_weight_type(data):
with pytest.raises(ValueError):
- IVSystemGMM(data.eqns, weight_type='unknown')
+ IVSystemGMM(data.eqns, weight_type="unknown")
def test_unknown_cov_type(data):
mod = IVSystemGMM(data.eqns)
with pytest.raises(ValueError):
- mod.fit(cov_type='unknown')
+ mod.fit(cov_type="unknown")
with pytest.raises(ValueError):
mod.fit(cov_type=3)
def test_initial_weight_matrix(data):
mod = IVSystemGMM(data.eqns)
- z = [np.concatenate([data.eqns[key].exog, data.eqns[key].instruments], 1)
- for key in data.eqns]
+ z = [
+ np.concatenate([data.eqns[key].exog, data.eqns[key].instruments], 1)
+ for key in data.eqns
+ ]
z = np.concatenate(z, 1)
ze = z + np.random.standard_normal(size=z.shape)
w0 = ze.T @ ze / ze.shape[0]
@@ -225,30 +243,30 @@ def test_initial_weight_matrix(data):
def test_summary(data):
mod = IVSystemGMM(data.eqns)
res = mod.fit()
- assert 'Instruments' in res.summary.as_text()
- assert 'Weight Estimator' in res.summary.as_text()
+ assert "Instruments" in res.summary.as_text()
+ assert "Weight Estimator" in res.summary.as_text()
for eq in res.equations:
- assert 'Weight Estimator' in res.equations[eq].summary.as_text()
- assert 'Instruments' in res.equations[eq].summary.as_text()
+ assert "Weight Estimator" in res.equations[eq].summary.as_text()
+ assert "Instruments" in res.equations[eq].summary.as_text()
res = mod.fit(iter_limit=10)
if res.iterations > 2:
- assert 'Iterative System GMM' in res.summary.as_text()
+ assert "Iterative System GMM" in res.summary.as_text()
def test_summary_homoskedastic(data):
- mod = IVSystemGMM(data.eqns, weight_type='unadjusted', debiased=True)
- res = mod.fit(cov_type='homoskedastic', debiased=True)
- assert 'Homoskedastic (Unadjusted) Weighting' in res.summary.as_text()
+ mod = IVSystemGMM(data.eqns, weight_type="unadjusted", debiased=True)
+ res = mod.fit(cov_type="homoskedastic", debiased=True)
+ assert "Homoskedastic (Unadjusted) Weighting" in res.summary.as_text()
def test_fixed_sigma(data):
- mod = IVSystemGMM(data.eqns, weight_type='unadjusted')
- res = mod.fit(cov_type='unadjusted')
+ mod = IVSystemGMM(data.eqns, weight_type="unadjusted")
+ res = mod.fit(cov_type="unadjusted")
k = len(data.eqns)
b = np.random.standard_normal((k, 1))
sigma = b @ b.T + np.diag(np.ones(k))
- mod_sigma = IVSystemGMM(data.eqns, weight_type='unadjusted', sigma=sigma)
+ mod_sigma = IVSystemGMM(data.eqns, weight_type="unadjusted", sigma=sigma)
res_sigma = mod_sigma.fit()
assert np.any(res.params != res_sigma.params)
assert np.any(res.sigma != res_sigma.sigma)
@@ -259,7 +277,7 @@ def test_incorrect_sigma_shape(data):
b = np.random.standard_normal((k + 2, 1))
sigma = b @ b.T + np.diag(np.ones(k + 2))
with pytest.raises(ValueError):
- IVSystemGMM(data.eqns, weight_type='unadjusted', sigma=sigma)
+ IVSystemGMM(data.eqns, weight_type="unadjusted", sigma=sigma)
def test_invalid_sigma_usage(data):
@@ -267,7 +285,7 @@ def test_invalid_sigma_usage(data):
b = np.random.standard_normal((k, 1))
sigma = b @ b.T + np.diag(np.ones(k))
with pytest.warns(UserWarning):
- IVSystemGMM(data.eqns, weight_type='robust', sigma=sigma)
+ IVSystemGMM(data.eqns, weight_type="robust", sigma=sigma)
def test_j_statistic_direct(data):
@@ -290,22 +308,22 @@ def test_linear_constraint(data):
def test_kernel_equiv(data):
- mod = IVSystemGMM(data.eqns, weight_type='kernel', bandwidth=0)
- res = mod.fit(cov_type='kernel', debiased=True, bandwidth=0)
- assert 'Kernel (HAC) Weighting' in res.summary.as_text()
- rob_mod = IVSystemGMM(data.eqns, weight_type='robust')
- rob_res = rob_mod.fit(cov_type='robust', debiased=True)
+ mod = IVSystemGMM(data.eqns, weight_type="kernel", bandwidth=0)
+ res = mod.fit(cov_type="kernel", debiased=True, bandwidth=0)
+ assert "Kernel (HAC) Weighting" in res.summary.as_text()
+ rob_mod = IVSystemGMM(data.eqns, weight_type="robust")
+ rob_res = rob_mod.fit(cov_type="robust", debiased=True)
assert_allclose(res.tstats, rob_res.tstats)
def test_kernel_optimal_bandwidth(data):
- mod = IVSystemGMM(data.eqns, weight_type='kernel')
- res = mod.fit(cov_type='kernel', debiased=True)
+ mod = IVSystemGMM(data.eqns, weight_type="kernel")
+ res = mod.fit(cov_type="kernel", debiased=True)
nobs = data.eqns[list(data.eqns.keys())[0]].dependent.shape[0]
- assert res.weight_config['bandwidth'] == (nobs - 2)
+ assert res.weight_config["bandwidth"] == (nobs - 2)
- mod = IVSystemGMM(data.eqns, weight_type='kernel', optimal_bw=True)
- mod.fit(cov_type='kernel', debiased=True)
+ mod = IVSystemGMM(data.eqns, weight_type="kernel", optimal_bw=True)
+ mod.fit(cov_type="kernel", debiased=True)
def test_homoskedastic_weight_direct(weight_data, center, debias):
@@ -333,7 +351,7 @@ def test_heteroskedastic_weight_direct(weight_data, center, debias):
x, z, eps, sigma = weight_data
weights = wm.weight_matrix(x, z, eps, sigma=sigma)
k = len(z)
- ze = [z[i] * eps[:, i:i + 1] for i in range(k)]
+ ze = [z[i] * eps[:, i : i + 1] for i in range(k)]
ze = np.concatenate(ze, 1)
if center:
ze = ze - ze.mean(0)
@@ -350,11 +368,11 @@ def test_heteroskedastic_weight_direct(weight_data, center, debias):
def test_kernel_weight_direct(weight_data, center, debias):
bandwidth = 12
- wm = KernelWeightMatrix(center, debias, kernel='parzen', bandwidth=bandwidth)
+ wm = KernelWeightMatrix(center, debias, kernel="parzen", bandwidth=bandwidth)
x, z, eps, sigma = weight_data
weights = wm.weight_matrix(x, z, eps, sigma=sigma)
k = len(z)
- ze = [z[i] * eps[:, i:i + 1] for i in range(k)]
+ ze = [z[i] * eps[:, i : i + 1] for i in range(k)]
ze = np.concatenate(ze, 1)
if center:
ze = ze - ze.mean(0)
@@ -380,13 +398,16 @@ def test_fitted(data):
for i, key in enumerate(res.equations):
eq = res.equations[key]
fv = res.fitted_values[key].copy()
- fv.name = 'fitted_values'
+ fv.name = "fitted_values"
assert_series_equal(eq.fitted_values, fv)
b = eq.params.values
direct = mod._x[i] @ b
expected.append(direct[:, None])
assert_allclose(eq.fitted_values, direct, atol=1e-8)
expected = np.concatenate(expected, 1)
- expected = DataFrame(expected, index=mod._dependent[i].pandas.index,
- columns=[key for key in res.equations])
+ expected = DataFrame(
+ expected,
+ index=mod._dependent[i].pandas.index,
+ columns=[key for key in res.equations],
+ )
assert_frame_equal(expected, res.fitted_values)
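
test_heteroskedastic_weight_direct rebuilds the GMM weighting matrix by hand. A hedged sketch of that computation (the library's debiased option additionally rescales by a degree-of-freedom factor not shown here):

import numpy as np

def hetero_weight_matrix(z, eps, center=False):
    # z: list of (nobs, k_i) instrument arrays; eps: (nobs, neqn) residuals.
    ze = np.concatenate([z[i] * eps[:, i:i + 1] for i in range(len(z))], 1)
    if center:
        ze = ze - ze.mean(0)  # demean the moments before averaging
    return ze.T @ ze / ze.shape[0]

rng = np.random.default_rng(1)
z = [rng.standard_normal((250, 3)) for _ in range(2)]
eps = rng.standard_normal((250, 2))
w = hetero_weight_matrix(z, eps, center=True)
assert w.shape == (6, 6) and np.allclose(w, w.T)
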
diff --git a/linearmodels/tests/system/test_sur.py b/linearmodels/tests/system/test_sur.py
index d93aad6f97..bbcacdf3a8 100644
--- a/linearmodels/tests/system/test_sur.py
+++ b/linearmodels/tests/system/test_sur.py
@@ -27,12 +27,12 @@
def gen_id(param):
- idstr = 'homo' if isinstance(param[0], list) else 'hetero'
- idstr += '-const' if param[1] else ''
- idstr += '-correl' if param[2] != 0 else ''
- idstr += '-common' if param[3] else ''
- idstr += '-weights' if param[4] else ''
- idstr += '-dist' if param[4] else '-tuple'
+ idstr = "homo" if isinstance(param[0], list) else "hetero"
+ idstr += "-const" if param[1] else ""
+ idstr += "-correl" if param[2] != 0 else ""
+ idstr += "-common" if param[3] else ""
+ idstr += "-weights" if param[4] else ""
+ idstr += "-dist" if param[4] else "-tuple"
return idstr
@@ -48,9 +48,9 @@ def check_results(res1, res2):
assert_allclose(res1.wresids, res2.wresids)
assert_allclose(res1.tstats, res2.tstats)
assert_allclose(res1.std_errors, res2.std_errors)
- if hasattr(res1, 'rsquared_adj'):
+ if hasattr(res1, "rsquared_adj"):
assert_allclose(res1.rsquared_adj, res2.rsquared_adj)
- if hasattr(res1, 'f_statistic'):
+ if hasattr(res1, "f_statistic"):
assert_allclose(res1.f_statistic.stat, res2.f_statistic.stat)
if res2.f_statistic.df_denom is None:
# Do not test case of F dist due to DOF differences
@@ -58,7 +58,7 @@ def check_results(res1, res2):
def get_res(res):
- d = filter(lambda s: not s.startswith('_'), dir(res))
+ d = filter(lambda s: not s.startswith("_"), dir(res))
for attr in d:
value = getattr(res, attr)
if isinstance(value, Mapping):
@@ -71,19 +71,24 @@ def data(request):
p, const, rho, common_exog, included_weights, output_dict = request.param
if common_exog and isinstance(p, list):
p = 3
- return generate_data(p=p, const=const, rho=rho,
- common_exog=common_exog, included_weights=included_weights,
- output_dict=output_dict)
+ return generate_data(
+ p=p,
+ const=const,
+ rho=rho,
+ common_exog=common_exog,
+ included_weights=included_weights,
+ output_dict=output_dict,
+ )
-@pytest.fixture(scope='module', params=[0, 0.1])
+@pytest.fixture(scope="module", params=[0, 0.1])
def missing_data(request):
eqns = generate_data()
np.random.seed(12345)
missing = np.random.random_sample(500)
missing = missing < request.param
for key in eqns:
- eqns[key]['dependent'][missing] = np.nan
+ eqns[key]["dependent"][missing] = np.nan
return eqns
@@ -91,46 +96,51 @@ def missing_data(request):
def gen_id(param):
- idstr = 'const' if param[0] else ''
- idstr += '-correl' if param[1] != 0 else ''
- idstr += '-weights' if param[2] else ''
+ idstr = "const" if param[0] else ""
+ idstr += "-correl" if param[1] != 0 else ""
+ idstr += "-weights" if param[2] else ""
return idstr
ids = list(map(gen_id, params))
-@pytest.fixture(scope='module', params=params, ids=ids)
+@pytest.fixture(scope="module", params=params, ids=ids)
def mvreg_data(request):
const, rho, included_weights = request.param
- values = generate_data(const=const, rho=rho,
- common_exog=True, included_weights=included_weights)
+ values = generate_data(
+ const=const, rho=rho, common_exog=True, included_weights=included_weights
+ )
dep = []
for key in values:
- exog = values[key]['exog']
- dep.append(values[key]['dependent'])
+ exog = values[key]["exog"]
+ dep.append(values[key]["dependent"])
return np.hstack(dep), exog
-kernels = ['bartlett', 'newey-west', 'parzen', 'gallant', 'qs', 'andrews']
+kernels = ["bartlett", "newey-west", "parzen", "gallant", "qs", "andrews"]
bandwidths = [None, 0, 10]
debiased = [True, False]
params = list(product(kernels, bandwidths, debiased))
-ids = list(map(lambda p: p[0] + ', BW: ' + str(p[1]) + ', Debiased: ' + str(p[2]), params))
+ids = list(
+ map(lambda p: p[0] + ", BW: " + str(p[1]) + ", Debiased: " + str(p[2]), params)
+)
@pytest.fixture(params=params, ids=ids)
def kernel_options(request):
- return {'kernel': request.param[0],
- 'bandwidth': request.param[1],
- 'debiased': request.param[2]}
+ return {
+ "kernel": request.param[0],
+ "bandwidth": request.param[1],
+ "debiased": request.param[2],
+ }
def test_smoke(data):
mod = SUR(data)
mod.fit()
- mod.fit(cov_type='unadjusted')
- mod.fit(cov_type='unadjusted', method='ols')
+ mod.fit(cov_type="unadjusted")
+ mod.fit(cov_type="unadjusted", method="ols")
res = mod.fit(full_cov=False)
get_res(res)
@@ -140,28 +150,39 @@ def test_errors():
with pytest.raises(TypeError):
SUR([])
with pytest.raises(TypeError):
- SUR({'a': 'absde', 'b': 12345})
-
- moddata = {'a': {'dependent': np.random.standard_normal((100, 1)),
- 'exog': np.random.standard_normal((100, 5))}}
+ SUR({"a": "absde", "b": 12345})
+
+ moddata = {
+ "a": {
+ "dependent": np.random.standard_normal((100, 1)),
+ "exog": np.random.standard_normal((100, 5)),
+ }
+ }
with pytest.raises(ValueError):
mod = SUR(moddata)
- mod.fit(cov_type='unknown')
-
- moddata = {'a': {'dependent': np.random.standard_normal((100, 1)),
- 'exog': np.random.standard_normal((101, 5))}}
+ mod.fit(cov_type="unknown")
+
+ moddata = {
+ "a": {
+ "dependent": np.random.standard_normal((100, 1)),
+ "exog": np.random.standard_normal((101, 5)),
+ }
+ }
with pytest.raises(ValueError):
SUR(moddata)
- moddata = {'a': {'dependent': np.random.standard_normal((10, 1)),
- 'exog': np.random.standard_normal((10, 20))}}
+ moddata = {
+ "a": {
+ "dependent": np.random.standard_normal((10, 1)),
+ "exog": np.random.standard_normal((10, 20)),
+ }
+ }
with pytest.raises(ValueError):
SUR(moddata)
x = np.random.standard_normal((100, 2))
x = np.c_[x, x]
- moddata = {'a': {'dependent': np.random.standard_normal((100, 1)),
- 'exog': x}}
+ moddata = {"a": {"dependent": np.random.standard_normal((100, 1)), "exog": x}}
with pytest.raises(ValueError):
SUR(moddata)
@@ -170,70 +191,72 @@ def test_mv_reg_smoke(mvreg_data):
dependent, exog = mvreg_data
mod = SUR.multivariate_ls(dependent, exog)
mod.fit()
- mod.fit(cov_type='unadjusted')
- res = mod.fit(cov_type='unadjusted', method='ols')
- assert res.method == 'OLS'
+ mod.fit(cov_type="unadjusted")
+ res = mod.fit(cov_type="unadjusted", method="ols")
+ assert res.method == "OLS"
res = mod.fit(full_cov=False)
get_res(res)
def test_formula():
- data = DataFrame(np.random.standard_normal((500, 4)),
- columns=['y1', 'y2', 'x1', 'x2'])
- formula = {'eq1': 'y1 ~ 1 + x1', 'eq2': 'y2 ~ 1 + x2'}
+ data = DataFrame(
+ np.random.standard_normal((500, 4)), columns=["y1", "y2", "x1", "x2"]
+ )
+ formula = {"eq1": "y1 ~ 1 + x1", "eq2": "y2 ~ 1 + x2"}
mod = SUR.from_formula(formula, data)
mod.fit()
- formula = '{y1 ~ 1 + x1} {y2 ~ 1 + x2}'
+ formula = "{y1 ~ 1 + x1} {y2 ~ 1 + x2}"
mod = SUR.from_formula(formula, data)
- mod.fit(cov_type='heteroskedastic')
+ mod.fit(cov_type="heteroskedastic")
- formula = '''
+ formula = """
{y1 ~ 1 + x1}
{y2 ~ 1 + x2}
- '''
+ """
mod = SUR.from_formula(formula, data)
- mod.fit(cov_type='heteroskedastic')
+ mod.fit(cov_type="heteroskedastic")
- formula = '''
+ formula = """
{eq.a:y1 ~ 1 + x1}
{second: y2 ~ 1 + x2}
- '''
+ """
mod = SUR.from_formula(formula, data)
- res = mod.fit(cov_type='heteroskedastic')
- assert 'eq.a' in res.equation_labels
- assert 'second' in res.equation_labels
+ res = mod.fit(cov_type="heteroskedastic")
+ assert "eq.a" in res.equation_labels
+ assert "second" in res.equation_labels
# TODO: Implement weights
# TODO: 1. MV OLS and OLS (weighted) homo and hetero
# TODO: Implement observation dropping and check
+
def test_mv_ols_equivalence(mvreg_data):
dependent, exog = mvreg_data
mod = SUR.multivariate_ls(dependent, exog)
- res = mod.fit(cov_type='unadjusted')
+ res = mod.fit(cov_type="unadjusted")
keys = res.equation_labels
- assert res.method == 'OLS'
+ assert res.method == "OLS"
for i in range(dependent.shape[1]):
ols_mod = OLS(dependent[:, i], exog)
- ols_res = ols_mod.fit(cov_type='unadjusted', debiased=False)
+ ols_res = ols_mod.fit(cov_type="unadjusted", debiased=False)
mv_res = res.equations[keys[i]]
- assert mv_res.method == 'OLS'
+ assert mv_res.method == "OLS"
check_results(mv_res, ols_res)
def test_mv_ols_equivalence_robust(mvreg_data):
dependent, exog = mvreg_data
mod = SUR.multivariate_ls(dependent, exog)
- res = mod.fit(cov_type='robust')
+ res = mod.fit(cov_type="robust")
keys = res.equation_labels
for i in range(dependent.shape[1]):
ols_mod = OLS(dependent[:, i], exog)
- ols_res = ols_mod.fit(cov_type='robust', debiased=False)
+ ols_res = ols_mod.fit(cov_type="robust", debiased=False)
mv_res = res.equations[keys[i]]
check_results(mv_res, ols_res)
@@ -241,12 +264,12 @@ def test_mv_ols_equivalence_robust(mvreg_data):
def test_mv_ols_equivalence_debiased(mvreg_data):
dependent, exog = mvreg_data
mod = SUR.multivariate_ls(dependent, exog)
- res = mod.fit(cov_type='unadjusted', debiased=True)
+ res = mod.fit(cov_type="unadjusted", debiased=True)
keys = res.equation_labels
for i in range(dependent.shape[1]):
ols_mod = OLS(dependent[:, i], exog)
- ols_res = ols_mod.fit(cov_type='unadjusted', debiased=True)
+ ols_res = ols_mod.fit(cov_type="unadjusted", debiased=True)
mv_res = res.equations[keys[i]]
check_results(mv_res, ols_res)
@@ -254,12 +277,12 @@ def test_mv_ols_equivalence_debiased(mvreg_data):
def test_mv_ols_equivalence_hetero_debiased(mvreg_data):
dependent, exog = mvreg_data
mod = SUR.multivariate_ls(dependent, exog)
- res = mod.fit(cov_type='robust', debiased=True)
+ res = mod.fit(cov_type="robust", debiased=True)
keys = res.equation_labels
for i in range(dependent.shape[1]):
ols_mod = OLS(dependent[:, i], exog)
- ols_res = ols_mod.fit(cov_type='robust', debiased=True)
+ ols_res = ols_mod.fit(cov_type="robust", debiased=True)
mv_res = res.equations[keys[i]]
check_results(mv_res, ols_res)
@@ -272,12 +295,11 @@ def test_gls_eye_mv_ols_equiv(mvreg_data):
ad = AttrDict()
for i in range(dependent.shape[1]):
- key = 'dependent.{0}'.format(i)
+ key = "dependent.{0}".format(i)
df = DataFrame(dependent[:, [i]], columns=[key])
- ad[key] = {'dependent': df,
- 'exog': exog.copy()}
+ ad[key] = {"dependent": df, "exog": exog.copy()}
gls_mod = SUR(ad, sigma=np.eye(len(ad)))
- gls_res = gls_mod.fit(method='gls')
+ gls_res = gls_mod.fit(method="gls")
check_results(mv_res, gls_res)
for i in range(dependent.shape[1]):
@@ -285,8 +307,8 @@ def test_gls_eye_mv_ols_equiv(mvreg_data):
gls_res_eq = gls_res.equations[keys[i]]
check_results(mv_res_eq, gls_res_eq)
- mv_res = mv_mod.fit(cov_type='robust')
- gls_res = gls_mod.fit(cov_type='robust', method='gls')
+ mv_res = mv_mod.fit(cov_type="robust")
+ gls_res = gls_mod.fit(cov_type="robust", method="gls")
check_results(mv_res, gls_res)
for i in range(dependent.shape[1]):
@@ -294,8 +316,8 @@ def test_gls_eye_mv_ols_equiv(mvreg_data):
gls_res_eq = gls_res.equations[keys[i]]
check_results(mv_res_eq, gls_res_eq)
- mv_res = mv_mod.fit(cov_type='robust', debiased=True)
- gls_res = gls_mod.fit(cov_type='robust', method='gls', debiased=True)
+ mv_res = mv_mod.fit(cov_type="robust", debiased=True)
+ gls_res = gls_mod.fit(cov_type="robust", method="gls", debiased=True)
check_results(mv_res, gls_res)
for i in range(dependent.shape[1]):
@@ -312,12 +334,11 @@ def test_gls_without_mv_ols_equiv(mvreg_data):
ad = AttrDict()
for i in range(dependent.shape[1]):
- key = 'dependent.{0}'.format(i)
+ key = "dependent.{0}".format(i)
df = DataFrame(dependent[:, [i]], columns=[key])
- ad[key] = {'dependent': df,
- 'exog': exog.copy()}
+ ad[key] = {"dependent": df, "exog": exog.copy()}
gls_mod = SUR(ad)
- gls_res = gls_mod.fit(method='ols')
+ gls_res = gls_mod.fit(method="ols")
check_results(mv_res, gls_res)
for i in range(dependent.shape[1]):
@@ -325,8 +346,8 @@ def test_gls_without_mv_ols_equiv(mvreg_data):
gls_res_eq = gls_res.equations[keys[i]]
check_results(mv_res_eq, gls_res_eq)
- mv_res = mv_mod.fit(cov_type='robust')
- gls_res = gls_mod.fit(cov_type='robust', method='ols')
+ mv_res = mv_mod.fit(cov_type="robust")
+ gls_res = gls_mod.fit(cov_type="robust", method="ols")
check_results(mv_res, gls_res)
for i in range(dependent.shape[1]):
@@ -334,8 +355,8 @@ def test_gls_without_mv_ols_equiv(mvreg_data):
gls_res_eq = gls_res.equations[keys[i]]
check_results(mv_res_eq, gls_res_eq)
- mv_res = mv_mod.fit(cov_type='robust', debiased=True)
- gls_res = gls_mod.fit(cov_type='robust', method='ols', debiased=True)
+ mv_res = mv_mod.fit(cov_type="robust", debiased=True)
+ gls_res = gls_mod.fit(cov_type="robust", method="ols", debiased=True)
check_results(mv_res, gls_res)
for i in range(dependent.shape[1]):
@@ -346,18 +367,18 @@ def test_gls_without_mv_ols_equiv(mvreg_data):
def test_ols_against_gls(data):
mod = SUR(data)
- res = mod.fit(method='gls')
+ res = mod.fit(method="gls")
sigma = res.sigma
sigma_m12 = inv_matrix_sqrt(sigma)
key = list(data.keys())[0]
if isinstance(data[key], Mapping):
- y = [data[key]['dependent'] for key in data]
- x = [data[key]['exog'] for key in data]
+ y = [data[key]["dependent"] for key in data]
+ x = [data[key]["exog"] for key in data]
try:
- w = [data[key]['weights'] for key in data]
+ w = [data[key]["weights"] for key in data]
except KeyError:
- w = [np.ones_like(data[key]['dependent']) for key in data]
+ w = [np.ones_like(data[key]["dependent"]) for key in data]
else:
y = [data[key][0] for key in data]
x = [data[key][1] for key in data]
@@ -388,17 +409,17 @@ def test_constraint_setting(data):
q = Series([0, 1], index=r.index)
mod.add_constraints(r)
- mod.fit(method='ols')
- res = mod.fit(method='ols', cov_type='unadjusted')
+ mod.fit(method="ols")
+ res = mod.fit(method="ols", cov_type="unadjusted")
assert_allclose(r.values @ res.params.values[:, None], np.zeros((2, 1)), atol=1e-8)
- mod.fit(method='gls')
- res = mod.fit(method='gls', cov_type='unadjusted')
+ mod.fit(method="gls")
+ res = mod.fit(method="gls", cov_type="unadjusted")
assert_allclose(r.values @ res.params.values[:, None], np.zeros((2, 1)), atol=1e-8)
mod.add_constraints(r, q)
- res = mod.fit(method='ols')
+ res = mod.fit(method="ols")
assert_allclose(r.values @ res.params.values[:, None], q.values[:, None], atol=1e-8)
- res = mod.fit(method='gls')
+ res = mod.fit(method="gls")
assert_allclose(r.values @ res.params.values[:, None], q.values[:, None], atol=1e-8)
@@ -457,44 +478,47 @@ def test_missing(data):
primes = [11, 13, 17, 19, 23]
for i, key in enumerate(data):
if isinstance(data[key], Mapping):
- data[key]['dependent'][::primes[i % 5]] = np.nan
+ data[key]["dependent"][:: primes[i % 5]] = np.nan
else:
- data[key][0][::primes[i % 5]] = np.nan
+ data[key][0][:: primes[i % 5]] = np.nan
with warnings.catch_warnings(record=True) as w:
SUR(data)
assert len(w) == 1
- assert 'missing' in w[0].message.args[0]
+ assert "missing" in w[0].message.args[0]
def test_formula_errors():
- data = DataFrame(np.random.standard_normal((500, 4)),
- columns=['y1', 'y2', 'x1', 'x2'])
+ data = DataFrame(
+ np.random.standard_normal((500, 4)), columns=["y1", "y2", "x1", "x2"]
+ )
with pytest.raises(TypeError):
SUR.from_formula(np.ones(10), data)
def test_formula_repeated_key():
- data = DataFrame(np.random.standard_normal((500, 4)),
- columns=['y1', 'y2', 'x1', 'x2'])
+ data = DataFrame(
+ np.random.standard_normal((500, 4)), columns=["y1", "y2", "x1", "x2"]
+ )
- formula = '''
+ formula = """
{first:y1 ~ 1 + x1}
{first: y2 ~ 1 + x2}
- '''
+ """
mod = SUR.from_formula(formula, data)
res = mod.fit()
- assert 'first' in res.equation_labels
- assert 'first.0' in res.equation_labels
+ assert "first" in res.equation_labels
+ assert "first.0" in res.equation_labels
def test_formula_weights():
- data = DataFrame(np.random.standard_normal((500, 4)),
- columns=['y1', 'y2', 'x1', 'x2'])
- weights = DataFrame(np.random.chisquare(5, (500, 2)), columns=['eq1', 'eq2'])
+ data = DataFrame(
+ np.random.standard_normal((500, 4)), columns=["y1", "y2", "x1", "x2"]
+ )
+ weights = DataFrame(np.random.chisquare(5, (500, 2)), columns=["eq1", "eq2"])
formula = OrderedDict()
- formula['eq1'] = 'y1 ~ 1 + x1'
- formula['eq2'] = 'y2 ~ 1 + x1'
+ formula["eq1"] = "y1 ~ 1 + x1"
+ formula["eq2"] = "y2 ~ 1 + x1"
mod = SUR.from_formula(formula, data, weights=weights)
mod.fit()
expected = weights.values[:, [0]]
@@ -502,8 +526,8 @@ def test_formula_weights():
expected = weights.values[:, [1]]
assert_allclose(mod._w[1], expected / expected.mean())
- formula = '{y1 ~ 1 + x1} {y2 ~ 1 + x2}'
- weights = DataFrame(np.random.chisquare(5, (500, 2)), columns=['y1', 'y2'])
+ formula = "{y1 ~ 1 + x1} {y2 ~ 1 + x2}"
+ weights = DataFrame(np.random.chisquare(5, (500, 2)), columns=["y1", "y2"])
mod = SUR.from_formula(formula, data, weights=weights)
mod.fit()
expected = weights.values[:, [0]]
@@ -513,31 +537,32 @@ def test_formula_weights():
def test_formula_partial_weights():
- data = DataFrame(np.random.standard_normal((500, 4)),
- columns=['y1', 'y2', 'x1', 'x2'])
- weights = DataFrame(np.random.chisquare(5, (500, 1)), columns=['eq2'])
+ data = DataFrame(
+ np.random.standard_normal((500, 4)), columns=["y1", "y2", "x1", "x2"]
+ )
+ weights = DataFrame(np.random.chisquare(5, (500, 1)), columns=["eq2"])
formula = OrderedDict()
- formula['eq1'] = 'y1 ~ 1 + x1'
- formula['eq2'] = 'y2 ~ 1 + x1'
+ formula["eq1"] = "y1 ~ 1 + x1"
+ formula["eq2"] = "y2 ~ 1 + x1"
with warnings.catch_warnings(record=True) as w:
mod = SUR.from_formula(formula, data, weights=weights)
assert len(w) == 1
- assert 'Weights' in w[0].message.args[0]
- assert 'eq1' in w[0].message.args[0]
- assert 'eq2' not in w[0].message.args[0]
+ assert "Weights" in w[0].message.args[0]
+ assert "eq1" in w[0].message.args[0]
+ assert "eq2" not in w[0].message.args[0]
mod.fit()
expected = np.ones((500, 1))
assert_allclose(mod._w[0], expected / expected.mean())
expected = weights.values[:, [0]]
assert_allclose(mod._w[1], expected / expected.mean())
- formula = '{y1 ~ 1 + x1} {y2 ~ 1 + x2}'
- weights = DataFrame(np.random.chisquare(5, (500, 1)), columns=['y2'])
+ formula = "{y1 ~ 1 + x1} {y2 ~ 1 + x2}"
+ weights = DataFrame(np.random.chisquare(5, (500, 1)), columns=["y2"])
with warnings.catch_warnings(record=True) as w:
mod = SUR.from_formula(formula, data, weights=weights)
assert len(w) == 1
- assert 'y1' in w[0].message.args[0]
- assert 'y2' not in w[0].message.args[0]
+ assert "y1" in w[0].message.args[0]
+ assert "y2" not in w[0].message.args[0]
expected = np.ones((500, 1))
assert_allclose(mod._w[0], expected / expected.mean())
@@ -555,22 +580,22 @@ def test_against_direct_model(data):
keys = list(data.keys())
if not isinstance(data[keys[0]], Mapping):
return
- if 'weights' in data[keys[0]]:
+ if "weights" in data[keys[0]]:
return
y = []
x = []
data_copy = OrderedDict()
for i in range(min(3, len(data))):
data_copy[keys[i]] = data[keys[i]]
- y.append(data[keys[i]]['dependent'])
- x.append(data[keys[i]]['exog'])
+ y.append(data[keys[i]]["dependent"])
+ x.append(data[keys[i]]["exog"])
direct = simple_sur(y, x)
mod = SUR(data_copy)
- res = mod.fit(method='ols')
+ res = mod.fit(method="ols")
assert_allclose(res.params.values[:, None], direct.beta0)
- res = mod.fit(method='gls')
+ res = mod.fit(method="gls")
assert_allclose(res.params.values[:, None], direct.beta1)
@@ -590,33 +615,45 @@ def test_model_repr(data):
repr = mod.__repr__()
assert str(len(data)) in repr
assert str(hex(id(mod))) in repr
- assert 'Seemingly Unrelated Regression (SUR)' in repr
+ assert "Seemingly Unrelated Regression (SUR)" in repr
def test_mv_ols_hac_smoke(kernel_options):
- data = generate_data(p=3, const=True, rho=0.8, common_exog=False,
- included_weights=False, output_dict=True)
+ data = generate_data(
+ p=3,
+ const=True,
+ rho=0.8,
+ common_exog=False,
+ included_weights=False,
+ output_dict=True,
+ )
mod = SUR(data)
- res = mod.fit(cov_type='kernel', **kernel_options)
- assert 'Kernel (HAC) ' in str(res)
- assert 'Kernel: {0}'.format(kernel_options['kernel']) in str(res)
- if kernel_options['bandwidth'] == 0:
- res_base = mod.fit(cov_type='robust', debiased=kernel_options['debiased'])
+ res = mod.fit(cov_type="kernel", **kernel_options)
+ assert "Kernel (HAC) " in str(res)
+ assert "Kernel: {0}".format(kernel_options["kernel"]) in str(res)
+ if kernel_options["bandwidth"] == 0:
+ res_base = mod.fit(cov_type="robust", debiased=kernel_options["debiased"])
assert_allclose(res.tstats, res_base.tstats)
def test_invalid_kernel_options(kernel_options):
- data = generate_data(p=3, const=True, rho=0.8, common_exog=False,
- included_weights=False, output_dict=True)
+ data = generate_data(
+ p=3,
+ const=True,
+ rho=0.8,
+ common_exog=False,
+ included_weights=False,
+ output_dict=True,
+ )
mod = SUR(data)
with pytest.raises(TypeError):
ko = {k: v for k, v in kernel_options.items()}
- ko['bandwidth'] = 'None'
- mod.fit(cov_type='kernel', **ko)
+ ko["bandwidth"] = "None"
+ mod.fit(cov_type="kernel", **ko)
with pytest.raises(TypeError):
ko = {k: v for k, v in kernel_options.items()}
- ko['kernel'] = 1
- mod.fit(cov_type='kernel', **ko)
+ ko["kernel"] = 1
+ mod.fit(cov_type="kernel", **ko)
def test_fitted(data):
@@ -626,29 +663,35 @@ def test_fitted(data):
for i, key in enumerate(res.equations):
eq = res.equations[key]
fv = res.fitted_values[key].copy()
- fv.name = 'fitted_values'
+ fv.name = "fitted_values"
assert_series_equal(eq.fitted_values, fv)
b = eq.params.values
direct = mod._x[i] @ b
expected.append(direct[:, None])
assert_allclose(eq.fitted_values, direct, atol=1e-8)
expected = np.concatenate(expected, 1)
- expected = DataFrame(expected, index=mod._dependent[i].pandas.index,
- columns=[key for key in res.equations])
+ expected = DataFrame(
+ expected,
+ index=mod._dependent[i].pandas.index,
+ columns=[key for key in res.equations],
+ )
assert_frame_equal(expected, res.fitted_values)
-@pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+@pytest.mark.filterwarnings("ignore::linearmodels.utility.MissingValueWarning")
def test_predict(missing_data):
mod = SUR(missing_data)
res = mod.fit()
pred = res.predict()
for key in pred:
- assert_series_equal(pred[key].iloc[:, 0], res.equations[key].fitted_values,
- check_names=False)
+ assert_series_equal(
+ pred[key].iloc[:, 0], res.equations[key].fitted_values, check_names=False
+ )
pred = res.predict(fitted=False, idiosyncratic=True)
for key in pred:
- assert_series_equal(pred[key].iloc[:, 0], res.equations[key].resids, check_names=False)
+ assert_series_equal(
+ pred[key].iloc[:, 0], res.equations[key].resids, check_names=False
+ )
pred = res.predict(fitted=True, idiosyncratic=True)
assert isinstance(pred, dict)
for key in res.equations:
@@ -662,12 +705,12 @@ def test_predict(missing_data):
assert_frame_equal(pred, res.resids)
pred = res.predict(fitted=True, idiosyncratic=True, dataframe=True)
assert isinstance(pred, dict)
- assert 'fitted_values' in pred
- assert_frame_equal(pred['fitted_values'], res.fitted_values)
- assert 'idiosyncratic' in pred
- assert_frame_equal(pred['idiosyncratic'], res.resids)
+ assert "fitted_values" in pred
+ assert_frame_equal(pred["fitted_values"], res.fitted_values)
+ assert "idiosyncratic" in pred
+ assert_frame_equal(pred["idiosyncratic"], res.resids)
- nobs = missing_data[list(missing_data.keys())[0]]['dependent'].shape[0]
+ nobs = missing_data[list(missing_data.keys())[0]]["dependent"].shape[0]
pred = res.predict(fitted=True, idiosyncratic=False, dataframe=True, missing=True)
assert pred.shape[0] == nobs
@@ -676,9 +719,55 @@ def test_predict(missing_data):
assert pred[key].shape[0] == nobs
-@pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+@pytest.mark.filterwarnings("ignore::linearmodels.utility.MissingValueWarning")
def test_predict_error(missing_data):
mod = SUR(missing_data)
res = mod.fit()
with pytest.raises(ValueError):
res.predict(fitted=False, idiosyncratic=False)
+
+
+def reference_mcelroy(u, y, sigma):
+ u = np.asarray(u)
+ nobs = u.shape[0]
+ sigma = np.asarray(sigma)
+ y = np.asarray(y)
+ u = u.T.ravel()
+ y = y.T.ravel()
+ sigma_inv = np.linalg.inv(sigma)
+ omega_inv = np.kron(sigma_inv, np.eye(nobs))
+ num = u @ omega_inv @ u
+ iota = np.ones((nobs, 1))
+ core = np.kron(sigma_inv, np.eye(nobs) - iota @ iota.T / nobs)
+ denom = y @ core @ y
+
+ return 1 - num / denom
+
+
+def reference_berndt(u, y):
+ u = np.asarray(u)
+ nobs = u.shape[0]
+ num = np.linalg.det(u.T @ u / nobs)
+ y = np.asarray(y)
+ mu = y.mean(0)
+ y = y - mu
+ denom = np.linalg.det(y.T @ y / nobs)
+ return 1 - num / denom
+
+
+def test_system_r2_direct():
+ eqns = generate_data(k=3)
+ mod = SUR(eqns)
+ res = mod.fit(method="ols", cov_type="unadjusted")
+ y = np.hstack([eqns[eq]["dependent"] for eq in eqns])
+ ref = reference_mcelroy(res.resids, y, res.sigma)
+ assert_allclose(ref, res.system_rsquared.mcelroy)
+ ref = reference_berndt(res.resids, y)
+ assert_allclose(ref, res.system_rsquared.berndt)
+
+ res = mod.fit(method="gls", cov_type="unadjusted", iter_limit=100)
+ y = np.hstack([eqns[eq]["dependent"] for eq in eqns])
+ ref = reference_mcelroy(res.resids, y, res.sigma)
+ assert_allclose(ref, res.system_rsquared.mcelroy)
+ ref = reference_berndt(res.resids, y)
+ assert_allclose(ref, res.system_rsquared.berndt, atol=1e-3, rtol=1e-3)
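
For reference, the measures implemented by the two helpers above are, with notation
read off the code (u-hat and y are the TK-vectors formed by stacking each equation's
T observations, Sigma-hat is the K x K residual covariance, U-hat and Y the T x K
residual and dependent matrices, and iota a T-vector of ones):

\[
R^2_{\mathrm{McElroy}} = 1 -
    \frac{\hat{u}'(\hat{\Sigma}^{-1} \otimes I_T)\hat{u}}
         {y'\bigl[\hat{\Sigma}^{-1} \otimes (I_T - \iota\iota'/T)\bigr]y},
\qquad
R^2_{\mathrm{Berndt}} = 1 -
    \frac{\det(\hat{U}'\hat{U}/T)}{\det(\tilde{Y}'\tilde{Y}/T)},
\quad \tilde{Y} = Y - \bar{Y}.
\]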
diff --git a/linearmodels/tests/system/test_sur_against_stata.py b/linearmodels/tests/system/test_sur_against_stata.py
index 8bf1599232..a43633d86d 100644
--- a/linearmodels/tests/system/test_sur_against_stata.py
+++ b/linearmodels/tests/system/test_sur_against_stata.py
@@ -12,42 +12,44 @@
from linearmodels.tests.system.results.parse_stata_results import stata_results
from linearmodels.utility import AttrDict
-pytestmark = pytest.mark.filterwarnings('ignore::linearmodels.utility.MissingValueWarning')
+pytestmark = pytest.mark.filterwarnings(
+ "ignore::linearmodels.utility.MissingValueWarning"
+)
-@pytest.fixture(scope='module', params=list(stata_results.keys()))
+@pytest.fixture(scope="module", params=list(stata_results.keys()))
def model_data(request):
key = request.param
- dgp, model_type = key.split('-')
- if dgp == 'basic':
+ dgp, model_type = key.split("-")
+ if dgp == "basic":
data = basic_data
- elif dgp == 'common':
+ elif dgp == "common":
data = common_data
for i, data_key in enumerate(data):
if i == 0:
- exog = data[data_key]['exog']
+ exog = data[data_key]["exog"]
else:
- data[data_key]['exog'] = exog
+ data[data_key]["exog"] = exog
else: # dgp == 'missing'
data = missing_data
- cov_kwds = {'cov_type': 'unadjusted'}
- if model_type == 'ss':
- cov_kwds['debiased'] = True
+ cov_kwds = {"cov_type": "unadjusted"}
+ if model_type == "ss":
+ cov_kwds["debiased"] = True
stata_result = stata_results[key]
rekeyed_data = OrderedDict()
for data_key in data:
temp = data[data_key]
- new_key = temp['dependent'].columns[0]
+ new_key = temp["dependent"].columns[0]
rekeyed_data[new_key] = temp
constraint = None
- if model_type == 'constrained':
+ if model_type == "constrained":
cols = []
widths = []
for new_key in rekeyed_data:
- exog = rekeyed_data[new_key]['exog']
- cols.extend([new_key + '_' + col for col in exog.columns])
+ exog = rekeyed_data[new_key]["exog"]
+ cols.extend([new_key + "_" + col for col in exog.columns])
widths.append(exog.shape[1])
- r = pd.DataFrame(columns=cols, index=['r0', 'r1'], dtype=np.float64)
+ r = pd.DataFrame(columns=cols, index=["r0", "r1"], dtype=np.float64)
r.iloc[:, :] = 0.0
r.iloc[:, 0] = -1.0
r.iloc[0, widths[0]] = 1.0
@@ -59,9 +61,16 @@ def model_data(request):
mod.add_constraints(constraint)
res = mod.fit(**cov_kwds)
- return AttrDict(data=rekeyed_data, cov_kwds=cov_kwds, model_type=model_type,
- stata_result=stata_result, key=key, constraint=constraint,
- mod=mod, res=res)
+ return AttrDict(
+ data=rekeyed_data,
+ cov_kwds=cov_kwds,
+ model_type=model_type,
+ stata_result=stata_result,
+ key=key,
+ constraint=constraint,
+ mod=mod,
+ res=res,
+ )
def test_params(model_data):
@@ -105,12 +114,12 @@ def test_f_stat(model_data):
for i, key in enumerate(res.equations):
eq = res.equations[key]
stat = eq.f_statistic.stat
- stata_stat = stata_stats.loc['F_{0}'.format(i + 1)].squeeze()
+ stata_stat = stata_stats.loc["F_{0}".format(i + 1)].squeeze()
if np.isnan(stata_stat):
- stata_stat = stata_stats.loc['chi2_{0}'.format(i + 1)].squeeze()
+ stata_stat = stata_stats.loc["chi2_{0}".format(i + 1)].squeeze()
assert_allclose(stat, stata_stat)
pval = eq.f_statistic.pval
- stata_pval = stata_stats.loc['p_{0}'.format(i + 1)]
+ stata_pval = stata_stats.loc["p_{0}".format(i + 1)]
assert_allclose(pval, stata_pval, atol=1e-6)
@@ -120,7 +129,7 @@ def test_r2(model_data):
for i, key in enumerate(res.equations):
eq = res.equations[key]
stat = eq.rsquared
- stata_stat = stata_stats.loc['r2_{0}'.format(i + 1)].squeeze()
+ stata_stat = stata_stats.loc["r2_{0}".format(i + 1)].squeeze()
assert_allclose(stat, stata_stat)
@@ -130,9 +139,9 @@ def test_sum_of_squares(model_data):
for i, key in enumerate(res.equations):
eq = res.equations[key]
stat = eq.resid_ss
- stata_stat = stata_stats.loc['rss_{0}'.format(i + 1)].squeeze()
+ stata_stat = stata_stats.loc["rss_{0}".format(i + 1)].squeeze()
assert_allclose(stat, stata_stat)
- stata_stat = stata_stats.loc['mss_{0}'.format(i + 1)].squeeze()
+ stata_stat = stata_stats.loc["mss_{0}".format(i + 1)].squeeze()
stat = eq.model_ss
assert_allclose(stat, stata_stat)
@@ -143,5 +152,5 @@ def test_df_model(model_data):
for i, key in enumerate(res.equations):
eq = res.equations[key]
stat = eq.df_model
- stata_stat = stata_stats.loc['df_m{0}'.format(i + 1)].squeeze()
+ stata_stat = stata_stats.loc["df_m{0}".format(i + 1)].squeeze()
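+        # Stata's df_m counts only the slopes; eq.df_model also counts the
+        # constant, hence the +1 below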
assert_allclose(stat, stata_stat + 1)
diff --git a/linearmodels/tests/system/test_utility.py b/linearmodels/tests/system/test_utility.py
index 4a5415ab2c..b064fe8c00 100644
--- a/linearmodels/tests/system/test_utility.py
+++ b/linearmodels/tests/system/test_utility.py
@@ -13,7 +13,7 @@
blocked_inner_prod, inv_matrix_sqrt)
-@pytest.fixture(params=(3, np.arange(1, 6)), ids=['common-size', 'different-size'])
+@pytest.fixture(params=(3, np.arange(1, 6)), ids=["common-size", "different-size"])
def data(request):
k = 5
t = 200
@@ -151,7 +151,7 @@ def test_linear_constraint_repr():
r = np.eye(10)
lc = LinearConstraint(r, require_pandas=False)
assert hex(id(lc)) in lc.__repr__()
- assert '10 constraints' in lc.__repr__()
+ assert "10 constraints" in lc.__repr__()
assert isinstance(lc.q, pd.Series)
assert np.all(lc.q == 0)
assert lc.q.shape == (10,)
@@ -198,4 +198,5 @@ def test_blocked_outer_product():
desired = _x.T @ np.kron(s, np.eye(nobs)) @ _z
assert_allclose(actual, desired)
+
# TODO: One complex constraint test of equivalence
diff --git a/linearmodels/tests/test_compat.py b/linearmodels/tests/test_compat.py
index 5127bfb3d5..8ec737081b 100644
--- a/linearmodels/tests/test_compat.py
+++ b/linearmodels/tests/test_compat.py
@@ -7,13 +7,13 @@
from linearmodels.utility import AttrDict
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def data():
- idx = date_range('2000-01-01', periods=100)
- df1 = DataFrame(np.arange(100)[:, None], columns=['A'], index=idx)
+ idx = date_range("2000-01-01", periods=100)
+ df1 = DataFrame(np.arange(100)[:, None], columns=["A"], index=idx)
x = np.reshape(np.arange(200), (100, 2))
- df2 = DataFrame(x, columns=['B', 'C'], index=idx[::-1])
- s = Series(300 + np.arange(100), index=idx, name='D')
+ df2 = DataFrame(x, columns=["B", "C"], index=idx[::-1])
+ s = Series(300 + np.arange(100), index=idx, name="D")
return AttrDict(df1=df1, df2=df2, s=s)
@@ -22,7 +22,7 @@ def test_concat_sort(data):
b = concat([data.df1, data.df2, data.s], 1)
c = concat([data.df1, data.df2, data.s], 1, sort=True)
d = concat([data.df2, data.df1, data.s], 1, sort=False)
- assert list(a.columns) == ['A', 'B', 'C']
- assert list(b.columns) == ['A', 'B', 'C', 'D']
- assert list(c.columns) == ['A', 'B', 'C', 'D']
- assert list(d.columns) == ['B', 'C', 'A', 'D']
+ assert list(a.columns) == ["A", "B", "C"]
+ assert list(b.columns) == ["A", "B", "C", "D"]
+ assert list(c.columns) == ["A", "B", "C", "D"]
+ assert list(d.columns) == ["B", "C", "A", "D"]
diff --git a/linearmodels/tests/test_examples.py b/linearmodels/tests/test_examples.py
index 435cedcd2a..3d95616b72 100644
--- a/linearmodels/tests/test_examples.py
+++ b/linearmodels/tests/test_examples.py
@@ -15,20 +15,21 @@
import jupyter_client
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
+
kernels = jupyter_client.kernelspec.find_kernel_specs()
except ImportError: # pragma: no cover
- pytest.mark.skip(reason='Required packages not available')
+ pytest.mark.skip(reason="Required packages not available")
-kernel_name = 'python%s' % sys.version_info.major
+kernel_name = "python%s" % sys.version_info.major
head, _ = os.path.split(__file__)
-NOTEBOOKS_USING_XARRAY = ['panel_data-formats.ipynb']
-NOTEBOOK_DIR = os.path.abspath(os.path.join(head, '..', '..', 'examples'))
+NOTEBOOKS_USING_XARRAY = ["panel_data-formats.ipynb"]
+NOTEBOOK_DIR = os.path.abspath(os.path.join(head, "..", "..", "examples"))
-nbs = sorted(glob.glob(os.path.join(NOTEBOOK_DIR, '*.ipynb')))
-ids = list(map(lambda s: os.path.split(s)[-1].split('.')[0], nbs))
+nbs = sorted(glob.glob(os.path.join(NOTEBOOK_DIR, "*.ipynb")))
+ids = list(map(lambda s: os.path.split(s)[-1].split(".")[0], nbs))
if not nbs: # pragma: no cover
- pytest.mark.skip(reason='No notebooks found so not tests run')
+ pytest.mark.skip(reason="No notebooks found so not tests run")
@pytest.fixture(params=nbs, ids=ids)
@@ -40,10 +41,8 @@ def notebook(request):
def test_notebook(notebook):
nb_name = os.path.split(notebook)[-1]
if MISSING_XARRAY and nb_name in NOTEBOOKS_USING_XARRAY:
- pytest.skip('xarray is required to test {0}'.format(notebook))
+ pytest.skip("xarray is required to test {0}".format(notebook))
nb = nbformat.read(notebook, as_version=4)
- ep = ExecutePreprocessor(allow_errors=False,
- timeout=120,
- kernel_name=kernel_name)
- ep.preprocess(nb, {'metadata': {'path': NOTEBOOK_DIR}})
+ ep = ExecutePreprocessor(allow_errors=False, timeout=120, kernel_name=kernel_name)
+ ep.preprocess(nb, {"metadata": {"path": NOTEBOOK_DIR}})
diff --git a/linearmodels/tests/test_utility.py b/linearmodels/tests/test_utility.py
index 64659fdd8e..b14fd3e479 100644
--- a/linearmodels/tests/test_utility.py
+++ b/linearmodels/tests/test_utility.py
@@ -15,7 +15,7 @@
has_constant, inv_sqrth, missing_warning,
panel_to_frame)
-MISSING_PANEL = 'Panel' not in dir(pd)
+MISSING_PANEL = "Panel" not in dir(pd)
def test_missing_warning():
@@ -60,38 +60,38 @@ def test_hasconstant():
def test_wald_statistic():
ts = WaldTestStatistic(1.0, "_NULL_", 1, name="_NAME_")
assert str(hex(id(ts))) in ts.__repr__()
- assert '_NULL_' in str(ts)
+ assert "_NULL_" in str(ts)
assert ts.stat == 1.0
assert ts.df == 1
assert ts.df_denom is None
- assert ts.dist_name == 'chi2(1)'
+ assert ts.dist_name == "chi2(1)"
assert isinstance(ts.critical_values, dict)
assert_allclose(1 - stats.chi2.cdf(1.0, 1), ts.pval)
ts = WaldTestStatistic(1.0, "_NULL_", 1, 1000, name="_NAME_")
assert ts.df == 1
assert ts.df_denom == 1000
- assert ts.dist_name == 'F(1,1000)'
+ assert ts.dist_name == "F(1,1000)"
assert_allclose(1 - stats.f.cdf(1.0, 1, 1000), ts.pval)
def test_invalid_test_statistic():
- ts = InvalidTestStatistic('_REASON_', name='_NAME_')
+ ts = InvalidTestStatistic("_REASON_", name="_NAME_")
assert str(hex(id(ts))) in ts.__repr__()
- assert '_REASON_' in str(ts)
+ assert "_REASON_" in str(ts)
assert np.isnan(ts.pval)
assert ts.critical_values is None
def test_inapplicable_test_statistic():
- ts = InapplicableTestStatistic(reason='_REASON_', name='_NAME_')
+ ts = InapplicableTestStatistic(reason="_REASON_", name="_NAME_")
assert str(hex(id(ts))) in ts.__repr__()
- assert '_REASON_' in str(ts)
+ assert "_REASON_" in str(ts)
assert np.isnan(ts.pval)
assert ts.critical_values is None
ts = InapplicableTestStatistic()
- assert 'not applicable' in str(ts)
+ assert "not applicable" in str(ts)
def test_inv_sqrth():
@@ -103,52 +103,52 @@ def test_inv_sqrth():
def test_ensure_unique_column():
- df = pd.DataFrame({'a': [0, 1, 0], 'b': [1.0, 0.0, 1.0]})
- out = ensure_unique_column('a', df)
- assert out == '_a_'
- out = ensure_unique_column('c', df)
- assert out == 'c'
- out = ensure_unique_column('a', df, '=')
- assert out == '=a='
- df['_a_'] = -1
- out = ensure_unique_column('a', df)
- assert out == '__a__'
+ df = pd.DataFrame({"a": [0, 1, 0], "b": [1.0, 0.0, 1.0]})
+ out = ensure_unique_column("a", df)
+ assert out == "_a_"
+ out = ensure_unique_column("c", df)
+ assert out == "c"
+ out = ensure_unique_column("a", df, "=")
+ assert out == "=a="
+ df["_a_"] = -1
+ out = ensure_unique_column("a", df)
+ assert out == "__a__"
def test_attr_dict():
ad = AttrDict()
- ad['one'] = 'one'
+ ad["one"] = "one"
ad[1] = 1
- ad[('a', 2)] = ('a', 2)
- assert list(ad.keys()) == ['one', 1, ('a', 2)]
+ ad[("a", 2)] = ("a", 2)
+ assert list(ad.keys()) == ["one", 1, ("a", 2)]
assert len(ad) == 3
ad2 = ad.copy()
assert list(ad2.keys()) == list(ad.keys())
- assert ad.get('one', None) == 'one'
- assert ad.get('two', False) is False
+ assert ad.get("one", None) == "one"
+ assert ad.get("two", False) is False
k, v = ad.popitem()
- assert k == 'one'
- assert v == 'one'
+ assert k == "one"
+ assert v == "one"
items = ad.items()
assert (1, 1) in items
- assert (('a', 2), ('a', 2)) in items
+ assert (("a", 2), ("a", 2)) in items
assert len(items) == 2
values = ad.values()
assert 1 in values
- assert ('a', 2) in values
+ assert ("a", 2) in values
assert len(values) == 2
ad2 = AttrDict()
ad2[1] = 3
- ad2['one'] = 'one'
- ad2['a'] = 'a'
+ ad2["one"] = "one"
+ ad2["a"] = "a"
ad.update(ad2)
assert ad[1] == 3
- assert 'a' in ad
+ assert "a" in ad
ad.__str__()
with pytest.raises(AttributeError):
@@ -156,23 +156,23 @@ def test_attr_dict():
with pytest.raises(AttributeError):
ad.some_other_key
with pytest.raises(KeyError):
- ad['__ordered_dict__'] = None
+ ad["__ordered_dict__"] = None
del ad[1]
assert 1 not in ad.keys()
- ad.new_value = 'new_value'
- assert 'new_value' in ad.keys()
- assert ad.new_value == ad['new_value']
+ ad.new_value = "new_value"
+ assert "new_value" in ad.keys()
+ assert ad.new_value == ad["new_value"]
for key in ad.keys():
if isinstance(key, str):
assert key in dir(ad)
- new_value = ad.pop('new_value')
- assert new_value == 'new_value'
+ new_value = ad.pop("new_value")
+ assert new_value == "new_value"
del ad.one
- assert 'one' not in ad.keys()
+ assert "one" not in ad.keys()
ad.clear()
assert list(ad.keys()) == []
@@ -202,15 +202,22 @@ def test_panel_to_midf():
df2 = panel_to_frame(x, list(range(3)), list(range(7)), list(range(100)), True)
pd.testing.assert_frame_equal(df2, expected2)
- entities = list(map(''.join, [[random.choice(string.ascii_lowercase) for __ in range(10)]
- for _ in range(100)]))
- times = pd.date_range('1999-12-31', freq='A-DEC', periods=7)
- var_names = ['x.{0}'.format(i) for i in range(1, 4)]
+ entities = list(
+ map(
+ "".join,
+ [
+ [random.choice(string.ascii_lowercase) for __ in range(10)]
+ for _ in range(100)
+ ],
+ )
+ )
+ times = pd.date_range("1999-12-31", freq="A-DEC", periods=7)
+ var_names = ["x.{0}".format(i) for i in range(1, 4)]
df3 = panel_to_frame(x, var_names, times, entities, True)
mi = pd.MultiIndex.from_product([times, entities])
expected3 = pd.DataFrame(index=mi, columns=var_names)
for i in range(1, 4):
- expected3['x.{0}'.format(i)] = x[i-1].ravel()
+ expected3["x.{0}".format(i)] = x[i - 1].ravel()
expected3.index = expected3.index.swaplevel(0, 1)
mi = pd.MultiIndex.from_product([entities, times])
expected3 = expected3.loc[mi]
diff --git a/linearmodels/utility.py b/linearmodels/utility.py
index ac380bd145..cde1883ca6 100644
--- a/linearmodels/utility.py
+++ b/linearmodels/utility.py
@@ -81,13 +81,13 @@ def __len__(self):
def __repr__(self):
out = self.__ordered_dict__.__str__()
- return 'Attr' + out[7:]
+ return "Attr" + out[7:]
def __str__(self):
return self.__repr__()
def __init__(self, *args, **kwargs):
- self.__dict__['__ordered_dict__'] = OrderedDict(*args, **kwargs)
+ self.__dict__["__ordered_dict__"] = OrderedDict(*args, **kwargs)
def __contains__(self, item):
return self.__ordered_dict__.__contains__(item)
@@ -96,8 +96,8 @@ def __getitem__(self, item):
return self.__ordered_dict__[item]
def __setitem__(self, key, value):
- if key == '__ordered_dict__':
- raise KeyError(key + ' is reserved and cannot be set.')
+ if key == "__ordered_dict__":
+ raise KeyError(key + " is reserved and cannot be set.")
self.__ordered_dict__[key] = value
def __delitem__(self, key):
@@ -109,8 +109,8 @@ def __getattr__(self, item):
return self.__ordered_dict__[item]
def __setattr__(self, key, value):
- if key == '__ordered_dict__':
- raise AttributeError(key + ' is invalid')
+ if key == "__ordered_dict__":
+ raise AttributeError(key + " is invalid")
self.__ordered_dict__[key] = value
def __delattr__(self, name):
@@ -177,7 +177,7 @@ def inv_sqrth(x):
Returns
-------
- invsqrt : ndarray
+ ndarray
Input to the power -1/2
"""
vals, vecs = np.linalg.eigh(x)
@@ -215,10 +215,10 @@ def __init__(self, stat, null, df, df_denom=None, name=None):
self._name = name
if df_denom is None:
self.dist = chi2(df)
- self.dist_name = 'chi2({0})'.format(df)
+ self.dist_name = "chi2({0})".format(df)
else:
self.dist = f(df, df_denom)
- self.dist_name = 'F({0},{1})'.format(df, df_denom)
+ self.dist_name = "F({0},{1})".format(df, df_denom)
@property
def stat(self):
@@ -233,8 +233,7 @@ def pval(self):
@property
def critical_values(self):
"""Critical values test for common test sizes"""
- return OrderedDict(zip(['10%', '5%', '1%'],
- self.dist.ppf([.9, .95, .99])))
+ return OrderedDict(zip(["10%", "5%", "1%"], self.dist.ppf([0.9, 0.95, 0.99])))
@property
def null(self):
@@ -242,16 +241,26 @@ def null(self):
return self._null
def __str__(self):
- name = '' if not self._name else self._name + '\n'
- msg = '{name}H0: {null}\nStatistic: {stat:0.4f}\n' \
- 'P-value: {pval:0.4f}\nDistributed: {dist}'
- return msg.format(name=name, null=self.null, stat=self.stat,
- pval=self.pval, dist=self.dist_name)
+ name = "" if not self._name else self._name + "\n"
+ msg = (
+ "{name}H0: {null}\nStatistic: {stat:0.4f}\n"
+ "P-value: {pval:0.4f}\nDistributed: {dist}"
+ )
+ return msg.format(
+ name=name,
+ null=self.null,
+ stat=self.stat,
+ pval=self.pval,
+ dist=self.dist_name,
+ )
def __repr__(self):
- return self.__str__() + '\n' + \
- self.__class__.__name__ + \
- ', id: {0}'.format(hex(id(self)))
+ return (
+ self.__str__()
+ + "\n"
+ + self.__class__.__name__
+ + ", id: {0}".format(hex(id(self)))
+ )
class InvalidTestWarning(UserWarning):
@@ -276,8 +285,10 @@ class InvalidTestStatistic(WaldTestStatistic):
def __init__(self, reason, *, name=None):
self._reason = reason
- super(InvalidTestStatistic, self).__init__(np.NaN, np.NaN, df=1, df_denom=1, name=name)
- self.dist_name = 'None'
+ super(InvalidTestStatistic, self).__init__(
+ np.NaN, np.NaN, df=1, df_denom=1, name=name
+ )
+ self.dist_name = "None"
@property
def pval(self):
@@ -291,7 +302,7 @@ def critical_values(self):
def __str__(self):
msg = "Invalid test statistic\n{reason}\n{name}"
- name = '' if self._name is None else self._name
+ name = "" if self._name is None else self._name
return msg.format(name=name, reason=self._reason)
@@ -314,11 +325,12 @@ class InapplicableTestStatistic(WaldTestStatistic):
def __init__(self, *, reason=None, name=None):
self._reason = reason
if reason is None:
- self._reason = 'Test is not applicable to model specification'
+ self._reason = "Test is not applicable to model specification"
- super(InapplicableTestStatistic, self).__init__(np.NaN, np.NaN, df=1, df_denom=1,
- name=name)
- self.dist_name = 'None'
+ super(InapplicableTestStatistic, self).__init__(
+ np.NaN, np.NaN, df=1, df_denom=1, name=name
+ )
+ self.dist_name = "None"
@property
def pval(self):
@@ -332,35 +344,35 @@ def critical_values(self):
def __str__(self):
msg = "Irrelevant test statistic\n{reason}\n{name}"
- name = '' if self._name is None else self._name
+ name = "" if self._name is None else self._name
return msg.format(name=name, reason=self._reason)
def _str(v):
"""Preferred basic formatter"""
if np.isnan(v):
- return ' '
+ return " "
av = abs(v)
digits = 0
if av != 0:
digits = np.ceil(np.log10(av))
if digits > 4 or digits <= -4:
- return '{0:8.4g}'.format(v)
+ return "{0:8.4g}".format(v)
if digits > 0:
d = int(5 - digits)
else:
d = int(4)
- format_str = '{0:' + '0.{0}f'.format(d) + '}'
+ format_str = "{0:" + "0.{0}f".format(d) + "}"
return format_str.format(v)
def pval_format(v):
"""Preferred formatting for x in [0,1]"""
if np.isnan(v):
- return ' '
- return '{0:4.4f}'.format(v)
+ return " "
+ return "{0:4.4f}".format(v)
class _SummaryStr(object):
@@ -368,15 +380,18 @@ def __str__(self):
return self.summary.as_text()
def __repr__(self):
- return self.__str__() + '\n' + \
- self.__class__.__name__ + \
- ', id: {0}'.format(hex(id(self)))
+ return (
+ self.__str__()
+ + "\n"
+ + self.__class__.__name__
+ + ", id: {0}".format(hex(id(self)))
+ )
def _repr_html_(self):
-        return self.summary.as_html() + '<br/>id: {0}'.format(hex(id(self)))
+        return self.summary.as_html() + "<br/>id: {0}".format(hex(id(self)))
-def ensure_unique_column(col_name, df, addition='_'):
+def ensure_unique_column(col_name, df, addition="_"):
while col_name in df:
col_name = addition + col_name + addition
return col_name
@@ -386,16 +401,19 @@ class _ModelComparison(_SummaryStr):
"""
Base class for model comparisons
"""
+
_supported = tuple()
- _PRECISION_TYPES = {'tstats': 'T-stats',
- 'pvalues': 'P-values',
- 'std_errors': 'Std. Errors'}
+ _PRECISION_TYPES = {
+ "tstats": "T-stats",
+ "pvalues": "P-values",
+ "std_errors": "Std. Errors",
+ }
- def __init__(self, results, *, precision='tstats'):
+ def __init__(self, results, *, precision="tstats"):
if not isinstance(results, (dict, OrderedDict)):
_results = OrderedDict()
for i, res in enumerate(results):
- _results['Model ' + str(i)] = res
+ _results["Model " + str(i)] = res
results = _results
elif not isinstance(results, OrderedDict):
_results = OrderedDict()
@@ -406,15 +424,17 @@ def __init__(self, results, *, precision='tstats'):
for key in self._results:
if not isinstance(self._results[key], self._supported):
- raise TypeError('Results from unknown model')
- precision = precision.lower().replace('-', '_')
- if precision not in ('tstats', 'pvalues', 'std_errors'):
- raise ValueError('Unknown precision value. Must be one of \'tstats\', \'std_errors\' '
- 'or \'pvalues\'.')
+ raise TypeError("Results from unknown model")
+ precision = precision.lower().replace("-", "_")
+ if precision not in ("tstats", "pvalues", "std_errors"):
+ raise ValueError(
+ "Unknown precision value. Must be one of 'tstats', 'std_errors' "
+ "or 'pvalues'."
+ )
self._precision = precision
def _get_series_property(self, name):
- out = ([(k, getattr(v, name)) for k, v in self._results.items()])
+ out = [(k, getattr(v, name)) for k, v in self._results.items()]
cols = [v[0] for v in out]
values = concat([v[1] for v in out], 1)
values.columns = cols
@@ -431,38 +451,40 @@ def _get_property(self, name):
@property
def nobs(self):
"""Parameters for all models"""
- return self._get_property('nobs')
+ return self._get_property("nobs")
@property
def params(self):
"""Parameters for all models"""
- return self._get_series_property('params')
+ return self._get_series_property("params")
@property
def tstats(self):
"""Parameter t-stats for all models"""
- return self._get_series_property('tstats')
+ return self._get_series_property("tstats")
@property
def std_errors(self):
"""Parameter t-stats for all models"""
- return self._get_series_property('std_errors')
+ return self._get_series_property("std_errors")
@property
def pvalues(self):
"""Parameter p-vals for all models"""
- return self._get_series_property('pvalues')
+ return self._get_series_property("pvalues")
@property
def rsquared(self):
"""Coefficients of determination (R**2)"""
- return self._get_property('rsquared')
+ return self._get_property("rsquared")
@property
def f_statistic(self):
"""F-statistics and P-values"""
- out = self._get_property('f_statistic')
- out_df = DataFrame(np.empty((len(out), 2)), columns=['F stat', 'P-value'], index=out.index)
+ out = self._get_property("f_statistic")
+ out_df = DataFrame(
+ np.empty((len(out), 2)), columns=["F stat", "P-value"], index=out.index
+ )
for loc in out.index:
out_df.loc[loc] = out[loc].stat, out[loc].pval
return out_df
@@ -473,18 +495,22 @@ def missing_warning(missing):
if not np.any(missing):
return
import linearmodels
+
if linearmodels.WARN_ON_MISSING:
import warnings
+
warnings.warn(missing_value_warning_msg, MissingValueWarning)
def param_table(results, title, pad_bottom=False):
"""Formatted standard parameter table"""
- param_data = np.c_[np.asarray(results.params)[:, None],
- np.asarray(results.std_errors)[:, None],
- np.asarray(results.tstats)[:, None],
- np.asarray(results.pvalues)[:, None],
- results.conf_int()]
+ param_data = np.c_[
+ np.asarray(results.params)[:, None],
+ np.asarray(results.std_errors)[:, None],
+ np.asarray(results.tstats)[:, None],
+ np.asarray(results.pvalues)[:, None],
+ results.conf_int(),
+ ]
data = []
for row in param_data:
txt_row = []
@@ -494,19 +520,22 @@ def param_table(results, title, pad_bottom=False):
f = pval_format
txt_row.append(f(v))
data.append(txt_row)
- header = ['Parameter', 'Std. Err.', 'T-stat', 'P-value', 'Lower CI', 'Upper CI']
+ header = ["Parameter", "Std. Err.", "T-stat", "P-value", "Lower CI", "Upper CI"]
table_stubs = list(results.params.index)
if pad_bottom:
# Append blank row for spacing
- data.append([''] * 6)
- table_stubs += ['']
+ data.append([""] * 6)
+ table_stubs += [""]
- return SimpleTable(data, stubs=table_stubs, txt_fmt=fmt_params,
- headers=header, title=title)
+ return SimpleTable(
+ data, stubs=table_stubs, txt_fmt=fmt_params, headers=header, title=title
+ )
def format_wide(s, cols):
"""
+ Format a list of strings.
+
Parameters
----------
s : List[str]
@@ -516,25 +545,25 @@ def format_wide(s, cols):
Returns
-------
- formatted : List[List[str]]
- Joined list:
+ List[List[str]]
+ The joined list.
"""
lines = []
- line = ''
+ line = ""
for i, val in enumerate(s):
- if line == '':
+ if line == "":
line = val
if i + 1 != len(s):
- line += ', '
+ line += ", "
else:
temp = line + val
if i + 1 != len(s):
- temp += ', '
+ temp += ", "
if len(temp) > cols:
lines.append([line])
line = val
if i + 1 != len(s):
- line += ', '
+ line += ", "
else:
line = temp
lines.append([line])
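
A minimal usage sketch of the wrapper above, assuming format_wide is imported from
linearmodels.utility and returns the `lines` list built here (the output comment
comes from tracing the loop, not from the test suite):

from linearmodels.utility import format_wide

names = ["alpha", "beta", "gamma", "delta"]
print(format_wide(names, cols=12))
# [['alpha, '], ['beta, '], ['gamma, delta']]
# Each inner list is one rendered line; names are joined with ", " until
# appending the next name would push the line past cols characters.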
@@ -581,14 +610,13 @@ def panel_to_frame(x, items, major_axis, minor_axis, swap=False):
df.sort_index(inplace=True)
final_levels = [minor_axis, major_axis]
df.index.set_levels(final_levels, [0, 1], inplace=True)
- df.index.names = ['major', 'minor']
+ df.index.names = ["major", "minor"]
return df
def quadratic_form_test(params, cov, restriction=None, value=None, formula=None):
if formula is not None and restriction is not None:
- raise ValueError('restriction and formula cannot be used'
- 'simultaneously.')
+ raise ValueError("restriction and formula cannot be used" "simultaneously.")
if formula is not None:
di = DesignInfo(list(params.index))
lc = di.linear_constraint(formula)
@@ -601,7 +629,7 @@ def quadratic_form_test(params, cov, restriction=None, value=None, formula=None)
rcov = restriction @ cov @ restriction.T
stat = float(diff.T @ np.linalg.inv(rcov) @ diff)
df = restriction.shape[0]
- null = 'Linear equality constraint is valid'
- name = 'Linear Equality Hypothesis Test'
+ null = "Linear equality constraint is valid"
+ name = "Linear Equality Hypothesis Test"
return WaldTestStatistic(stat, null, df, name=name)
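
A hedged sketch (not part of the patch) of calling quadratic_form_test directly
with toy inputs; inside the library it backs the Wald tests exposed on fitted
results, which pass a params Series and a cov DataFrame much like these:

import numpy as np
import pandas as pd

from linearmodels.utility import quadratic_form_test

params = pd.Series([1.0, 0.5], index=["x1", "x2"])
cov = pd.DataFrame(np.eye(2) / 100.0, index=params.index, columns=params.index)
# H0: the two coefficients are equal, written as a patsy constraint formula
wald = quadratic_form_test(params, cov, formula="x1 = x2")
print(wald)  # WaldTestStatistic, distributed chi2(1)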
diff --git a/pyproject.toml b/pyproject.toml
index 9bb791f41e..b630094f91 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,2 +1,2 @@
[build-system]
-requires = ["setuptools", "wheel"]
+requires = ["setuptools", "wheel", "Cython>=0.29.14"]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index eb844d739c..535beb58f6 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,5 +1,8 @@
xarray>=0.9
-pytest>=5
+pytest>=5.0
+black==19.10b0
+flake8
+flake8-black
sphinx
sphinx-material
ipython
@@ -9,4 +12,3 @@ nbconvert
nbformat
matplotlib
seaborn
-Cython
diff --git a/requirements.txt b/requirements.txt
index d07bfc7a71..8024e0282e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ statsmodels>=0.9
patsy
property_cached>=1.6.3
mypy_extensions>=0.4
+Cython>=0.29.14
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index 749c88eb24..33bba4f065 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,6 @@
[flake8]
max-line-length = 99
+ignore = E203,W503,BLK100
[versioneer]
VCS = git
@@ -44,4 +45,4 @@ multi_line_output=0
force_grid_wrap=0
combine_as_imports=True
force_sort_within_sections=True
-line_width=99
+line_width=88
diff --git a/setup.py b/setup.py
index 9929774e5e..f2809e70ad 100644
--- a/setup.py
+++ b/setup.py
@@ -22,70 +22,45 @@
"""
try:
- markdown = os.stat('README.md').st_mtime
- if os.path.exists('README.rst'):
- rst = os.stat('README.rst').st_mtime
+ markdown = os.stat("README.md").st_mtime
+ if os.path.exists("README.rst"):
+ rst = os.stat("README.rst").st_mtime
else:
rst = markdown - 1
if rst >= markdown:
- with open('README.rst', 'r') as rst:
+ with open("README.rst", "r") as rst:
description = rst.read()
else:
import pypandoc
- osx_line_ending = '\r'
- windows_line_ending = '\r\n'
- linux_line_ending = '\n'
+ osx_line_ending = "\r"
+ windows_line_ending = "\r\n"
+ linux_line_ending = "\n"
- description = pypandoc.convert_file('README.md', 'rst')
+ description = pypandoc.convert_file("README.md", "rst")
description = description.replace(windows_line_ending, linux_line_ending)
description = description.replace(osx_line_ending, linux_line_ending)
- with open('README.rst', 'w') as rst:
+ with open("README.rst", "w") as rst:
rst.write(description)
except (ImportError, OSError):
import warnings
warnings.warn("Unable to convert README.md to README.rst", UserWarning)
- description = open('README.md').read()
-
-# Copy over notebooks from examples to docs for build
-notebooks = glob.glob('examples/*.ipynb')
-for nb in notebooks:
- fname = os.path.split(nb)[-1]
- folder, nbname = fname.split('_')
- outdir = os.path.join('doc', 'source', folder, 'examples')
- if not os.path.exists(outdir):
- os.makedirs(outdir, exist_ok=True)
- outfile = os.path.join(outdir, nbname)
- with open(outfile, 'w') as nbout:
- with open(nb, 'r') as nbin:
- nbout.write(nbin.read())
-
-images = glob.glob('examples/*.png')
-for image in images:
- fname = os.path.split(image)[-1]
- folder, _ = fname.split('_')
- outdir = os.path.join('doc', 'source', folder, 'examples')
- if not os.path.exists(outdir):
- os.makedirs(outdir, exist_ok=True)
- outfile = os.path.join(outdir, fname)
- with open(outfile, 'wb') as imageout:
- with open(image, 'rb') as imagein:
- imageout.write(imagein.read())
+ description = open("README.md").read()
additional_files = []
-for filename in glob.iglob('./linearmodels/datasets/**', recursive=True):
- if '.csv.bz' in filename:
- additional_files.append(filename.replace('./linearmodels/', ''))
+for filename in glob.iglob("./linearmodels/datasets/**", recursive=True):
+ if ".csv.bz" in filename:
+ additional_files.append(filename.replace("./linearmodels/", ""))
-for filename in glob.iglob('./linearmodels/tests/**', recursive=True):
- if '.txt' in filename or '.csv' in filename or '.dta' in filename:
- additional_files.append(filename.replace('./linearmodels/', ''))
+for filename in glob.iglob("./linearmodels/tests/**", recursive=True):
+ if ".txt" in filename or ".csv" in filename or ".dta" in filename:
+ additional_files.append(filename.replace("./linearmodels/", ""))
-for filename in glob.iglob('./examples/**', recursive=True):
- if '.png' in filename:
+for filename in glob.iglob("./examples/**", recursive=True):
+ if ".png" in filename:
additional_files.append(filename)
@@ -94,59 +69,80 @@ def run_setup(binary=True):
if binary:
from Cython.Build import cythonize
import numpy
- macros = [('NPY_NO_DEPRECATED_API', '1')]
+
+ macros = [("NPY_NO_DEPRECATED_API", "1")]
# macros.append(('CYTHON_TRACE', '1'))
directives = {} # {'linetrace': True, 'binding':True}
- extension = Extension('linearmodels.panel._utility',
- ['linearmodels/panel/_utility.pyx'],
- define_macros=macros,
- include_dirs=[numpy.get_include()])
+ extension = Extension(
+ "linearmodels.panel._utility",
+ ["linearmodels/panel/_utility.pyx"],
+ define_macros=macros,
+ include_dirs=[numpy.get_include()],
+ )
extensions.append(extension)
extensions = cythonize(extensions, compiler_directives=directives, force=True)
- setup(cmdclass=versioneer.get_cmdclass(),
- name='linearmodels',
- license='NCSA',
- description='Instrumental Variable and Linear Panel models for Python',
- version=versioneer.get_version(),
- packages=find_packages(),
- package_dir={'linearmodels': './linearmodels'},
- author='Kevin Sheppard',
- author_email='kevin.k.sheppard@gmail.com',
- url='http://github.com/bashtage/linearmodels',
- long_description=description,
- install_requires=open('requirements.txt').read().split('\n'),
- include_package_data=True,
- package_data={'linearmodels': additional_files},
- keywords=['linear models', 'regression', 'instrumental variables', 'IV',
- 'panel', 'fixed effects', 'clustered', 'heteroskedasticity',
- 'endogeneity', 'instruments', 'statistics',
- 'statistical inference', 'econometrics'],
- zip_safe=False,
- classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'Intended Audience :: End Users/Desktop',
- 'Intended Audience :: Financial and Insurance Industry',
- 'Intended Audience :: Science/Research',
- 'Programming Language :: Python :: 3.6',
- 'Programming Language :: Python :: 3.7',
- 'Programming Language :: Python :: 3.8',
- 'License :: OSI Approved',
- 'Operating System :: MacOS :: MacOS X',
- 'Operating System :: Microsoft :: Windows',
- 'Operating System :: POSIX',
- 'Programming Language :: Python',
- 'Topic :: Scientific/Engineering',
- ],
- ext_modules=extensions,
- python_requires='>=3.6',
- )
+ setup(
+ cmdclass=versioneer.get_cmdclass(),
+ name="linearmodels",
+ license="NCSA",
+ description="Instrumental Variable and Linear Panel models for Python",
+ version=versioneer.get_version(),
+ packages=find_packages(),
+ package_dir={"linearmodels": "./linearmodels"},
+ author="Kevin Sheppard",
+ author_email="kevin.k.sheppard@gmail.com",
+ url="http://github.com/bashtage/linearmodels",
+ long_description=description,
+ install_requires=open("requirements.txt").read().split("\n"),
+ include_package_data=True,
+ package_data={"linearmodels": additional_files},
+ keywords=[
+ "linear models",
+ "regression",
+ "instrumental variables",
+ "IV",
+ "panel",
+ "fixed effects",
+ "clustered",
+ "heteroskedasticity",
+ "endogeneity",
+ "instruments",
+ "statistics",
+ "statistical inference",
+ "econometrics",
+ ],
+ zip_safe=False,
+ classifiers=[
+ "Development Status :: 5 - Production/Stable",
+ "Intended Audience :: End Users/Desktop",
+ "Intended Audience :: Financial and Insurance Industry",
+ "Intended Audience :: Science/Research",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "License :: OSI Approved",
+ "Operating System :: MacOS :: MacOS X",
+ "Operating System :: Microsoft :: Windows",
+ "Operating System :: POSIX",
+ "Programming Language :: Python",
+ "Topic :: Scientific/Engineering",
+ ],
+ ext_modules=extensions,
+ python_requires=">=3.6",
+ )
try:
run_setup(binary=True)
-except (CCompilerError, DistutilsExecError, DistutilsPlatformError, IOError, ValueError,
- ImportError):
+except (
+ CCompilerError,
+ DistutilsExecError,
+ DistutilsPlatformError,
+ IOError,
+ ValueError,
+ ImportError,
+):
run_setup(binary=False)
import warnings