Skip to content

Commit

Permalink
regression covariance
Browse files Browse the repository at this point in the history
  • Loading branch information
enzbus committed Dec 22, 2023
1 parent 5ea0753 commit fff21e4
Show file tree
Hide file tree
Showing 2 changed files with 289 additions and 1 deletion.
16 changes: 15 additions & 1 deletion docs/examples/paper_examples.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
Original examples from the paper
================================

These are the original examples, which were also used to generate plots
and results in :paper:`chapter 7 of the paper <chapter.7>`. They were
originally written as IPython notebook, and have been translated into Python
code using the stable API of Cvxportfolio (*i.e.*, version ``1.0.0`` and
above).

The original notebooks are `available on this branch of the repository
<https://github.com/cvxgrp/cvxportfolio/blob/0.0.X/examples/>`_.

.. note::

Currently some of them are *work in progress*, refer to the linked notebook
if in doubt.

.. toctree::
:maxdepth: 2

Expand All @@ -13,4 +27,4 @@ Original examples from the paper
paper_examples/single_period_opt
paper_examples/single_period_opt_lin_tcost
paper_examples/solution_time


274 changes: 274 additions & 0 deletions examples/regression_covariance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
# Copyright 2023 Enzo Busseti
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build covariance matrix by regression.
This is a prototype for a planned extension of the forecast module in the main
library. However we're not sure yet which precise form it will take.
.. note::
The internal interface methods used here **may not be public**
and may not covered by the semantic versioning agreement (they might change
without notice). In general, methods that are shown in the examples are
considered public, but for this one it may not be not so.
"""

import numpy as np
import pandas as pd

import cvxportfolio as cvx

from cvxportfolio.estimator import Estimator
# from cvxportfolio.forecast import BaseForecast

def _striptime(df):
res = pd.DataFrame(df)
res.index = pd.to_datetime(df.index.date)
return res

def _covariance(returns):
_ = returns.fillna(0.)
num = _.T @ _
_ = (~returns.isnull()) * 1.
den = _.T @ _
return num / den

def _covariance_weigh(returns, weighter):
tmp = returns.multiply(weighter, axis=0)
left = tmp.fillna(0.)
right = returns.multiply(~(weighter.isnull())*1., axis=0).fillna(0.)
num = left.T @ right
_ = (~tmp.isnull()) * 1.
den = _.T @ _
return num / den

def _decorrelate(XtX, obs, gamma):
return np.linalg.solve(
XtX + np.diag(([gamma] * (len(obs)-1)) + [0.]),
obs)

class RegressionBase(Estimator):
"""Base for linear regression forecasters.
"""

def __init__(self, regressors, gamma=1e-1):
"""Initialize with regressors.
For simplicity here we drop time (and keep date) from the index.
:param regressors: Regressors used.
:type regressors: pandas.DataFrame
:param gamma: Tikhonov regularizer, default 0.
:type gamma: float
"""
self.regressors = _striptime(regressors)
self.regressors['intercept'] = 1.
self.gamma = gamma

def _obtain_weighted(self, used_regressors, **kwargs):
"""Obtain objects weighted by the regressors.
:param used_regressors: Used regressors (only past).
:type used_regressors: pandas.DataFrame
:param kwargs: All arguments to :meth:`values_in_time`.
:type kwargs: dict
"""

raise NotImplementedError

def values_in_time(self, t, past_returns, **kwargs):
"""Produce regressed object for the period.
:param t: Current time.
:type t: pandas.Timestamp
:param past_returns: Past returns (includes cash).
:type past_returns: pandas.DataFrame
:param kwargs: Other arguments.
:type kwargs: dict
:return: Regressed covariance.
:rtype: pandas.DataFrame
"""
# raise Exception
used_regressors = pd.DataFrame(
self.regressors.loc[
self.regressors.index <= pd.Timestamp(t.date())], copy=True)

# to extend to index
used_regressors['intercept'] = past_returns.iloc[:,-1]

used_regressors = used_regressors.ffill()
used_regressors -= used_regressors.mean()
used_regressors /= np.sqrt((used_regressors**2).mean())

used_regressors['intercept'] = 1.

# print('used_regressors')
# print(used_regressors)

_ = _covariance(used_regressors.iloc[:-1])
# print('XtX', _)
lhs = _decorrelate(_, used_regressors.iloc[-1], gamma=self.gamma)
# print('lhs', lhs)

_ = self._obtain_weighted(used_regressors,
t=t, past_returns=past_returns, **kwargs)

return sum(el * _[i] for i, el in enumerate(lhs))


class RegressionCovariance(RegressionBase):
"""Covariance matrix that is obtained by linear regression.
"""

def _obtain_weighted(self, used_regressors, past_returns, **kwargs):
"""Obtain objects weigthed by the regressors.
:param used_regressors: Used regressors (only past).
:type used_regressors: pandas.DataFrame
:param past_returns: Past market returns.
:type past_returns: pandas.DataFrame
:param kwargs: All other arguments to :meth:`values_in_time`.
:type kwargs: dict
"""

# creates new object, doesn't damage original one
past_returns = _striptime(past_returns)

return [_covariance_weigh(
past_returns.iloc[:, :-1], used_regressors.iloc[:-1][col])
for col in used_regressors]

class RegressionReturns(RegressionBase):
"""Returns forecast that is obtained by linear regression.
"""

def _obtain_weighted(self, used_regressors, past_returns, **kwargs):
"""Obtain objects weighted by the regressors.
:param used_regressors: Used regressors (only past).
:type used_regressors: pandas.DataFrame
:param past_returns: Past market returns.
:type past_returns: pandas.DataFrame
:param kwargs: All other arguments to :meth:`values_in_time`.
:type kwargs: dict
"""

# creates new object, doesn't damage original one
past_returns = _striptime(past_returns)

return [ past_returns.iloc[:, :-1].multiply(
used_regressors.iloc[:-1][col], axis=0).mean().values
for col in used_regressors]

def example_regressors():
"""Vix regressors and others.
The open value of VIX is avalaible at market open US, so this is a valid
(no look-ahead) set of regressors.
:return: Various regressors, with different timescales and availability.
:rtype: pandas.DataFrame
"""
vix = cvx.YahooFinance('^VIX').data.open
rate = cvx.DownloadedMarketData(['GE']).returns.USDOLLAR
#wage_change = cvx.Fred('CES0500000003').data.pct_change()
#gdp_change = cvx.Fred('GDP').data.pct_change()
base_regressors = {
'vix':vix,
'rate':rate,
#'wage_change': wage_change,
#'gdp_change': gdp_change
}
regressors = pd.DataFrame(base_regressors)

# for col in base_regressors:
# if np.all(regressors[col].dropna() > 0):
# regressors['log_' + col] = np.log(regressors[col])
# regressors['square_'+ col] = regressors[col]**2
# regressors['cube_' + col] = regressors[col]**3

return _striptime(regressors).ffill()

if __name__ == '__main__':

from .universes import DOW30

REGRESSORS = example_regressors()
print(REGRESSORS)

# RETURNS_REGRESSORS = REGRESSORS[
# [col for col in REGRESSORS.columns
# if not 'vix' in col]
# ]

# market_data = cvx.DownloadedMarketData(['AAPL', 'GOOG', 'TSLA'])

cov = RegressionCovariance(REGRESSORS, gamma = 1e-6)
# ret = RegressionReturns(RETURNS_REGRESSORS, gamma = 1000)

# tmp = pd.DataFrame(market_data.returns.iloc[:-10],copy=True)
# tmp.iloc[-1] = np.nan
#print(cov.values_in_time(
# t = market_data.returns.index[-10],
# past_returns=tmp,))

constraints = [
#cvx.LongOnly(),
cvx.LeverageLimit(7)
]

# result_plain, result_covregr, result_retregr, result_retcovregr = \
result_plain, result_covregr = \
cvx.MarketSimulator(DOW30).backtest_many(
[
cvx.SinglePeriodOptimization(cvx.ReturnsForecast()
- 3 * cvx.FullCovariance(),
constraints,
),
cvx.SinglePeriodOptimization(cvx.ReturnsForecast()
- 3 * cvx.FullCovariance(cov),
constraints,
),
# cvx.SinglePeriodOptimization(cvx.ReturnsForecast(ret)
# - 3 * cvx.FullCovariance(),
# constraints,
# ),
# cvx.SinglePeriodOptimization(cvx.ReturnsForecast(ret)
# - 3 * cvx.FullCovariance(cov),
# constraints,
# ),
],
start_time='2010-01-01') # can't do earlier than ~2010
# because regressor not available,
# need to add logic to include them after some min_history

result_plain.plot()
result_covregr.plot()
# result_retregr.plot()
# result_retcovregr.plot()

print('Result with no regression:')
print(result_plain)

print('Result with covariance regression:')
print(result_covregr)

# print('Result with returns regression:')
# print(result_retregr)

# print('Result with returns and covariance regression:')
# print(result_retcovregr)

0 comments on commit fff21e4

Please sign in to comment.