Skip to content

Commit

Permalink
adapting covariance forecaster to new base
Browse files Browse the repository at this point in the history
  • Loading branch information
enzbus committed Jan 25, 2024
1 parent 3a630e0 commit d231b9f
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 24 deletions.
101 changes: 78 additions & 23 deletions cvxportfolio/forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,15 @@ def values_in_time( # pylint: disable=arguments-differ
self._agnostic_update(**kwargs)
return (self._last_sum / self._last_counts).values

def _ewm_weights(self, df, t):
index_in_halflifes = (df.index - t) / self.ema_half_life
def _ewm_weights(self, index, t):
index_in_halflifes = (index - t) / self.ema_half_life
return np.exp(index_in_halflifes * np.log(2))

def _ewm_numerator(self, df, t):
"""Exponential moving window (optional) numerator."""
if not _is_timedelta(self.ema_half_life):
return df.sum()
weighter = self._ewm_weights(df, t)
weighter = self._ewm_weights(df.index, t)
filled = df.fillna(0.)
return filled.multiply(weighter, axis=0).sum()

Expand All @@ -186,12 +186,23 @@ def _ewm_denominator(self, df, t):
if not _is_timedelta(self.ema_half_life):
return df.count()

weighter = self._ewm_weights(df, t)
weighter = self._ewm_weights(df.index, t)
ones = (~df.isnull()) * 1.
return ones.multiply(weighter, axis=0).sum()

def _update_numerator(self, last_row):
"""Update with last observation. Ewm (if any) is applied below."""
return last_row.fillna(0.)

def _update_denominator(self, last_row):
"""Update with last observation. Ewm (if any) is applied below."""
return ~(last_row.isnull())

def _initial_compute(self, t, **kwargs): # pylint: disable=arguments-differ
"""Make forecast from scratch."""
"""Make forecast from scratch.
This is designed to also work for the covariance forecaster.
"""
df = self._dataframe_selector(t=t, **kwargs)

# Moving average window logic
Expand All @@ -201,13 +212,16 @@ def _initial_compute(self, t, **kwargs): # pylint: disable=arguments-differ
self._last_counts = self._ewm_denominator(df, t)
if self._last_counts.min() == 0:
raise ForecastError(
f'{self.__class__.__name__} is computing the '
+ 'mean of a column with no values.')
f'{self.__class__.__name__} is given a dataframe with '
+ 'at least a column that has no values.')
self._last_sum = self._ewm_numerator(df, t)
self._last_time = t

def _online_update(self, t, **kwargs): # pylint: disable=arguments-differ
"""Update forecast from period before."""
"""Update forecast from period before.
This is designed to also work for the covariance forecaster.
"""
df = self._dataframe_selector(t=t, **kwargs)
last_row = df.iloc[-1]

Expand All @@ -222,8 +236,9 @@ def _online_update(self, t, **kwargs): # pylint: disable=arguments-differ
discount_factor = 1.

# for ewm we also need to discount last element
self._last_counts += ~(last_row.isnull()) * discount_factor
self._last_sum += last_row.fillna(0.) * discount_factor
self._last_counts += self._update_denominator(
last_row) * discount_factor
self._last_sum += self._update_numerator(last_row) * discount_factor

# Moving average window logic
if _is_timedelta(self.ma_window):
Expand All @@ -234,8 +249,8 @@ def _online_update(self, t, **kwargs): # pylint: disable=arguments-differ
self._last_counts -= self._ewm_denominator(skips, t)
if self._last_counts.min() == 0:
raise ForecastError(
f'{self.__class__.__name__} is computing the '
+ 'mean of a column with no values.')
f'{self.__class__.__name__} is given a dataframe with '
+ 'at least a column that has no values.')
self._last_sum -= self._ewm_numerator(skips, t).fillna(0.)

self._last_time = t
Expand Down Expand Up @@ -293,6 +308,13 @@ def _dataframe_selector(self, past_returns, **kwargs):
return past_returns.iloc[:, :-1]**2


## HOW TO DO COVARIANCE
## Factor out non-kelly part in dedicated forecaster (like it's done for var)
## initial compute is good
## ewm_weights is good
## ewm_num and ewm_denum to override
## online update needs to be refactored in BaseMean

@dataclass(unsafe_hash=True)
class HistoricalStandardDeviation(HistoricalVariance):
"""Historical standard deviation."""
Expand Down Expand Up @@ -450,7 +472,7 @@ def project_on_psd_cone_and_factorize(covariance):
return eigvec @ np.diag(np.sqrt(eigval))

@dataclass(unsafe_hash=True)
class HistoricalFactorizedCovariance(BaseForecast):
class HistoricalFactorizedCovariance(BaseMeanForecast):
r"""Historical covariance matrix, sqrt factorized.
:param kelly: if ``True`` compute each
Expand All @@ -471,13 +493,46 @@ class HistoricalFactorizedCovariance(BaseForecast):
FACTORIZED = True

def __post_init__(self):
self._last_time = None
self._last_counts_matrix = None
self._last_sum_matrix = None
super().__post_init__()
self._joint_mean = None

# pylint: disable=arguments-differ
def _dataframe_selector(self, past_returns, **kwargs):
"""Return dataframe to compute the historical covariance of."""
return past_returns.iloc[:, :-1]


def _ewm_numerator(self, df, t):
"""Exponential moving window (optional) numerator."""
if not _is_timedelta(self.ema_half_life):
return df.sum()
weighter = self._ewm_weights(df.index, t)
filled = df.fillna(0.)
return filled.multiply(weighter, axis=0).sum()

def _ewm_denominator(self, df, t):
"""Exponential moving window (optional) denominator."""
ones = (~df.isnull()) * 1.
if not _is_timedelta(self.ema_half_life):
return ones.T @ ones

weighter = self._ewm_weights(df.index, t)
ones = (~df.isnull()) * 1.
tmp = ones.multiply(weighter, axis=0)
return tmp.T @ tmp

def _update_numerator(self, last_row):
"""Update with last observation. Ewm (if any) is applied below."""
return last_row.fillna(0.)

def _update_denominator(self, last_row):
"""Update with last observation. Ewm (if any) is applied below."""
return ~(last_row.isnull())



@staticmethod
def _get_count_matrix(past_returns):
def _get_count_matrix(past_returns): # -> _ewn_denominator
r"""We obtain the matrix of non-null joint counts:
.. math::
Expand All @@ -497,19 +552,19 @@ def _get_initial_joint_mean(past_returns):

# pylint: disable=arguments-differ
def _initial_compute(self, t, past_returns, **kwargs):
self._last_counts_matrix = self._get_count_matrix(past_returns).values
self._last_counts = self._get_count_matrix(past_returns).values
filled = past_returns.iloc[:, :-1].fillna(0.).values
self._last_sum_matrix = filled.T @ filled
self._last_sum = filled.T @ filled
if not self.kelly:
self._joint_mean = self._get_initial_joint_mean(past_returns)

self._last_time = t

def _online_update(self, t, past_returns, **kwargs):
nonnull = ~(past_returns.iloc[-1, :-1].isnull())
self._last_counts_matrix += np.outer(nonnull, nonnull)
self._last_counts += np.outer(nonnull, nonnull)
last_ret = past_returns.iloc[-1, :-1].fillna(0.)
self._last_sum_matrix += np.outer(last_ret, last_ret)
self._last_sum += np.outer(last_ret, last_ret)
self._last_time = t
if not self.kelly:
self._joint_mean += last_ret
Expand All @@ -533,10 +588,10 @@ def values_in_time( # pylint: disable=arguments-differ
"""

self._agnostic_update(t=t, **kwargs)
covariance = self._last_sum_matrix / self._last_counts_matrix
covariance = self._last_sum / self._last_counts

if not self.kelly:
tmp = self._joint_mean / self._last_counts_matrix
tmp = self._joint_mean / self._last_counts
covariance -= tmp.T * tmp
try:
return project_on_psd_cone_and_factorize(covariance)
Expand Down
2 changes: 1 addition & 1 deletion cvxportfolio/tests/test_forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ def test_sum_matrix(self):
t=pd.Timestamp('2022-01-01'), past_returns=returns)

# pylint: disable=protected-access
sum_matrix = forecaster._last_sum_matrix
sum_matrix = forecaster._last_sum

for indexes in [(1, 2), (4, 5), (1, 5), (7, 18),
(7, 24), (1, 15), (13, 22)]:
Expand Down

0 comments on commit d231b9f

Please sign in to comment.