Skip to content

Commit

Permalink
Merge pull request #108 from cvxgrp/gh_issue_107
Browse files Browse the repository at this point in the history
Gh issue #107 and also fixed #106
  • Loading branch information
enzbus authored Sep 5, 2023
2 parents 211f0bc + 85c3c6b commit 3239fdf
Show file tree
Hide file tree
Showing 18 changed files with 457 additions and 287 deletions.
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ env:
$(BINDIR)/python -m pip install -r requirements.txt

test:
$(BINDIR)/python -m unittest $(PROJECT)/tests/*.py
$(BINDIR)/coverage run -m unittest $(PROJECT)/tests/*.py

pytest:
$(BINDIR)/pytest $(PROJECT)/tests/*.py
Expand All @@ -36,6 +36,10 @@ cleanenv:
docs:
$(BINDIR)/sphinx-build -E docs $(BUILDDIR); open build/index.html

coverage: test
$(BINDIR)/coverage html
open htmlcov/index.html

pep8:
# use autopep8 to make innocuous fixes
$(BINDIR)/autopep8 -i $(PROJECT)/*.py $(PROJECT)/tests/*.py
Expand Down
4 changes: 3 additions & 1 deletion cvxportfolio/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ class Benchmark(BaseBenchmark, DataEstimator):
"""

def __init__(self, benchmark_weights):
DataEstimator.__init__(self, benchmark_weights)
DataEstimator.__init__(self,
benchmark_weights,
data_includes_cash=True)


class CashBenchmark(BaseBenchmark):
Expand Down
6 changes: 4 additions & 2 deletions cvxportfolio/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,8 @@ class FactorMaxLimit(BaseWeightConstraint, InequalityConstraint):
def __init__(self, factor_exposure, limit):
self.factor_exposure = DataEstimator(
factor_exposure, compile_parameter=True)
self.limit = DataEstimator(limit, compile_parameter=True)
self.limit = DataEstimator(limit, compile_parameter=True,
ignore_shape_check=True)

def _compile_constr_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
"Compile left hand side of the constraint expression."
Expand Down Expand Up @@ -478,7 +479,8 @@ class FactorMinLimit(BaseWeightConstraint, InequalityConstraint):
def __init__(self, factor_exposure, limit):
self.factor_exposure = DataEstimator(
factor_exposure, compile_parameter=True)
self.limit = DataEstimator(limit, compile_parameter=True)
self.limit = DataEstimator(limit, compile_parameter=True,
ignore_shape_check=True)

def _compile_constr_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
"Compile left hand side of the constraint expression."
Expand Down
323 changes: 198 additions & 125 deletions cvxportfolio/costs.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions cvxportfolio/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,7 @@ def __init__(

self.base_location = base_location
self.use_last_available_time = use_last_available_time
self.universe_maybe_noncash = None # fix, but we should retire this class

def _recursive_pre_evaluation(self, *args, **kwargs):
self.data = self.update_and_load(self.symbol)
132 changes: 100 additions & 32 deletions cvxportfolio/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,38 +146,45 @@ class DataEstimator(PolicyEstimator):
by its `_recursive_values_in_time` method, which is the way `cvxportfolio`
objects use this class to get data.
Args:
data (object, pandas.Series, pandas.DataFrame): Data expressed
preferably as pandas Series or DataFrame where the first
index is a pandas.DateTimeIndex. Otherwise you can
pass a callable object which implements the _recursive_values_in_time method
(with the standard signature) and returns the corresponding value in time,
or a constant float, numpy.array, or even pandas Series or DataFrame not
indexed by time (e.g., a covariance matrix where both index and columns
are the stock symbols).
use_last_available_time (bool): if the pandas index exists
and is a pandas.DateTimeIndex you can instruct self._recursive_values_in_time
to retrieve the last available value at time t by setting
this to True. Default is False.
:param data: Data expressed preferably as pandas Series or DataFrame
where the first index is a pandas.DateTimeIndex. Otherwise you can
pass a callable object which implements the _recursive_values_in_time method
(with the standard signature) and returns the corresponding value in time,
or a constant float, numpy.array, or even pandas Series or DataFrame not
indexed by time (e.g., a covariance matrix where both index and columns
are the stock symbols).
:type data: object, pandas.Series, pandas.DataFrame
:param use_last_available_time: if the pandas index exists
and is a pandas.DateTimeIndex you can instruct self._recursive_values_in_time
to retrieve the last available value at time t by setting
this to True. Default is False.
:type use_last_available_time: bool
"""

def __init__(self, data, use_last_available_time=False, allow_nans=False,
compile_parameter=False, non_negative=False, positive_semi_definite=False):
compile_parameter=False, non_negative=False, positive_semi_definite=False,
data_includes_cash=False, # affects _universe_subselect
ignore_shape_check=False # affects _universe_subselect
):
self.data = data
self.use_last_available_time = use_last_available_time
self.allow_nans = allow_nans
self.compile_parameter = compile_parameter
self.non_negative = non_negative
self.positive_semi_definite = positive_semi_definite
self.universe_maybe_noncash = None
self.data_includes_cash = data_includes_cash
self.ignore_shape_check = ignore_shape_check

def _recursive_pre_evaluation(self, universe, backtest_times):
# super()._recursive_pre_evaluation(universe, backtest_times)
if self.compile_parameter:
value = self.internal__recursive_values_in_time(
t=backtest_times[0])
self.parameter = cp.Parameter(value.shape if hasattr(value, "shape") else (),
PSD=self.positive_semi_definite, nonneg=self.non_negative)
PSD=self.positive_semi_definite, nonneg=self.non_negative)

self.universe_maybe_noncash = universe if self.data_includes_cash else universe[:-1]

def value_checker(self, result):
"""Ensure that only scalars or arrays without np.nan are returned.
Expand Down Expand Up @@ -215,50 +222,111 @@ def value_checker(self, result):
raise DataError(
f"{self.__class__.__name__}._recursive_values_in_time result is not a scalar or array."
)

def _universe_subselect(self, data):
    """Restrict ``data`` to the relevant universe, or check its shape.

    See github issue #106. The universe used is
    ``self.universe_maybe_noncash``.

    * pandas Series: its index is subselected with the universe.
    * pandas DataFrame (covariance, exposure matrix): we try to
      subselect both index and columns; if that fails only the
      columns; if that fails only the index.
    * numpy array: nothing is subselected, we only check that at least
      one of its dimensions equals the size of the universe.
    * anything else (e.g., a scalar) is returned unchanged.

    If the universe is None, or ``self.ignore_shape_check`` is set, all
    checks are skipped and ``data`` is returned as is.

    :param data: value to reconcile with the universe.
    :type data: pandas.Series, pandas.DataFrame, numpy.ndarray or scalar
    :returns: the subselected pandas object, or ``data`` itself.
    :raises MissingValuesError: if a pandas object can not be
        subselected with the universe, or no dimension of a numpy
        array matches the universe's size.
    """
    universe = self.universe_maybe_noncash
    if universe is None or self.ignore_shape_check:
        return data

    owner = self.__class__.__name__
    cash_note = "" if self.data_includes_cash else " minus cash"

    # The messages below describe ``data`` (the object being checked),
    # not ``self.data``, which may be a callable, a scalar, or a
    # time-indexed container with a different index and shape.
    if isinstance(data, pd.Series):
        try:
            return data.loc[universe]
        except KeyError as exc:
            raise MissingValuesError(
                f"The pandas Series found by {owner} has index {data.index}"
                f" while the current universe{cash_note} is {universe}."
                " It was not possible to reconcile the two.") from exc

    if isinstance(data, pd.DataFrame):
        # Most specific selection first: both axes, then columns only,
        # then index only.
        everything = slice(None)
        for indexer in ((universe, universe),
                        (everything, universe),
                        (universe, everything)):
            try:
                return data.loc[indexer]
            except KeyError:
                continue
        raise MissingValuesError(
            f"The pandas DataFrame found by {owner} has index {data.index}"
            f" and columns {data.columns}"
            f" while the current universe{cash_note} is {universe}."
            " It was not possible to reconcile the two.")

    if isinstance(data, np.ndarray):
        if len(universe) not in data.shape:
            raise MissingValuesError(
                f"The numpy array found by {owner} has dimensions {data.shape}"
                f" while the current universe{cash_note}"
                f" has size {len(universe)}.")
        return data

    # scalar
    return data



def internal__recursive_values_in_time(self, t, *args, **kwargs):
"""Internal method called by `self._recursive_values_in_time`."""

# if self.data has values_in_time we use it
if hasattr(self.data, "values_in_time"):
_ = self.data.values_in_time(t=t, *args, **kwargs)
if hasattr(_, 'values'):
return self.value_checker(_.values)
tmp = self.data.values_in_time(t=t, *args, **kwargs)
tmp = self._universe_subselect(tmp)
if hasattr(tmp, 'values'):
return self.value_checker(tmp.values)
else:
return self.value_checker(_)
return self.value_checker(tmp)

# if self.data is pandas and has datetime (first) index
if (hasattr(self.data, "loc") and hasattr(self.data, "index")
and (isinstance(self.data.index, pd.DatetimeIndex)
or (
isinstance(self.data.index, pd.MultiIndex)
and isinstance(self.data.index.levels[0], pd.DatetimeIndex)
)
)
):
or (isinstance(self.data.index, pd.MultiIndex) and
isinstance(self.data.index.levels[0], pd.DatetimeIndex)))):
try:
if self.use_last_available_time:
if isinstance(self.data.index, pd.MultiIndex):
newt = self.data.index.levels[0][
self.data.index.levels[0] <= t
][-1]
self.data.index.levels[0] <= t][-1]
else:
newt = self.data.index[self.data.index <= t][-1]
tmp = self.data.loc[newt]
else:
tmp = self.data.loc[t]
if hasattr(tmp, "values"):
return self.value_checker(tmp.values)
return self.value_checker(self._universe_subselect(tmp.values))
else:
return self.value_checker(tmp)
return self.value_checker(self._universe_subselect(tmp))

except (KeyError, IndexError):
raise MissingValuesError(
f"{self.__class__.__name__}._recursive_values_in_time could not find data for requested time."
)

# if data is pandas but no datetime index (constant in time)
if hasattr(self.data, "values"):
return self.value_checker(self.data.values)
return self.value_checker(self._universe_subselect(self.data.values))

return self.value_checker(self.data)
# if data is scalar or numpy
return self.value_checker(self._universe_subselect(self.data))

def _recursive_values_in_time(self, t, *args, **kwargs):
"""Obtain value of `self.data` at time t or right before.
Expand Down
29 changes: 20 additions & 9 deletions cvxportfolio/hyperparameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ def _collect_hyperparameters(self):
if hasattr(el, '_collect_hyperparameters'):
result += el._collect_hyperparameters()
return result

def __repr__(self):
    """Return the ``left * right`` products of all terms as one string.

    Each pair taken from ``self.left`` and ``self.right`` is rendered as
    ``'<left> * <right>'``.
    """
    # NOTE(review): consecutive terms are joined with no separator
    # between them -- confirm whether one (e.g. ' + ') was intended.
    return ''.join(
        str(left_term) + ' * ' + str(right_term)
        for left_term, right_term in zip(self.left, self.right))


class RangeHyperParameter(HyperParameter):
Expand All @@ -100,29 +106,34 @@ class RangeHyperParameter(HyperParameter):
its subclasses for ones that you can use.
"""

def __init__(self, values_range, initial_value):
if not (initial_value in values_range):
def __init__(self, values_range, current_value):
if not (current_value in values_range):
raise SyntaxError('Initial value must be in the provided range')
self.values_range = values_range
self.current_value = initial_value
self.current_value = current_value

def __repr__(self):
    """Return the class name with the range and the current value."""
    class_name = self.__class__.__name__
    return (f'{class_name}(values_range={self.values_range}'
            f', current_value={self.current_value})')


class GammaRisk(RangeHyperParameter):
"""Multiplier of a risk term."""

def __init__(self, values_range=GAMMA_RISK_RANGE, initial_value=1.):
super().__init__(values_range, initial_value)
def __init__(self, values_range=GAMMA_RISK_RANGE, current_value=1.):
super().__init__(values_range, current_value)


class GammaTrade(RangeHyperParameter):
"""Multiplier of a transaction cost term."""

def __init__(self, values_range=GAMMA_COST_RANGE, initial_value=1.):
super().__init__(values_range, initial_value)
def __init__(self, values_range=GAMMA_COST_RANGE, current_value=1.):
super().__init__(values_range, current_value)


class GammaHold(RangeHyperParameter):
"""Multiplier of a holding cost term."""

def __init__(self, values_range=GAMMA_COST_RANGE, initial_value=1.):
super().__init__(values_range, initial_value)
def __init__(self, values_range=GAMMA_COST_RANGE, current_value=1.):
super().__init__(values_range, current_value)
18 changes: 9 additions & 9 deletions cvxportfolio/policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,21 +345,21 @@ def __init__(self, objective, constraints=[], include_cash_return=True, planning
if not (hasattr(constraints, '__iter__') and len(constraints) and (hasattr(constraints[0], '__iter__') and len(objective) == len(constraints))):
raise SyntaxError(
'If you pass objective as a list, constraints should be a list of lists of the same length.')
self.planning_horizon = len(objective)
self._planning_horizon = len(objective)
self.objective = objective
self.constraints = constraints
else:
if not np.isscalar(planning_horizon):
raise SyntaxError(
'If `objective` and `constraints` are the same for all steps you must specify `planning_horizon`.')
self.planning_horizon = planning_horizon
self._planning_horizon = planning_horizon
self.objective = [copy.deepcopy(objective) for i in range(
planning_horizon)] if planning_horizon > 1 else [objective]
self.constraints = [copy.deepcopy(constraints) for i in range(
planning_horizon)] if planning_horizon > 1 else [constraints]

self.include_cash_return = include_cash_return
if self.include_cash_return:
self._include_cash_return = include_cash_return
if self._include_cash_return:
self.objective = [el + CashReturn() for el in self.objective]
self.terminal_constraint = terminal_constraint
self.benchmark = benchmark() if isinstance(benchmark, type) else benchmark
Expand Down Expand Up @@ -394,7 +394,7 @@ def compile_and_check_constraint(constr, i):
self.cvxpy_constraints = sum(self.cvxpy_constraints, [])
self.cvxpy_constraints += [cp.sum(z) == 0 for z in self.z_at_lags]
w = self.w_current
for i in range(self.planning_horizon):
for i in range(self._planning_horizon):
self.cvxpy_constraints.append(
self.w_plus_at_lags[i] == self.z_at_lags[i] + w)
self.cvxpy_constraints.append(
Expand Down Expand Up @@ -433,11 +433,11 @@ def _recursive_pre_evaluation(self, universe, backtest_times):
# self.portfolio_value = cp.Parameter(nonneg=True)
self.w_current = cp.Parameter(len(universe))
self.z_at_lags = [cp.Variable(len(universe))
for i in range(self.planning_horizon)]
for i in range(self._planning_horizon)]
self.w_plus_at_lags = [cp.Variable(
len(universe)) for i in range(self.planning_horizon)]
len(universe)) for i in range(self._planning_horizon)]
self.w_plus_minus_w_bm_at_lags = [cp.Variable(
len(universe)) for i in range(self.planning_horizon)]
len(universe)) for i in range(self._planning_horizon)]

# simulator will overwrite this with cached loaded from disk
self.cache = {}
Expand Down Expand Up @@ -499,7 +499,7 @@ def _collect_hyperparameters(self):
result += el._collect_hyperparameters()
for el in self.constraints:
for constr in el:
result += el._collect_hyperparameters()
result += constr._collect_hyperparameters()
return result


Expand Down
2 changes: 1 addition & 1 deletion cvxportfolio/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def __repr__(self):
"Per-period absolute growth rate": self._print_growth_rate(self.growth_rates.mean()),
"Per-period excess growth rate": self._print_growth_rate(self.excess_growth_rates.mean()),
# stats
"Sharpe ratio (w/ excess returns)": self.sharpe_ratio,
"Sharpe ratio": self.sharpe_ratio,
"Worst drawdown (%)": self.drawdown.min() * 100,
"Average drawdown (%)": self.drawdown.mean() * 100,
"Per-period Turnover (%)": self.turnover.mean() * 100,
Expand Down
Loading

0 comments on commit 3239fdf

Please sign in to comment.