From e3837a8768113b63f8b15f4f5e174153581bc939 Mon Sep 17 00:00:00 2001
From: Enzo Busseti <enzo.busseti@me.com>
Date: Mon, 4 Sep 2023 22:00:18 +0400
Subject: [PATCH 1/8] working on GH issue #107 (fixed CashReturn), whitespaces

---
 Makefile                                   |  6 +++-
 cvxportfolio/costs.py                      | 20 +++++++-----
 cvxportfolio/hyperparameters.py            | 29 ++++++++++++-----
 cvxportfolio/policies.py                   | 18 +++++-----
 cvxportfolio/result.py                     |  2 +-
 cvxportfolio/returns.py                    | 38 ++++++++++------------
 cvxportfolio/simulator.py                  | 20 ++++++++++++
 cvxportfolio/tests/test_hyperparameters.py |  6 ++--
 cvxportfolio/tests/test_returns.py         |  4 +--
 cvxportfolio/tests/test_simulator.py       | 30 ++++++++++++-----
 10 files changed, 112 insertions(+), 61 deletions(-)

diff --git a/Makefile b/Makefile
index 2a6461195..f506e9654 100644
--- a/Makefile
+++ b/Makefile
@@ -16,7 +16,7 @@ env:
 	$(BINDIR)/python -m pip install -r requirements.txt
 
 test:
-	$(BINDIR)/python -m unittest $(PROJECT)/tests/*.py
+	$(BINDIR)/coverage run -m unittest $(PROJECT)/tests/*.py
 		
 pytest:
 	$(BINDIR)/pytest $(PROJECT)/tests/*.py
@@ -36,6 +36,10 @@ cleanenv:
 docs:
 	$(BINDIR)/sphinx-build -E docs $(BUILDDIR); open build/index.html
 
+coverage: test
+	$(BINDIR)/coverage html
+	open htmlcov/index.html
+
 pep8:
 	# use autopep8 to make innocuous fixes 
 	$(BINDIR)/autopep8 -i $(PROJECT)/*.py $(PROJECT)/tests/*.py
diff --git a/cvxportfolio/costs.py b/cvxportfolio/costs.py
index 69e3d5639..b20d60cd3 100644
--- a/cvxportfolio/costs.py
+++ b/cvxportfolio/costs.py
@@ -153,8 +153,9 @@ def _compile_to_cvxpy(self, w_plus, z, portfolio_value):
         """Iterate over constituent costs."""
         self.expression = 0
         for multiplier, cost in zip(self.multipliers, self.costs):
-            add = multiplier * \
-                cost._compile_to_cvxpy(w_plus, z, portfolio_value)
+            add = (multiplier.current_value 
+                if hasattr(multiplier, 'current_value') else multiplier) * \
+                    cost._compile_to_cvxpy(w_plus, z, portfolio_value)
             if not add.is_dcp():
                 raise ConvexSpecificationError(cost * multiplier)
             if not add.is_concave():
@@ -171,13 +172,16 @@ def __repr__(self):
         """Pretty-print."""
         result = ''
         for i, (mult, cost) in enumerate(zip(self.multipliers, self.costs)):
-            if mult == 0:
-                continue
-            if mult < 0:
-                result += ' - ' if i > 0 else '-'
+            if not isinstance(mult, HyperParameter):
+                if mult == 0:
+                    continue
+                if mult < 0:
+                    result += ' - ' if i > 0 else '-'
+                else:
+                    result += ' + ' if i > 0 else ''
+                    result += (str(abs(mult)) + ' * ' if abs(mult) != 1 else '')
             else:
-                result += ' + ' if i > 0 else ''
-            result += (str(abs(mult)) + ' * ' if abs(mult) != 1 else '')
+                result += str(mult) + ' * '
             result += cost.__repr__()
         return result
 
diff --git a/cvxportfolio/hyperparameters.py b/cvxportfolio/hyperparameters.py
index 24691a3c2..81040f6be 100644
--- a/cvxportfolio/hyperparameters.py
+++ b/cvxportfolio/hyperparameters.py
@@ -91,6 +91,12 @@ def _collect_hyperparameters(self):
             if hasattr(el, '_collect_hyperparameters'):
                 result += el._collect_hyperparameters()
         return result
+        
+    def __repr__(self):
+        result = ''
+        for le, ri in zip(self.left, self.right):
+            result += str(le) + ' * ' + str(ri)
+        return result
 
 
 class RangeHyperParameter(HyperParameter):
@@ -100,29 +106,34 @@ class RangeHyperParameter(HyperParameter):
     its subclasses for ones that you can use.
     """
 
-    def __init__(self, values_range, initial_value):
-        if not (initial_value in values_range):
+    def __init__(self, values_range, current_value):
+        if not (current_value in values_range):
             raise SyntaxError('Initial value must be in the provided range')
         self.values_range = values_range
-        self.current_value = initial_value
+        self.current_value = current_value
+        
+    def __repr__(self):
+        return self.__class__.__name__ \
+            + f'(values_range={self.values_range}'\
+            + f', current_value={self.current_value})'
 
 
 class GammaRisk(RangeHyperParameter):
     """Multiplier of a risk term."""
 
-    def __init__(self, values_range=GAMMA_RISK_RANGE, initial_value=1.):
-        super().__init__(values_range, initial_value)
+    def __init__(self, values_range=GAMMA_RISK_RANGE, current_value=1.):
+        super().__init__(values_range, current_value)
 
 
 class GammaTrade(RangeHyperParameter):
     """Multiplier of a transaction cost term."""
 
-    def __init__(self, values_range=GAMMA_COST_RANGE, initial_value=1.):
-        super().__init__(values_range, initial_value)
+    def __init__(self, values_range=GAMMA_COST_RANGE, current_value=1.):
+        super().__init__(values_range, current_value)
 
 
 class GammaHold(RangeHyperParameter):
     """Multiplier of a holding cost term."""
 
-    def __init__(self, values_range=GAMMA_COST_RANGE, initial_value=1.):
-        super().__init__(values_range, initial_value)
+    def __init__(self, values_range=GAMMA_COST_RANGE, current_value=1.):
+        super().__init__(values_range, current_value)
diff --git a/cvxportfolio/policies.py b/cvxportfolio/policies.py
index 9bfcd4fb1..a074da00d 100644
--- a/cvxportfolio/policies.py
+++ b/cvxportfolio/policies.py
@@ -345,21 +345,21 @@ def __init__(self, objective, constraints=[], include_cash_return=True, planning
             if not (hasattr(constraints, '__iter__') and len(constraints) and (hasattr(constraints[0], '__iter__') and len(objective) == len(constraints))):
                 raise SyntaxError(
                     'If you pass objective as a list, constraints should be a list of lists of the same length.')
-            self.planning_horizon = len(objective)
+            self._planning_horizon = len(objective)
             self.objective = objective
             self.constraints = constraints
         else:
             if not np.isscalar(planning_horizon):
                 raise SyntaxError(
                     'If `objective` and `constraints` are the same for all steps you must specify `planning_horizon`.')
-            self.planning_horizon = planning_horizon
+            self._planning_horizon = planning_horizon
             self.objective = [copy.deepcopy(objective) for i in range(
                 planning_horizon)] if planning_horizon > 1 else [objective]
             self.constraints = [copy.deepcopy(constraints) for i in range(
                 planning_horizon)] if planning_horizon > 1 else [constraints]
 
-        self.include_cash_return = include_cash_return
-        if self.include_cash_return:
+        self._include_cash_return = include_cash_return
+        if self._include_cash_return:
             self.objective = [el + CashReturn() for el in self.objective]
         self.terminal_constraint = terminal_constraint
         self.benchmark = benchmark() if isinstance(benchmark, type) else benchmark
@@ -394,7 +394,7 @@ def compile_and_check_constraint(constr, i):
         self.cvxpy_constraints = sum(self.cvxpy_constraints, [])
         self.cvxpy_constraints += [cp.sum(z) == 0 for z in self.z_at_lags]
         w = self.w_current
-        for i in range(self.planning_horizon):
+        for i in range(self._planning_horizon):
             self.cvxpy_constraints.append(
                 self.w_plus_at_lags[i] == self.z_at_lags[i] + w)
             self.cvxpy_constraints.append(
@@ -433,11 +433,11 @@ def _recursive_pre_evaluation(self, universe, backtest_times):
         # self.portfolio_value = cp.Parameter(nonneg=True)
         self.w_current = cp.Parameter(len(universe))
         self.z_at_lags = [cp.Variable(len(universe))
-                          for i in range(self.planning_horizon)]
+                          for i in range(self._planning_horizon)]
         self.w_plus_at_lags = [cp.Variable(
-            len(universe)) for i in range(self.planning_horizon)]
+            len(universe)) for i in range(self._planning_horizon)]
         self.w_plus_minus_w_bm_at_lags = [cp.Variable(
-            len(universe)) for i in range(self.planning_horizon)]
+            len(universe)) for i in range(self._planning_horizon)]
 
         # simulator will overwrite this with cached loaded from disk
         self.cache = {}
@@ -499,7 +499,7 @@ def _collect_hyperparameters(self):
             result += el._collect_hyperparameters()
         for el in self.constraints:
             for constr in el:
-                result += el._collect_hyperparameters()
+                result += constr._collect_hyperparameters()
         return result
 
 
diff --git a/cvxportfolio/result.py b/cvxportfolio/result.py
index 7c693ebec..99ae6639b 100644
--- a/cvxportfolio/result.py
+++ b/cvxportfolio/result.py
@@ -204,7 +204,7 @@ def __repr__(self):
             "Per-period absolute growth rate": self._print_growth_rate(self.growth_rates.mean()),
             "Per-period excess growth rate": self._print_growth_rate(self.excess_growth_rates.mean()),
             # stats
-            "Sharpe ratio (w/ excess returns)": self.sharpe_ratio,
+            "Sharpe ratio": self.sharpe_ratio,
             "Worst drawdown (%)": self.drawdown.min() * 100,
             "Average drawdown (%)": self.drawdown.mean() * 100,
             "Per-period Turnover (%)": self.turnover.mean() * 100,
diff --git a/cvxportfolio/returns.py b/cvxportfolio/returns.py
index 11d5aabeb..738786b1b 100644
--- a/cvxportfolio/returns.py
+++ b/cvxportfolio/returns.py
@@ -42,41 +42,39 @@ class BaseReturnsModel(BaseCost):
 class CashReturn(BaseReturnsModel):
     r"""Objective term representing cash return.
 
-    The forecast of cash return :math:`{\left(\hat{r}_t\right)}_n` is the observed value
-    from last period :math:`{\left({r}_{t-1}\right)}_n`.
+    By default, the forecast of cash return :math:`{\left(\hat{r}_t\right)}_n` 
+    is the observed value from last period :math:`{\left({r}_{t-1}\right)}_n`.
 
     This object is included automatically in :class:`SinglePeriodOptimization`
     and :class:`MultiPeriodOptimization` policies. You can change
-    this behavior by setting their ``include_cash_return`` to False.
-
-    :param short_margin_requirement: fraction of value of a short positions
-        that is margined by portfolio cash
-    :type short_margin_requirement: float
+    this behavior by setting their ``include_cash_return`` to False. If you do
+    so, you may include this cost explicitely in the objective. You need
+    to do so (only) if you provide your own cash return forecast.
+
+    :param cash_returns: if you have your forecast for the cash return, you
+        should pass it here, either as a float (if constant) or as pd.Series
+        with datetime index (if it changes in time). If you leave the default,
+        None, the cash return forecast at time t is the observed cash return 
+        at time t-1. (As is suggested in the book.)
+    :type cash_returns: float or pd.Series or None
     """
 
-    def __init__(self, cash_returns=None, short_margin_requirement=1.):
-        self.cash_returns = None if cash_returns is None else DataEstimator(cash_returns,
-                                                                            compile_parameter=True, non_negative=True)
-        self.short_margin_requirement = short_margin_requirement
+    def __init__(self, cash_returns=None):
+        self.cash_returns = None if cash_returns is None else DataEstimator(
+            cash_returns, compile_parameter=True)
 
     def _pre_evaluation(self, universe, backtest_times):
-        self.cash_return_parameter = cp.Parameter(nonneg=True) if self.cash_returns is None \
+        self.cash_return_parameter = cp.Parameter() if self.cash_returns is None \
             else self.cash_returns.parameter
 
-        # else DataEstimator(self.cash_returns, non_negative=True, compile_parameter=True)
-
     def _values_in_time(self, t, past_returns, **kwargs):
         """Update cash return parameter as last cash return."""
         if self.cash_returns is None:
             self.cash_return_parameter.value = past_returns.iloc[-1, -1]
 
     def _compile_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
-        """Apply cash return to "real" cash position (without shorts and margins)."""
-        realcash = (w_plus[-1] - (1 + self.short_margin_requirement)
-                    * cp.sum(cp.neg(w_plus[:-1])))
-        result = realcash * self.cash_return_parameter
-        assert result.is_concave()
-        return result
+        """Apply cash return to cash position."""
+        return w_plus[-1] * self.cash_return_parameter
 
 
 class ReturnsForecast(BaseReturnsModel):
diff --git a/cvxportfolio/simulator.py b/cvxportfolio/simulator.py
index 63e9ca6ba..274ea2bab 100644
--- a/cvxportfolio/simulator.py
+++ b/cvxportfolio/simulator.py
@@ -649,6 +649,26 @@ def _concatenate_backtest_results(self, results):
     @staticmethod
     def _worker(policy, simulator, start_time, end_time, h):
         return simulator._concatenated_backtests(policy, start_time, end_time, h)
+        
+    def optimize_hyperparameters(self, policy, start_time=None, end_time=None, 
+        initial_value=1E6, h=None, objective='sharpe_ratio'):
+        """Optimize hyperparameters of a policy to maximize backtest objective.
+        
+        EXPERIMENTAL: this method is currently being developed.
+        """
+        hyperparameters = policy._collect_hyperparameters()
+        print(hyperparameters)
+        
+        # def evaluate()
+        result_init = self.backtest(policy, start_time=start_time, end_time=end_time, 
+            initial_value=1E6, h=h)
+        
+        objective_init = getattr(result_init, objective)
+        print(result_init)
+        
+        print(objective_init)
+        
+        
 
     def backtest(self, policy, start_time=None, end_time=None, initial_value=1E6, h=None):
         """Backtest trading policy.
diff --git a/cvxportfolio/tests/test_hyperparameters.py b/cvxportfolio/tests/test_hyperparameters.py
index 017607ce1..1a1f96e96 100644
--- a/cvxportfolio/tests/test_hyperparameters.py
+++ b/cvxportfolio/tests/test_hyperparameters.py
@@ -43,7 +43,7 @@ def setUpClass(cls):
 
     def test_basic_HP(self):
 
-        gamma = GammaRisk(initial_value=1)
+        gamma = GammaRisk(current_value=1)
 
         self.assertTrue((-gamma).current_value == -1)
         self.assertTrue((gamma * .5).current_value == .5)
@@ -58,8 +58,8 @@ def test_basic_HP(self):
         cvx.SinglePeriodOptimization(-GammaRisk() * cvx.FullCovariance())
 
     def test_HP_algebra(self):
-        grisk = GammaRisk(initial_value=1)
-        gtrade = GammaRisk(initial_value=.5)
+        grisk = GammaRisk(current_value=1)
+        gtrade = GammaRisk(current_value=.5)
 
         self.assertTrue((grisk + gtrade).current_value == 1.5)
         self.assertTrue((grisk * gtrade).current_value == .5)
diff --git a/cvxportfolio/tests/test_returns.py b/cvxportfolio/tests/test_returns.py
index b43b433e4..f89184c5b 100644
--- a/cvxportfolio/tests/test_returns.py
+++ b/cvxportfolio/tests/test_returns.py
@@ -55,7 +55,7 @@ def test_cash_returns(self):
             t=None, past_returns=self.returns.iloc[:123])
         cr = self.returns.iloc[122, -1]
         self.assertTrue(cvxpy_expression.value == cr * (
-            self.w_plus[-1].value + 2 * np.sum(np.minimum(self.w_plus[:-1].value, 0.))))
+            self.w_plus[-1].value ))
 
     def test_cash_returns_provided(self):
         "Test CashReturn object with provided cash returns."
@@ -66,7 +66,7 @@ def test_cash_returns_provided(self):
             t=self.returns.index[123], past_returns=None)
         cr = self.returns.iloc[123, -1]
         self.assertTrue(cvxpy_expression.value == cr * (
-            self.w_plus[-1].value + 2 * np.sum(np.minimum(self.w_plus[:-1].value, 0.))))
+            self.w_plus[-1].value))
 
     def test_returns_forecast(self):
         "Test ReturnsForecast object with provided assets' returns."
diff --git a/cvxportfolio/tests/test_simulator.py b/cvxportfolio/tests/test_simulator.py
index 0909ee63c..303ffced6 100644
--- a/cvxportfolio/tests/test_simulator.py
+++ b/cvxportfolio/tests/test_simulator.py
@@ -613,7 +613,7 @@ def test_spo_benchmark(self):
                                cvx.Benchmark(myunif)]]
 
         results = sim.backtest_many(policies, start_time='2023-01-01',
-             parallel=False)  # important for test coverage!!
+                                    parallel=False)  # important for test coverage!!
 
         # check myunif is the same as uniform
         self.assertTrue(np.isclose(
@@ -635,15 +635,15 @@ def test_market_neutral(self):
         """Test SPO with market neutral constraint."""
 
         sim = cvx.MarketSimulator(
-            ['AAPL', 'MSFT', 'GE', 'ZM', 'META'], trading_frequency='monthly', base_location=self.datadir)
+            ['AAPL', 'MSFT', 'GE', 'GOOG', 'META', 'GLD'], trading_frequency='monthly', base_location=self.datadir)
 
-        objective = cvx.ReturnsForecast() - 10 * cvx.FullCovariance()
+        objective = cvx.ReturnsForecast() - 2 * cvx.FullCovariance()
 
         policies = [cvx.SinglePeriodOptimization(objective, co) for co in [
             [], [cvx.MarketNeutral()], [cvx.DollarNeutral()]]]
 
         results = sim.backtest_many(policies, start_time='2023-01-01',
-            parallel=False)  # important for test coverage
+                                    parallel=False)  # important for test coverage
         print(results)
 
         # check that market neutral sol is closer to
@@ -660,7 +660,8 @@ def test_timed_constraints(self):
         """Test some constraints that depend on time."""
 
         sim = cvx.StockMarketSimulator(
-            ['AAPL', 'MSFT', 'GE', 'ZM', 'META'], trading_frequency='monthly', base_location=self.datadir)
+            ['AAPL', 'MSFT', 'GE', 'GLD', 'META'], 
+            trading_frequency='monthly', base_location=self.datadir)
 
         # cvx.NoTrade
         objective = cvx.ReturnsForecast() - 10 * cvx.FullCovariance()
@@ -706,7 +707,7 @@ def test_eq_soft_constraints(self):
         policies.append(cvx.SinglePeriodOptimization(
             objective, [cvx.DollarNeutral()]))
         results = sim.backtest_many(policies, start_time='2023-01-01',
-            parallel=False)  # important for test coverage
+                                    parallel=False)  # important for test coverage
         print(results)
         allcashpos = [((res.w.iloc[:, -1]-1)**2).mean() for res in results]
         print(allcashpos)
@@ -728,7 +729,7 @@ def test_ineq_soft_constraints(self):
         policies.append(cvx.SinglePeriodOptimization(
             objective, [cvx.LongOnly(), cvx.MarketNeutral()]))
         results = sim.backtest_many(policies, start_time='2023-01-01',
-            parallel=False)  # important for test coverage
+                                    parallel=False)  # important for test coverage
         print(results)
         allshorts = [np.minimum(res.w.iloc[:, :-1], 0.).sum().sum()
                      for res in results]
@@ -749,7 +750,7 @@ def test_cost_constraints(self):
             for el in [0.01, .02, .05, .1]]
 
         results = sim.backtest_many(policies, start_time='2023-01-01',
-            parallel=False)  # important for test coverage
+                                    parallel=False)  # important for test coverage
         print(results)
 
         self.assertTrue(results[0].volatility < results[1].volatility)
@@ -773,6 +774,19 @@ def test_dcp_convex_raises(self):
         with self.assertRaises(ConvexityError):
             sim.backtest(policy)
 
+    def test_hyperparameters_optimize(self):
+
+        objective = cvx.ReturnsForecast() - cvx.GammaRisk() * cvx.FullCovariance()
+        policy = cvx.SinglePeriodOptimization(
+            objective, [cvx.LongOnly(), cvx.LeverageLimit(1)])
+
+        simulator = cvx.StockMarketSimulator(
+            ['AAPL', 'MSFT', 'GE', 'ZM', 'META'],
+            trading_frequency='monthly',
+            base_location=self.datadir)
+
+        simulator.optimize_hyperparameters(policy, start_time='2023-01-01')
+
 
 if __name__ == '__main__':
     unittest.main()

From 075441f085d8fa2895b5490c983620ac50afe7b0 Mon Sep 17 00:00:00 2001
From: Enzo Busseti <enzo.busseti@me.com>
Date: Mon, 4 Sep 2023 23:17:41 +0400
Subject: [PATCH 2/8] half-rewritten HoldingCost

---
 cvxportfolio/costs.py | 130 ++++++++++++++++++++++++++----------------
 1 file changed, 80 insertions(+), 50 deletions(-)

diff --git a/cvxportfolio/costs.py b/cvxportfolio/costs.py
index b20d60cd3..84604f8dc 100644
--- a/cvxportfolio/costs.py
+++ b/cvxportfolio/costs.py
@@ -211,59 +211,100 @@ def _compile_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
             return cp.sum(cp.pos(expr))
 
 
+def _annual_percent_to_per_period(value, ppy):
+    """Transform annual percent to per-period return."""
+    return np.exp(np.log(1 + value / 100) / ppy) - 1
+    
 class HoldingCost(BaseCost):
-    """This is a generic holding cost model.
-
-    Currently it is not meant to be used directly. Look at
-    :class:`StocksHoldingCost` for its version specialized to
-    the stock market.
+    """Generic holding cost model, as described in page 11 of the book.
+
+    :param short_fees:
+    :type short_fees: float, pd.Series, pd.DataFrame or None
+    :param short_fees:
+    :type short_fees:  float, pd.Series, pd.DataFrame or None
+    :param dividends:
+    :type short_fees: float, pd.Series, pd.DataFrame or None
+    :param periods_per_year:
+    :type periods_per_year: float or None
     """
 
-    def __init__(self,
-                 spread_on_borrowing_assets_percent=None,
-                 spread_on_lending_cash_percent=None,
-                 spread_on_borrowing_cash_percent=None,
-                 periods_per_year=None,
-                 # TODO revisit this plus spread_on_borrowing_stocks_percent syntax
-                 cash_return_on_borrow=False,
-                 dividends=None):
-
-        self.spread_on_borrowing_assets_percent = None if spread_on_borrowing_assets_percent is None else \
-            DataEstimator(spread_on_borrowing_assets_percent)
-        self.dividends = None if dividends is None else DataEstimator(
-            dividends, compile_parameter=True)
-        self.spread_on_lending_cash_percent = None if spread_on_lending_cash_percent is None else \
-            DataEstimator(spread_on_lending_cash_percent)
-        self.spread_on_borrowing_cash_percent = None if spread_on_borrowing_cash_percent is None else \
-            DataEstimator(spread_on_borrowing_cash_percent)
+    def __init__(self, short_fees=None, long_fees=None, dividends=None,
+                 periods_per_year=None):
 
+        self.short_fees = None if short_fees is None else DataEstimator(short_fees)
+        self.long_fees = None if long_fees is None else DataEstimator(long_fees)
+        self.dividends = None if dividends is None else DataEstimator(dividends)
         self.periods_per_year = periods_per_year
-        self.cash_return_on_borrow = cash_return_on_borrow
 
     def _pre_evaluation(self, universe, backtest_times):
+        """Initialize cvxpy parameters.
+        
+        We don't use the parameter from DataEstimator because we need to
+        divide the value by periods_per_year.
+        """
+
+        if not (self.short_fees is None):
+            self._short_fees_parameter = cp.Parameter(len(universe) - 1)
 
-        if self.spread_on_borrowing_assets_percent is not None or self.cash_return_on_borrow:
-            self.borrow_cost_stocks = cp.Parameter(
-                len(universe) - 1, nonneg=True)
+        if not (self.long_fees is None):
+            self._long_fees_parameter = cp.Parameter(len(universe) - 1)
+
+        if not (self.dividends is None):
+            self._dividends_parameter = cp.Parameter(len(universe) - 1)
 
     def _values_in_time(self, t, past_returns, **kwargs):
-        """We use yesterday's value of the cash return here while in the simulator
-        we use today's. In the US, updates to the FED rate are published outside
-        of trading hours so we might as well use the actual value for today's. The difference
-        is very small so for now we do this. 
+        """Update cvxpy parameters.
+        
+        We compute the estimate of periods per year from past returns
+        (if not provided by the user) and populate the cvxpy parameters
+        with the current values of the user-provided data, transformed
+        to per-period.
         """
-        ppy = periods_per_year(past_returns.index) if self.periods_per_year is None else \
-            self.periods_per_year
+        
+        if not ((self.short_fees is None) 
+                and (self.long_fees is None) 
+                and (self.dividends is None)):
+            ppy = periods_per_year(past_returns.index) if self.periods_per_year is None else \
+                self.periods_per_year
+
+        if not (self.short_fees is None):
+            self._short_fees_parameter.value = \
+                np.ones(past_returns.shape[1]-1) * \
+                     _annual_percent_to_per_period(self.short_fees.current_value, ppy)
+
+        if not (self.long_fees is None):
+            self._long_fees_parameter.value = \
+                np.ones(past_returns.shape[1]-1) * \
+                     _annual_percent_to_per_period(self.long_fees.current_value, ppy)
 
-        cash_return = past_returns.iloc[-1, -1]
+        if not (self.dividends is None):
+            self._dividends_parameter.value = \
+                np.ones(past_returns.shape[1]-1) * \
+                     _annual_percent_to_per_period(self.dividends.current_value, ppy)
 
-        if self.spread_on_borrowing_assets_percent is not None or self.cash_return_on_borrow:
-            self.borrow_cost_stocks.value = np.ones(past_returns.shape[1] - 1) * (
-                cash_return if self.cash_return_on_borrow else 0.) + \
-                self.spread_on_borrowing_assets_percent.current_value / \
-                (100 * ppy)
 
+    def _compile_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
+        """Compile cost to cvxpy expression."""
+
+        expression = 0.
+        
+        if not (self.short_fees is None):
+            expression += self._short_fees_parameter.T @ cp.neg(w_plus[:-1])
+        
+        if not (self.long_fees is None):
+            expression += self._long_fees_parameter.T @ cp.pos(w_plus[:-1])
+
+        if not (self.dividends is None):
+            # we have a minus sign because costs are deducted from PnL
+            expression -= self.dividends.parameter.T @ w_plus[:-1]
+            
+        assert expression.is_convex()
+        
+        return expression
+
+    
     def _simulate(self, t, h_plus, current_and_past_returns, **kwargs):
+        """Simulate cost."""
 
         ppy = periods_per_year(current_and_past_returns.index) if self.periods_per_year is None else \
             self.periods_per_year
@@ -300,19 +341,7 @@ def _simulate(self, t, h_plus, current_and_past_returns, **kwargs):
 
         return result
 
-    def _compile_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
-        """Compile cost to cvxpy expression."""
-
-        expression = 0.
-
-        if not (self.spread_on_borrowing_assets_percent is None):
-            expression += cp.multiply(self.borrow_cost_stocks,
-                                      cp.neg(w_plus)[:-1])
 
-        if not (self.dividends is None):
-            expression -= cp.multiply(self.dividends.parameter, w_plus[:-1])
-        assert cp.sum(expression).is_convex()
-        return cp.sum(expression)
 
 
 class StocksHoldingCost(HoldingCost):
@@ -340,6 +369,7 @@ class StocksHoldingCost(HoldingCost):
     """
 
     def __init__(self,
+                 
                  spread_on_borrowing_stocks_percent=.5,
                  spread_on_lending_cash_percent=.5,
                  spread_on_borrowing_cash_percent=.5,

From 92f74131f121dbba8718d1e6e5c6874b8b0c80bb Mon Sep 17 00:00:00 2001
From: Enzo Busseti <enzo.busseti@me.com>
Date: Mon, 4 Sep 2023 23:31:50 +0400
Subject: [PATCH 3/8] rewritten HoldingCost need to update MarketSimulator (we
 want t_next, which we have for free) and also address GH issue #106 for 
 DataEstimator in the meantime

---
 cvxportfolio/costs.py | 61 +++++++++++++++++--------------------------
 1 file changed, 24 insertions(+), 37 deletions(-)

diff --git a/cvxportfolio/costs.py b/cvxportfolio/costs.py
index 84604f8dc..8d0336655 100644
--- a/cvxportfolio/costs.py
+++ b/cvxportfolio/costs.py
@@ -214,7 +214,8 @@ def _compile_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
 def _annual_percent_to_per_period(value, ppy):
     """Transform annual percent to per-period return."""
     return np.exp(np.log(1 + value / 100) / ppy) - 1
-    
+
+
 class HoldingCost(BaseCost):
     """Generic holding cost model, as described in page 11 of the book.
 
@@ -296,52 +297,38 @@ def _compile_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
 
         if not (self.dividends is None):
             # we have a minus sign because costs are deducted from PnL
-            expression -= self.dividends.parameter.T @ w_plus[:-1]
+            expression -= self._dividends_parameter.T @ w_plus[:-1]
             
         assert expression.is_convex()
         
         return expression
 
     
-    def _simulate(self, t, h_plus, current_and_past_returns, **kwargs):
-        """Simulate cost."""
-
-        ppy = periods_per_year(current_and_past_returns.index) if self.periods_per_year is None else \
-            self.periods_per_year
+    def _simulate(self, t, h_plus, current_and_past_returns, t_next, **kwargs):
+        """
+        TODO: make sure simulator cost sign convention is the same as optimization cost!
+        TODO: make sure DataEstimator returns np.array of correct size!
+        TODO: make sure simulator cost estimators are recursively evaluated!
+        """
+        
+        year_divided_by_period = pd.TimeDelta('365.24d') / (t_next - t)
 
-        cash_return = current_and_past_returns.iloc[-1, -1]
-        multiplier = 1 / (100 * ppy)
-        result = 0.
-        borrowed_stock_positions = np.minimum(h_plus.iloc[:-1], 0.)
-        result += np.sum(((cash_return if self.cash_return_on_borrow else 0.) +
-                          (self.spread_on_borrowing_assets_percent._recursive_values_in_time(t) * multiplier if
-                           self.spread_on_borrowing_assets_percent is not None else 0.))
-                         * borrowed_stock_positions)
-
-        if self.dividends is not None:
-            result += np.sum(h_plus[:-1] *
-                             self.dividends._recursive_values_in_time(t))
-
-        # lending_spread = DataEstimator(spread_on_lending_cash_percent)._recursive_values_in_time(t) * multiplier
-        # borrowing_spread = DataEstimator(spread_on_borrowing_cash_percent)._recursive_values_in_time(t) * multiplier
-
-        # cash_return = current_and_past_returns.iloc[-1,-1]
-        real_cash = h_plus.iloc[-1] + sum(np.minimum(h_plus.iloc[:-1], 0.))
-
-        if real_cash > 0:
-            if self.spread_on_lending_cash_percent is not None:
-                result += real_cash * \
-                    (max(cash_return - self.spread_on_lending_cash_percent._recursive_values_in_time(
-                        t) * multiplier, 0.) - cash_return)
-        else:
-            if self.spread_on_borrowing_cash_percent is not None:
-                result += real_cash * \
-                    self.spread_on_borrowing_cash_percent._recursive_values_in_time(
-                        t) * multiplier
+        cost = 0. 
 
-        return result
+        if not (self.short_fees is None):
+            cost += _annual_percent_to_per_period(
+                self.short_fees.current_value, year_divided_by_period).T @ cp.neg(h_plus[:-1])
+        
+        if not (self.long_fees is None):
+            expression += _annual_percent_to_per_period(
+                self.long_fees.current_value, year_divided_by_period).T @ cp.pos(h_plus[:-1])
 
+        if not (self.dividends is None):
+            # we have a minus sign because costs are deducted from PnL
+            expression -= _annual_percent_to_per_period(
+                self.dividends.current_value, year_divided_by_period).T @ h_plus[:-1]
 
+        return result
 
 
 class StocksHoldingCost(HoldingCost):

From 32fa8c150035feca6ea3925827d969adcea6a209 Mon Sep 17 00:00:00 2001
From: Enzo Busseti <enzo.busseti@me.com>
Date: Mon, 4 Sep 2023 23:34:56 +0400
Subject: [PATCH 4/8] typo, provisionally eliminated StocksHoldingCost (added
 default value to HoldingCost)

---
 cvxportfolio/costs.py | 82 +++++++++++++++++++------------------------
 1 file changed, 37 insertions(+), 45 deletions(-)

diff --git a/cvxportfolio/costs.py b/cvxportfolio/costs.py
index 8d0336655..b7f7f48fc 100644
--- a/cvxportfolio/costs.py
+++ b/cvxportfolio/costs.py
@@ -229,7 +229,7 @@ class HoldingCost(BaseCost):
     :type periods_per_year: float or None
     """
 
-    def __init__(self, short_fees=None, long_fees=None, dividends=None,
+    def __init__(self, short_fees=0.05, long_fees=None, dividends=None,
                  periods_per_year=None):
 
         self.short_fees = None if short_fees is None else DataEstimator(short_fees)
@@ -320,58 +320,50 @@ def _simulate(self, t, h_plus, current_and_past_returns, t_next, **kwargs):
                 self.short_fees.current_value, year_divided_by_period).T @ cp.neg(h_plus[:-1])
         
         if not (self.long_fees is None):
-            expression += _annual_percent_to_per_period(
+            cost += _annual_percent_to_per_period(
                 self.long_fees.current_value, year_divided_by_period).T @ cp.pos(h_plus[:-1])
 
         if not (self.dividends is None):
             # we have a minus sign because costs are deducted from PnL
-            expression -= _annual_percent_to_per_period(
+            cost -= _annual_percent_to_per_period(
                 self.dividends.current_value, year_divided_by_period).T @ h_plus[:-1]
 
-        return result
-
+        return cost
 
-class StocksHoldingCost(HoldingCost):
-    """A model for holding cost of stocks.
-
-    :param spread_on_borrowing_stocks_percent: spread on top of cash return payed for borrowing assets,
-        including cash. If ``None``, the default, it gets from :class:`Backtest` the
-        value for the period.
-    :type spread_on_borrowing_stocks_percent: float or pd.Series or pd.DataFrame or None
-    :param spread_on_lending_cash_percent: spread that subtracts from the cash return for
-        uninvested cash.
-    :type spread_on_lending_cash_percent: float or pd.Series or None
-    :param spread_on_borrowing_cash_percent: spread that adds to the cash return as rate payed for
-        borrowed cash. This is not used as a policy optimization cost but is for the simulator cost.
-    :type spread_on_borrowing_cash_percent: float or pd.Series or None
-    :param dividends: dividends payed (expressed as fraction of the stock value)
-        for each period.  If ``None``, the default, it gets from :class:`Backtest` the
-        value for the period.
-    :type dividends: pd.DataFrame or None
-    :param periods_per_year: period per year (used in calculation of per-period cost). If None it is calculated
-        automatically.
-    :type periods_per_year: int or None
-    :param cash_return_on_borrow: whether to add (negative of) cash return to borrow cost of assets
-    :type cash_return_on_borrow: bool
-    """
 
-    def __init__(self,
-                 
-                 spread_on_borrowing_stocks_percent=.5,
-                 spread_on_lending_cash_percent=.5,
-                 spread_on_borrowing_cash_percent=.5,
-                 periods_per_year=None,
-                 # TODO revisit this plus spread_on_borrowing_stocks_percent syntax
-                 cash_return_on_borrow=True,
-                 dividends=0.):
-
-        super().__init__(
-            spread_on_borrowing_assets_percent=spread_on_borrowing_stocks_percent,
-            spread_on_lending_cash_percent=spread_on_lending_cash_percent,
-            spread_on_borrowing_cash_percent=spread_on_borrowing_cash_percent,
-            periods_per_year=periods_per_year,
-            cash_return_on_borrow=cash_return_on_borrow,
-            dividends=dividends)
+# class StocksHoldingCost(HoldingCost):
+#     """A model for holding cost of stocks.
+#
+#     :param spread_on_borrowing_stocks_percent: spread on top of cash return payed for borrowing assets,
+#         including cash. If ``None``, the default, it gets from :class:`Backtest` the
+#         value for the period.
+#     :type spread_on_borrowing_stocks_percent: float or pd.Series or pd.DataFrame or None
+#     :param spread_on_lending_cash_percent: spread that subtracts from the cash return for
+#         uninvested cash.
+#     :type spread_on_lending_cash_percent: float or pd.Series or None
+#     :param spread_on_borrowing_cash_percent: spread that adds to the cash return as rate payed for
+#         borrowed cash. This is not used as a policy optimization cost but is for the simulator cost.
+#     :type spread_on_borrowing_cash_percent: float or pd.Series or None
+#     :param dividends: dividends payed (expressed as fraction of the stock value)
+#         for each period.  If ``None``, the default, it gets from :class:`Backtest` the
+#         value for the period.
+#     :type dividends: pd.DataFrame or None
+#     :param periods_per_year: period per year (used in calculation of per-period cost). If None it is calculated
+#         automatically.
+#     :type periods_per_year: int or None
+#     :param cash_return_on_borrow: whether to add (negative of) cash return to borrow cost of assets
+#     :type cash_return_on_borrow: bool
+#     """
+#
+#     def __init__(self, short_fees=0.05, long_fees=None, dividends=None, periods_per_year=None):
+#
+#         super().__init__(
+#             spread_on_borrowing_assets_percent=spread_on_borrowing_stocks_percent,
+#             spread_on_lending_cash_percent=spread_on_lending_cash_percent,
+#             spread_on_borrowing_cash_percent=spread_on_borrowing_cash_percent,
+#             periods_per_year=periods_per_year,
+#             cash_return_on_borrow=cash_return_on_borrow,
+#             dividends=dividends)
 
 
 class TransactionCost(BaseCost):

From 4bce3a449c7f60f3dd3da5a68df9fcb996de5f18 Mon Sep 17 00:00:00 2001
From: Enzo Busseti <enzo.busseti@me.com>
Date: Tue, 5 Sep 2023 00:51:38 +0400
Subject: [PATCH 5/8] completed correcting cvx.(Stocks)HoldingCost for gh #107

---
 cvxportfolio/costs.py                | 96 +++++++++++++---------------
 cvxportfolio/simulator.py            | 21 +++---
 cvxportfolio/tests/test_costs.py     | 14 ++--
 cvxportfolio/tests/test_simulator.py | 80 +++++++----------------
 4 files changed, 87 insertions(+), 124 deletions(-)

diff --git a/cvxportfolio/costs.py b/cvxportfolio/costs.py
index b7f7f48fc..787572bec 100644
--- a/cvxportfolio/costs.py
+++ b/cvxportfolio/costs.py
@@ -221,15 +221,13 @@ class HoldingCost(BaseCost):
 
     :param short_fees:
     :type short_fees: float, pd.Series, pd.DataFrame or None
-    :param short_fees:
-    :type short_fees:  float, pd.Series, pd.DataFrame or None
-    :param dividends:
-    :type short_fees: float, pd.Series, pd.DataFrame or None
+    :param long_fees:
+    :type long_fees: float, pd.Series, pd.DataFrame or None
     :param periods_per_year:
     :type periods_per_year: float or None
     """
 
-    def __init__(self, short_fees=0.05, long_fees=None, dividends=None,
+    def __init__(self, short_fees=None, long_fees=None, dividends=None,
                  periods_per_year=None):
 
         self.short_fees = None if short_fees is None else DataEstimator(short_fees)
@@ -245,10 +243,10 @@ def _pre_evaluation(self, universe, backtest_times):
         """
 
         if not (self.short_fees is None):
-            self._short_fees_parameter = cp.Parameter(len(universe) - 1)
+            self._short_fees_parameter = cp.Parameter(len(universe) - 1, nonneg=True)
 
         if not (self.long_fees is None):
-            self._long_fees_parameter = cp.Parameter(len(universe) - 1)
+            self._long_fees_parameter = cp.Parameter(len(universe) - 1, nonneg=True)
 
         if not (self.dividends is None):
             self._dividends_parameter = cp.Parameter(len(universe) - 1)
@@ -280,8 +278,7 @@ def _values_in_time(self, t, past_returns, **kwargs):
 
         if not (self.dividends is None):
             self._dividends_parameter.value = \
-                np.ones(past_returns.shape[1]-1) * \
-                     _annual_percent_to_per_period(self.dividends.current_value, ppy)
+                np.ones(past_returns.shape[1]-1) * self.dividends.current_value
 
 
     def _compile_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
@@ -306,64 +303,57 @@ def _compile_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
     
     def _simulate(self, t, h_plus, current_and_past_returns, t_next, **kwargs):
         """
-        TODO: make sure simulator cost sign convention is the same as optimization cost!
-        TODO: make sure DataEstimator returns np.array of correct size!
-        TODO: make sure simulator cost estimators are recursively evaluated!
+        TODO: make sure simulator cost sign convention is the same as optimization cost! OK
+        TODO: make sure DataEstimator returns np.array of correct size! ~OK
+        TODO: make sure simulator cost estimators are recursively evaluated! ~OK
         """
         
-        year_divided_by_period = pd.TimeDelta('365.24d') / (t_next - t)
+        year_divided_by_period = pd.Timedelta('365.24d') / (t_next - t)
 
         cost = 0. 
+        
+        # TODO this is a temporary fix, we should plug this into a recursive tree
+        for est in [self.short_fees, self.long_fees, self.dividends]:
+            if not (est is None):
+                est._recursive_pre_evaluation(universe=h_plus.index[:-1], backtest_times=[t])
+                est._recursive_values_in_time(t=t)
 
         if not (self.short_fees is None):
-            cost += _annual_percent_to_per_period(
-                self.short_fees.current_value, year_divided_by_period).T @ cp.neg(h_plus[:-1])
+            cost += np.sum(_annual_percent_to_per_period(
+                self.short_fees.current_value, year_divided_by_period) * (-np.minimum(h_plus[:-1], 0.)))
         
         if not (self.long_fees is None):
-            cost += _annual_percent_to_per_period(
-                self.long_fees.current_value, year_divided_by_period).T @ cp.pos(h_plus[:-1])
+            cost += np.sum(_annual_percent_to_per_period(
+                self.long_fees.current_value, year_divided_by_period) * np.maximum(h_plus[:-1], 0.))
 
         if not (self.dividends is None):
             # we have a minus sign because costs are deducted from PnL
-            cost -= _annual_percent_to_per_period(
-                self.dividends.current_value, year_divided_by_period).T @ h_plus[:-1]
+            cost -= np.sum(self.dividends.current_value * h_plus[:-1])
 
         return cost
 
 
-# class StocksHoldingCost(HoldingCost):
-#     """A model for holding cost of stocks.
-#
-#     :param spread_on_borrowing_stocks_percent: spread on top of cash return payed for borrowing assets,
-#         including cash. If ``None``, the default, it gets from :class:`Backtest` the
-#         value for the period.
-#     :type spread_on_borrowing_stocks_percent: float or pd.Series or pd.DataFrame or None
-#     :param spread_on_lending_cash_percent: spread that subtracts from the cash return for
-#         uninvested cash.
-#     :type spread_on_lending_cash_percent: float or pd.Series or None
-#     :param spread_on_borrowing_cash_percent: spread that adds to the cash return as rate payed for
-#         borrowed cash. This is not used as a policy optimization cost but is for the simulator cost.
-#     :type spread_on_borrowing_cash_percent: float or pd.Series or None
-#     :param dividends: dividends payed (expressed as fraction of the stock value)
-#         for each period.  If ``None``, the default, it gets from :class:`Backtest` the
-#         value for the period.
-#     :type dividends: pd.DataFrame or None
-#     :param periods_per_year: period per year (used in calculation of per-period cost). If None it is calculated
-#         automatically.
-#     :type periods_per_year: int or None
-#     :param cash_return_on_borrow: whether to add (negative of) cash return to borrow cost of assets
-#     :type cash_return_on_borrow: bool
-#     """
-#
-#     def __init__(self, short_fees=0.05, long_fees=None, dividends=None, periods_per_year=None):
-#
-#         super().__init__(
-#             spread_on_borrowing_assets_percent=spread_on_borrowing_stocks_percent,
-#             spread_on_lending_cash_percent=spread_on_lending_cash_percent,
-#             spread_on_borrowing_cash_percent=spread_on_borrowing_cash_percent,
-#             periods_per_year=periods_per_year,
-#             cash_return_on_borrow=cash_return_on_borrow,
-#             dividends=dividends)
+class StocksHoldingCost(HoldingCost):
+    """A model for holding cost of stocks.
+
+    :param short_fees:
+    :type short_fees: float, pd.Series, pd.DataFrame or None
+    :param short_fees:
+    :type short_fees:  float, pd.Series, pd.DataFrame or None
+    :param dividends:
+    :type short_fees: float, pd.Series, pd.DataFrame or None
+    :param periods_per_year:
+    :type periods_per_year: float or None
+    """
+
+    def __init__(self, short_fees=5, long_fees=None, dividends=None, 
+        periods_per_year=None):
+
+        super().__init__(
+            short_fees=short_fees,
+            long_fees=long_fees,
+            dividends=dividends,
+            periods_per_year=periods_per_year)
 
 
 class TransactionCost(BaseCost):
@@ -467,7 +457,7 @@ def _simulate(self, t, u, current_and_past_returns,
         assert not np.isnan(result)
         assert not np.isinf(result)
 
-        return -result
+        return result
 
     def _compile_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
 
diff --git a/cvxportfolio/simulator.py b/cvxportfolio/simulator.py
index 274ea2bab..1a2a663ec 100644
--- a/cvxportfolio/simulator.py
+++ b/cvxportfolio/simulator.py
@@ -470,7 +470,7 @@ def _round_trade_vector(u, current_prices):
         result.iloc[-1] = -sum(result.iloc[:-1])
         return result
 
-    def _simulate(self, t, h, policy, **kwargs):
+    def _simulate(self, t, t_next, h, policy, **kwargs):
         """Get next portfolio and statistics used by Backtest for reporting.
 
         The signature of this method differs from other estimators
@@ -510,13 +510,17 @@ def _simulate(self, t, h, policy, **kwargs):
         if not (current_and_past_volumes is None):
             current_volumes = current_and_past_volumes.iloc[-1]
             non_tradable_stocks = current_volumes[current_volumes <= 0].index
+            if len(non_tradable_stocks):
+                logger.info(
+                    f"At time {t} the simulator canceled trades on assets {non_tradable_stocks}"
+                    " because their market volumes for the period are zero.")
             u[non_tradable_stocks] = 0.
 
         # round trades
         if self.round_trades:
             u = self._round_trade_vector(u, current_prices)
 
-        # for safety recompute cash
+        # recompute cash
         u.iloc[-1] = -sum(u.iloc[:-1])
         assert sum(u) == 0.
 
@@ -529,16 +533,17 @@ def _simulate(self, t, h, policy, **kwargs):
             current_and_past_volumes=current_and_past_volumes,
             current_and_past_returns=current_and_past_returns,
             current_prices=current_prices,
+            t_next=t_next,
             periods_per_year=self.market_data.PPY,
             windowsigma=self.market_data.PPY) for cost in self.costs}
 
         # initialize tomorrow's holdings
         h_next = pd.Series(h_plus, copy=True)
 
-        # credit costs to cash account
-        h_next.iloc[-1] = h_plus.iloc[-1] + sum(realized_costs.values())
+        # debit costs from cash account
+        h_next.iloc[-1] = h_plus.iloc[-1] - sum(realized_costs.values())
 
-        # multiply positions by market returns
+        # multiply positions (including cash) by market returns
         current_returns = current_and_past_returns.iloc[-1]
         h_next *= (1 + current_returns)
 
@@ -548,7 +553,7 @@ def _single_backtest(self, policy, start_time, end_time, h, universe=None):
         if universe is None:
             universe = self.market_data.universe
         backtest_times = self.market_data._get_backtest_times(
-            start_time, end_time, include_end=False)
+            start_time, end_time, include_end=True)
 
         if hasattr(policy, '_compile_to_cvxpy'):
             policy._compile_to_cvxpy()
@@ -556,12 +561,12 @@ def _single_backtest(self, policy, start_time, end_time, h, universe=None):
         result = BacktestResult(universe, backtest_times, self.costs)
 
         # this is the main loop of a backtest
-        for t in backtest_times:
+        for t, t_next in zip(backtest_times[:-1], backtest_times[1:]):
             result.h.loc[t] = h
             s = time.time()
             h, result.z.loc[t], result.u.loc[t], realized_costs, \
                 result.policy_times.loc[t] = self._simulate(
-                    t=t, h=h, policy=policy)
+                    t=t, h=h, policy=policy, t_next=t_next)
             for cost in realized_costs:
                 result.costs[cost].loc[t] = realized_costs[cost]
             result.simulator_times.loc[t] = time.time(
diff --git a/cvxportfolio/tests/test_costs.py b/cvxportfolio/tests/test_costs.py
index 13305ef3d..b1f86b53c 100644
--- a/cvxportfolio/tests/test_costs.py
+++ b/cvxportfolio/tests/test_costs.py
@@ -100,7 +100,7 @@ def test_hcost(self):
         dividends = pd.Series(np.random.randn(self.N-1),
                               self.returns.columns[:-1])
         dividends *= 0.
-        hcost = StocksHoldingCost(spread_on_borrowing_stocks_percent=.5,
+        hcost = StocksHoldingCost(short_fees=5,
                                   dividends=dividends)
 
         t = 100  # this is picked so that periods_per_year evaluates to 252
@@ -118,14 +118,14 @@ def test_hcost(self):
 
             print(expression.value)
 
-            print(-np.sum(np.minimum(self.w_plus.value[:-1], 0.)) * (
-                cash_ret + .5/(100 * 252)))
-            print(- self.w_plus.value[:-1].T @ dividends)
-            print(-np.sum(np.minimum(self.w_plus.value[:-1], 0.)) * (cash_ret + 0.5/(100 * 252))
-                  - self.w_plus.value[:-1].T @ dividends)
+            # print(-np.sum(np.minimum(self.w_plus.value[:-1], 0.)) * (
+            #     cash_ret + 5/(100 * 252)))
+            # print(- self.w_plus.value[:-1].T @ dividends)
+            # print(-np.sum(np.minimum(self.w_plus.value[:-1], 0.)) * (cash_ret + 0.5/(100 * 252))
+            #       - self.w_plus.value[:-1].T @ dividends)
 
             self.assertTrue(np.isclose(expression.value,
-                                       -np.sum(np.minimum(self.w_plus.value[:-1], 0.)) * (cash_ret + 0.5/(100 * 252))
+                                       -np.sum(np.minimum(self.w_plus.value[:-1], 0.)) * (np.exp(np.log(1.05)/252) - 1)
                                        # + np.abs(self.w_plus.value[-1])* 0.5/(100 * 252)
                                        - self.w_plus.value[:-1].T @ dividends
                                        ))
diff --git a/cvxportfolio/tests/test_simulator.py b/cvxportfolio/tests/test_simulator.py
index 303ffced6..352f0b8ec 100644
--- a/cvxportfolio/tests/test_simulator.py
+++ b/cvxportfolio/tests/test_simulator.py
@@ -241,43 +241,10 @@ def test_prepare_data(self):
         self.assertTrue(
             simulator.market_data.returns.index[-1] == simulator.market_data.prices.index[-1])
 
-    def test_cash_holding_cost(self):
 
-        t = self.returns.index[-40]
 
-        current_and_past_returns, current_and_past_volumes, current_prices = self.market_data._serve_data_simulator(
-            t)
-
-        cash_return = self.returns.loc[t, 'cash']
-
-        hcost = cvx.StocksHoldingCost(
-            # spread_on_lending_cash_percent=0.,
-            # spread_on_borrowing_cash_percent=0.,
-            dividends=0.,
-            periods_per_year=252,
-            spread_on_borrowing_stocks_percent=0.,
-            cash_return_on_borrow=False)
-
-        for i in range(10):
-            np.random.seed(i)
-            h_plus = np.random.randn(self.returns.shape[1])*1000
-            h_plus = pd.Series(h_plus, self.returns.columns)
-            h_plus.iloc[-1] = 1000 - sum(h_plus.iloc[:-1])
-
-            sim_cash_hcost = hcost._simulate(
-                t, h_plus=h_plus, current_and_past_returns=current_and_past_returns)
-
-            real_cash_position = h_plus.iloc[-1] + \
-                sum(np.minimum(h_plus.iloc[:-1], 0.))
-            if real_cash_position > 0:
-                cash_hcost = real_cash_position * \
-                    (np.maximum(cash_return - 0.005/252, 0.) - cash_return)
-            if real_cash_position < 0:
-                cash_hcost = real_cash_position * (0.005/252)
-
-            self.assertTrue(np.isclose(cash_hcost, sim_cash_hcost))
-
-    def test_stocks_holding_cost(self):
+    def test_holding_cost(self):
+        """Test the simulator interface of cvx.StocksHoldingCost."""
 
         t = self.returns.index[-20]
 
@@ -295,19 +262,15 @@ def test_stocks_holding_cost(self):
 
             dividends = np.random.uniform(size=len(h_plus)-1) * 1E-4
 
-            hcost = cvx.StocksHoldingCost(
-                spread_on_lending_cash_percent=0.,
-                spread_on_borrowing_cash_percent=0.,
-                dividends=dividends, periods_per_year=252)
+            hcost = cvx.StocksHoldingCost(short_fees=5, dividends=dividends)
 
             sim_hcost = hcost._simulate(
-                t=t, h_plus=h_plus, current_and_past_returns=current_and_past_returns)
+                t=t, h_plus=h_plus, current_and_past_returns=current_and_past_returns, t_next=t + pd.Timedelta('1d'))
 
-            total_borrow_cost = cash_return + (0.005)/252
-            hcost = -total_borrow_cost * sum(-np.minimum(h_plus, 0.)[:-1])
+            hcost = -(np.exp(np.log(1.05)/365.24)-1) * sum(-np.minimum(h_plus, 0.)[:-1])
             hcost += dividends @ h_plus[:-1]
-
-            self.assertTrue(np.isclose(hcost, sim_hcost))
+            print(hcost, -sim_hcost)
+            self.assertTrue(np.isclose(hcost, -sim_hcost))
 
     def test_transaction_cost_syntax(self):
 
@@ -375,7 +338,7 @@ def test_transaction_cost(self):
             # sim_tcost = simulator.transaction_costs(u)
             #
             print(tcost, sim_cost)
-            self.assertTrue(np.isclose(tcost, sim_cost))
+            self.assertTrue(np.isclose(tcost, -sim_cost))
 
     def test_methods(self):
         simulator = MarketSimulator(
@@ -422,16 +385,18 @@ def test_simulate_policy(self):
                     start_time, end_time, include_end=False)
             )
 
-            for t in simulator.market_data.returns.index[(simulator.market_data.returns.index >= start_time) & (simulator.market_data.returns.index <= end_time)]:
+            for (i, t) in enumerate(simulator.market_data.returns.index[
+                    (simulator.market_data.returns.index >= start_time) & (simulator.market_data.returns.index <= end_time)]):
+                t_next = simulator.market_data.returns.index[i+1]
                 oldcash = h.iloc[-1]
                 h, z, u, costs, timer = simulator._simulate(
-                    t=t, h=h, policy=policy)
+                    t=t, h=h, policy=policy, t_next=t_next)
                 tcost, hcost = costs['StocksTransactionCost'], costs['StocksHoldingCost']
                 assert tcost == 0.
                 # if np.all(h0[:2] > 0):
                 #    assert hcost == 0.
                 assert np.isclose(
-                    (oldcash + hcost) * (1+simulator.market_data.returns.loc[t, 'USDOLLAR']), h.iloc[-1])
+                    (oldcash - hcost) * (1+simulator.market_data.returns.loc[t, 'USDOLLAR']), h.iloc[-1])
 
             simh = h0[:-1] * simulator.market_data.prices.loc[pd.Timestamp(
                 end_time) + pd.Timedelta('1d')] / simulator.market_data.prices.loc[start_time]
@@ -453,10 +418,13 @@ def test_simulate_policy(self):
                     start_time, end_time, include_end=False)
             )
 
-            for t in simulator.market_data.returns.index[(simulator.market_data.returns.index >= start_time) & (simulator.market_data.returns.index <= end_time)]:
+            for i, t in enumerate(simulator.market_data.returns.index[
+                    (simulator.market_data.returns.index >= start_time) & 
+                        (simulator.market_data.returns.index <= end_time)]):
+                t_next = simulator.market_data.returns.index[i+1]
                 oldcash = h.iloc[-1]
                 h, z, u, costs, timer = simulator._simulate(
-                    t=t, h=h, policy=policy)
+                    t=t, h=h, policy=policy, t_next=t_next)
                 tcost, hcost = costs['StocksTransactionCost'], costs['StocksHoldingCost']
                 print(h)
                 # print(tcost, stock_hcost, cash_hcost)
@@ -660,11 +628,11 @@ def test_timed_constraints(self):
         """Test some constraints that depend on time."""
 
         sim = cvx.StockMarketSimulator(
-            ['AAPL', 'MSFT', 'GE', 'GLD', 'META'], 
+            ['AAPL', 'MSFT', 'GE', 'META'], 
             trading_frequency='monthly', base_location=self.datadir)
 
         # cvx.NoTrade
-        objective = cvx.ReturnsForecast() - 10 * cvx.FullCovariance()
+        objective = cvx.ReturnsForecast() - 2 * cvx.FullCovariance()
 
         no_trade_ts = [sim.market_data.returns.index[-2],
                        sim.market_data.returns.index[-6]]
@@ -681,18 +649,18 @@ def test_timed_constraints(self):
         policies = [cvx.MultiPeriodOptimization(objective - cvx.StocksTransactionCost(),
                                                 [cvx.MinWeightsAtTimes(
                                                     0., no_trade_ts), cvx.MaxWeightsAtTimes(0., no_trade_ts)],
-                                                planning_horizon=p) for p in [1, 2, 5]]
+                                                planning_horizon=p) for p in [1, 3, 5]]
 
         results = sim.backtest_many(
-            policies, start_time='2023-01-01', initial_value=1E9,
+            policies, start_time='2023-01-01', initial_value=1E6,
             parallel=False)  # important for test coverage
         print(results)
 
         total_tcosts = [result.costs['StocksTransactionCost'].sum()
                         for result in results]
         print(total_tcosts)
-        self.assertTrue(total_tcosts[0] < total_tcosts[1])
-        self.assertTrue(total_tcosts[1] < total_tcosts[2])
+        self.assertTrue(total_tcosts[0] > total_tcosts[1])
+        self.assertTrue(total_tcosts[1] > total_tcosts[2])
 
     def test_eq_soft_constraints(self):
         """We check that soft DollarNeutral penalizes non-dollar-neutrality."""

From 26c66bd38c806283aac049cdfa8f322d9015a022 Mon Sep 17 00:00:00 2001
From: Enzo Busseti <enzo.busseti@me.com>
Date: Tue, 5 Sep 2023 01:39:12 +0400
Subject: [PATCH 6/8] docstring of cvx.(Stocks)HoldingCost and minor fixes

---
 cvxportfolio/costs.py                | 112 ++++++++++++++++++++++-----
 cvxportfolio/tests/test_costs.py     |   3 +-
 cvxportfolio/tests/test_simulator.py |   4 +-
 docs/returns.rst                     |   4 +
 4 files changed, 98 insertions(+), 25 deletions(-)

diff --git a/cvxportfolio/costs.py b/cvxportfolio/costs.py
index 787572bec..22e6fd076 100644
--- a/cvxportfolio/costs.py
+++ b/cvxportfolio/costs.py
@@ -217,13 +217,78 @@ def _annual_percent_to_per_period(value, ppy):
 
 
 class HoldingCost(BaseCost):
-    """Generic holding cost model, as described in page 11 of the book.
-
-    :param short_fees:
+    r"""Generic holding cost model, as described in page 11 of the book.
+    
+    There are two ways to use this class. Either in the costs attribute
+    of a :class:`MarketSimulator`, in which case the costs are evaluated
+    on the post-trade dollar positions :math:`h^+_t`. Or, 
+    as part of the objective function (or as a constraint!) 
+    of a :class:`SinglePeriodOptimization` 
+    or :class:`MultiPeriodOptimization` trading policy, in which case they
+    are evaluated on the post-trade weights :math:`w_t + z_t`. The mathematical
+    form is the same (see the discussion at pages 11-12 of the book).
+    
+    This particular implementation represents the following objective terms
+    (expressed here in terms of the post-trade dollar positions):
+    
+    .. math::
+ 
+        s^T_t {(h^+_t)}_- + l^T_t {(h^+_t)}_+ - d^T_t h^+_t
+    
+    where :math:`s_t` are the (short) borrowing fees, 
+    :math:`l_t` are the fees on long positions,
+    and :math:`d_t` are dividend rates (their sign is flipped because
+    the costs are deducted from the cash account at each period). See
+    below for their precise definition.
+    
+    Example usage as simulator cost:
+    
+    .. code-block:: python
+    
+        borrow_fees = pd.Series([5, 10], index=['AAPL', 'ZM'])
+        simulator = cvx.MarketSimulator(['AAPL', 'ZM'], 
+            costs=cvx.HoldingCost(short_fees=borrow_fees))
+    
+    Example usage as trading policy cost:
+    
+    .. code-block:: python
+        
+        objective = cvx.ReturnsForecast() - 5 * cvx.FullCovariance() \
+            - cvx.HoldingCost(short_fees=10)
+        constraints = [cvx.LeverageLimit(3)]    
+        policy = cvx.SinglePeriodOptimization(objective, constraints)
+    
+    :param short_fees: Short borrowing fees expressed as annual percentage;
+        you can provide them as a float (constant for all times and all 
+        assets), a :class:`pd.Series` indexed by time (constant for all
+        assets but varying in time) or by assets' names (constant in time
+        but varying across assets), or a :class:`pd.DataFrame` indexed by 
+        time and whose columns are the assets' names, if varying both
+        in time and across assets. If you use a time-indexed pandas object
+        be careful to include all times at which a backtest is evaluated
+        (otherwise you'll get a :class:`MissingValueError` exception). If `None`,
+        the term is ignored.
     :type short_fees: float, pd.Series, pd.DataFrame or None
-    :param long_fees:
+    :param long_fees: Fees on long positions expressed as annual percentage;
+        same convention as above applies.
     :type long_fees: float, pd.Series, pd.DataFrame or None
-    :param periods_per_year:
+    :param dividends: Dividend rates per period. Dividends are already included in the market
+        returns by the default data interface (based on Yahoo Finance "adjusted prices")
+        and thus this parameter should not be used in normal circumstances.
+    :type dividends: float, pd.Series, pd.DataFrame or None
+    :param periods_per_year: How many trading periods are there in a year, for
+        example 252 (for trading days in the US). This is
+        only relevant when using this class as part of a trading policy. If you leave
+        this to `None` the following happens.
+        The value of periods per year are estimated at each period by looking at 
+        the past market returns at that point in time: the number of past periods is divided by 
+        the timestamp of the most recent period minus the timestamp of the
+        first period (in years). That works well in most cases where there is enough history
+        (say, a few years) and saves the user from having to manually enter this.
+        If instead you use this object as a cost in a market simulator the parameter has no
+        effect. (The length of each trading period in the simulation is known and so the per-period
+        rates are evaluated exactly. For example, the rate over a weekend will be higher
+        than overnight.)
     :type periods_per_year: float or None
     """
 
@@ -334,26 +399,31 @@ def _simulate(self, t, h_plus, current_and_past_returns, t_next, **kwargs):
 
 
 class StocksHoldingCost(HoldingCost):
-    """A model for holding cost of stocks.
-
-    :param short_fees:
-    :type short_fees: float, pd.Series, pd.DataFrame or None
-    :param short_fees:
-    :type short_fees:  float, pd.Series, pd.DataFrame or None
-    :param dividends:
+    """A simplified version of :class:`HoldingCost` for the holding cost of stocks.
+    
+    This implements the simple model describe at page 11 of the book, *i.e.*
+    the cost (in terms of the post-trade dollar positions):
+    
+    .. math::
+ 
+        s^T_t {(h^+_t)}_-
+    
+    This class is a specialized version of :class:`HoldingCost`, and you should
+    read its documentation for all details. Here we
+    drop most of the parameters and use the default values explained above.
+    We use a default value of :math:`5\%` annualized borrowing
+    fee which is a rough (optimistic) approximation of the cost
+    of shorting liquid US stocks. This cost is included **by default**
+    in :class:`StockMarketSimulator`, the market simulator specialized
+    to US (liquid) stocks.
+
+    :param short_fees: Same as in :class:`HoldingCost`.
     :type short_fees: float, pd.Series, pd.DataFrame or None
-    :param periods_per_year:
-    :type periods_per_year: float or None
     """
 
-    def __init__(self, short_fees=5, long_fees=None, dividends=None, 
-        periods_per_year=None):
+    def __init__(self, short_fees=5):
 
-        super().__init__(
-            short_fees=short_fees,
-            long_fees=long_fees,
-            dividends=dividends,
-            periods_per_year=periods_per_year)
+        super().__init__(short_fees=short_fees)
 
 
 class TransactionCost(BaseCost):
diff --git a/cvxportfolio/tests/test_costs.py b/cvxportfolio/tests/test_costs.py
index b1f86b53c..c807df9f9 100644
--- a/cvxportfolio/tests/test_costs.py
+++ b/cvxportfolio/tests/test_costs.py
@@ -100,8 +100,7 @@ def test_hcost(self):
         dividends = pd.Series(np.random.randn(self.N-1),
                               self.returns.columns[:-1])
         dividends *= 0.
-        hcost = StocksHoldingCost(short_fees=5,
-                                  dividends=dividends)
+        hcost = HoldingCost(short_fees=5, dividends=dividends)
 
         t = 100  # this is picked so that periods_per_year evaluates to 252
         hcost._recursive_pre_evaluation(
diff --git a/cvxportfolio/tests/test_simulator.py b/cvxportfolio/tests/test_simulator.py
index 352f0b8ec..45ea6cf61 100644
--- a/cvxportfolio/tests/test_simulator.py
+++ b/cvxportfolio/tests/test_simulator.py
@@ -244,7 +244,7 @@ def test_prepare_data(self):
 
 
     def test_holding_cost(self):
-        """Test the simulator interface of cvx.StocksHoldingCost."""
+        """Test the simulator interface of cvx.HoldingCost."""
 
         t = self.returns.index[-20]
 
@@ -262,7 +262,7 @@ def test_holding_cost(self):
 
             dividends = np.random.uniform(size=len(h_plus)-1) * 1E-4
 
-            hcost = cvx.StocksHoldingCost(short_fees=5, dividends=dividends)
+            hcost = cvx.HoldingCost(short_fees=5, dividends=dividends)
 
             sim_hcost = hcost._simulate(
                 t=t, h_plus=h_plus, current_and_past_returns=current_and_past_returns, t_next=t + pd.Timedelta('1d'))
diff --git a/docs/returns.rst b/docs/returns.rst
index 616deab70..834b12f37 100644
--- a/docs/returns.rst
+++ b/docs/returns.rst
@@ -50,8 +50,12 @@ Cost models
 
 .. py:module:: cvxportfolio
 
+.. autoclass:: HoldingCost
+
 .. autoclass:: StocksHoldingCost
 
+.. autoclass:: TransactionCost
+
 .. autoclass:: StocksTransactionCost
 
 

From 469951e4d7d5f9d1f5523baef66bcda639908cac Mon Sep 17 00:00:00 2001
From: Enzo Busseti <enzo.busseti@me.com>
Date: Tue, 5 Sep 2023 02:27:14 +0400
Subject: [PATCH 7/8] fixed GH issue #106 by adding _universe_subselect method
 to DataEstimator

---
 cvxportfolio/benchmark.py            |   4 +-
 cvxportfolio/constraints.py          |   6 +-
 cvxportfolio/costs.py                |   2 +-
 cvxportfolio/data.py                 |   1 +
 cvxportfolio/estimator.py            | 131 ++++++++++++++++++++-------
 cvxportfolio/risks.py                |   2 +-
 cvxportfolio/tests/test_estimator.py |   5 +-
 7 files changed, 112 insertions(+), 39 deletions(-)

diff --git a/cvxportfolio/benchmark.py b/cvxportfolio/benchmark.py
index 64929f47a..a4624d7e7 100644
--- a/cvxportfolio/benchmark.py
+++ b/cvxportfolio/benchmark.py
@@ -39,7 +39,9 @@ class Benchmark(BaseBenchmark, DataEstimator):
     """
 
     def __init__(self, benchmark_weights):
-        DataEstimator.__init__(self, benchmark_weights)
+        DataEstimator.__init__(self, 
+            benchmark_weights, 
+            data_includes_cash=True)
 
 
 class CashBenchmark(BaseBenchmark):
diff --git a/cvxportfolio/constraints.py b/cvxportfolio/constraints.py
index a814e3dbd..a6d310221 100644
--- a/cvxportfolio/constraints.py
+++ b/cvxportfolio/constraints.py
@@ -442,7 +442,8 @@ class FactorMaxLimit(BaseWeightConstraint, InequalityConstraint):
     def __init__(self, factor_exposure, limit):
         self.factor_exposure = DataEstimator(
             factor_exposure, compile_parameter=True)
-        self.limit = DataEstimator(limit, compile_parameter=True)
+        self.limit = DataEstimator(limit, compile_parameter=True, 
+            ignore_shape_check=True)
 
     def _compile_constr_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
         "Compile left hand side of the constraint expression."
@@ -478,7 +479,8 @@ class FactorMinLimit(BaseWeightConstraint, InequalityConstraint):
     def __init__(self, factor_exposure, limit):
         self.factor_exposure = DataEstimator(
             factor_exposure, compile_parameter=True)
-        self.limit = DataEstimator(limit, compile_parameter=True)
+        self.limit = DataEstimator(limit, compile_parameter=True,
+            ignore_shape_check=True)
 
     def _compile_constr_to_cvxpy(self, w_plus, z, w_plus_minus_w_bm):
         "Compile left hand side of the constraint expression."
diff --git a/cvxportfolio/costs.py b/cvxportfolio/costs.py
index 22e6fd076..b0b858c90 100644
--- a/cvxportfolio/costs.py
+++ b/cvxportfolio/costs.py
@@ -380,7 +380,7 @@ def _simulate(self, t, h_plus, current_and_past_returns, t_next, **kwargs):
         # TODO this is a temporary fix, we should plug this into a recursive tree
         for est in [self.short_fees, self.long_fees, self.dividends]:
             if not (est is None):
-                est._recursive_pre_evaluation(universe=h_plus.index[:-1], backtest_times=[t])
+                est._recursive_pre_evaluation(universe=h_plus.index, backtest_times=[t])
                 est._recursive_values_in_time(t=t)
 
         if not (self.short_fees is None):
diff --git a/cvxportfolio/data.py b/cvxportfolio/data.py
index 3ba3727a6..a6e6b5fab 100644
--- a/cvxportfolio/data.py
+++ b/cvxportfolio/data.py
@@ -560,6 +560,7 @@ def __init__(
 
         self.base_location = base_location
         self.use_last_available_time = use_last_available_time
+        self.universe_maybe_noncash = None # fix, but we should retire this class
 
     def _recursive_pre_evaluation(self, *args, **kwargs):
         self.data = self.update_and_load(self.symbol)
diff --git a/cvxportfolio/estimator.py b/cvxportfolio/estimator.py
index 413859297..f4fad23ba 100644
--- a/cvxportfolio/estimator.py
+++ b/cvxportfolio/estimator.py
@@ -146,30 +146,35 @@ class DataEstimator(PolicyEstimator):
     by its `_recursive_values_in_time` method, which is the way `cvxportfolio`
     objects use this class to get data.
 
-    Args:
-        data (object, pandas.Series, pandas.DataFrame): Data expressed
-            preferably as pandas Series or DataFrame where the first
-            index is a pandas.DateTimeIndex. Otherwise you can
-            pass a callable object which implements the _recursive_values_in_time method
-            (with the standard signature) and returns the corresponding value in time,
-             or a constant float, numpy.array, or even pandas Series or DataFrame not
-            indexed by time (e.g., a covariance matrix where both index and columns
-            are the stock symbols).
-        use_last_available_time (bool): if the pandas index exists
-            and is a pandas.DateTimeIndex you can instruct self._recursive_values_in_time
-            to retrieve the last available value at time t by setting
-            this to True. Default is False.
-
+    :param data: Data expressed preferably as pandas Series or DataFrame 
+        where the first index is a pandas.DateTimeIndex. Otherwise you can
+        pass a callable object which implements the _recursive_values_in_time method
+        (with the standard signature) and returns the corresponding value in time,
+        or a constant float, numpy.array, or even pandas Series or DataFrame not
+        indexed by time (e.g., a covariance matrix where both index and columns
+        are the stock symbols).
+    :type data: object, pandas.Series, pandas.DataFrame 
+    :param use_last_available_time: if the pandas index exists
+        and is a pandas.DateTimeIndex you can instruct self._recursive_values_in_time
+        to retrieve the last available value at time t by setting
+        this to True. Default is False.
+    :type use_last_available_time: bool 
     """
 
     def __init__(self, data, use_last_available_time=False, allow_nans=False,
-                 compile_parameter=False, non_negative=False, positive_semi_definite=False):
+                 compile_parameter=False, non_negative=False, positive_semi_definite=False,
+                 data_includes_cash=False, # affects _universe_subselect
+                 ignore_shape_check=False # affects _universe_subselect
+                 ):
         self.data = data
         self.use_last_available_time = use_last_available_time
         self.allow_nans = allow_nans
         self.compile_parameter = compile_parameter
         self.non_negative = non_negative
         self.positive_semi_definite = positive_semi_definite
+        self.universe_maybe_noncash = None
+        self.data_includes_cash = data_includes_cash
+        self.ignore_shape_check = ignore_shape_check
 
     def _recursive_pre_evaluation(self, universe, backtest_times):
         # super()._recursive_pre_evaluation(universe, backtest_times)
@@ -177,7 +182,9 @@ def _recursive_pre_evaluation(self, universe, backtest_times):
             value = self.internal__recursive_values_in_time(
                 t=backtest_times[0])
             self.parameter = cp.Parameter(value.shape if hasattr(value, "shape") else (),
-                                          PSD=self.positive_semi_definite, nonneg=self.non_negative)
+                                          PSD=self.positive_semi_definite, nonneg=self.non_negative)          
+        
+        self.universe_maybe_noncash = universe if self.data_includes_cash else universe[:-1]
 
     def value_checker(self, result):
         """Ensure that only scalars or arrays without np.nan are returned.
@@ -215,50 +222,110 @@ def value_checker(self, result):
         raise DataError(
             f"{self.__class__.__name__}._recursive_values_in_time result is not a scalar or array."
         )
+        
+    def _universe_subselect(self, data):
+        """This function subselects from ``data`` the relevant universe.
+        
+        See github issue #106.
+        
+        If data is a pandas Series we subselect its index. If we fail
+        we throw an error. If data is a pandas DataFrame (covariance, exposure matrix) 
+        we try to subselect its index and columns. If we fail on either
+        we ignore the failure, but if we fail on both we throw an error.
+        If data is a numpy 1-d array we check that its length is the same as the 
+        universe's.
+        If it is a 2-d array we check that at least one dimension is the
+        same as the universe's.
+        If the universe is None we skip all checks. (We may revisit this choice.) This only happens
+        if the DataEstimator instance is not part of a PolicyEstimator tree 
+        (a usecase which we will probably drop).
+        """
+        
+        if (self.universe_maybe_noncash is None) or self.ignore_shape_check:
+            return data
+        
+        if isinstance(data, pd.Series):
+            try:
+                return data.loc[self.universe_maybe_noncash]
+            except KeyError:
+                raise MissingValuesError(
+                f"The pandas Series found by {self.__class__.__name__} has index {self.data.index}"
+                f" while the current universe (minus cash) is {self.universe_maybe_noncash}."
+                " It was not possibly to reconcile the two.")
+        
+        if isinstance(data, pd.DataFrame):
+            try:
+                return data.loc[self.universe_maybe_noncash, self.universe_maybe_noncash]
+            except KeyError:
+                try:
+                    return data.loc[:, self.universe_maybe_noncash]
+                except KeyError:
+                    try:
+                        return data.loc[self.universe_maybe_noncash, :]
+                    except KeyError:
+                        pass
+            raise MissingValuesError(
+                f"The pandas DataFrame found by {self.__class__.__name__} has index {self.data.index}"
+                f" and columns {self.data.columns}"
+                f" while the current universe (minus cash) is {self.universe_maybe_noncash}."
+                " It was not possibly to reconcile the two.")
+        
+        if isinstance(data, np.ndarray):
+            dimensions = data.shape
+            if not len(self.universe_maybe_noncash) in dimensions:
+                raise MissingValuesError(
+                    f"The numpy array found by {self.__class__.__name__} has dimensions {self.data.shape}"
+                    f" while the current universe (minus cash) has size {len(self.universe_maybe_noncash)}.")
+            return data
+        
+        # scalar
+        return data
+
+                            
 
     def internal__recursive_values_in_time(self, t, *args, **kwargs):
         """Internal method called by `self._recursive_values_in_time`."""
 
+        # if self.data has values_in_time we use it
         if hasattr(self.data, "values_in_time"):
-            _ = self.data.values_in_time(t=t, *args, **kwargs)
-            if hasattr(_, 'values'):
-                return self.value_checker(_.values)
+            tmp = self.data.values_in_time(t=t, *args, **kwargs)
+            tmp = self._universe_subselect(tmp)
+            if hasattr(tmp, 'values'):
+                return self.value_checker(tmp.values)
             else:
-                return self.value_checker(_)
+                return self.value_checker(tmp)
 
+        # if self.data is pandas and has datetime (first) index
         if (hasattr(self.data, "loc") and hasattr(self.data, "index")
             and (isinstance(self.data.index, pd.DatetimeIndex)
-                 or (
-                isinstance(self.data.index, pd.MultiIndex)
-                and isinstance(self.data.index.levels[0], pd.DatetimeIndex)
-            )
-        )
-        ):
+                 or (isinstance(self.data.index, pd.MultiIndex) and 
+                     isinstance(self.data.index.levels[0], pd.DatetimeIndex)))):
             try:
                 if self.use_last_available_time:
                     if isinstance(self.data.index, pd.MultiIndex):
                         newt = self.data.index.levels[0][
-                            self.data.index.levels[0] <= t
-                        ][-1]
+                            self.data.index.levels[0] <= t][-1]
                     else:
                         newt = self.data.index[self.data.index <= t][-1]
                     tmp = self.data.loc[newt]
                 else:
                     tmp = self.data.loc[t]
                 if hasattr(tmp, "values"):
-                    return self.value_checker(tmp.values)
+                    return self.value_checker(self._universe_subselect(tmp.values))
                 else:
-                    return self.value_checker(tmp)
+                    return self.value_checker(self._universe_subselect(tmp))
 
             except (KeyError, IndexError):
                 raise MissingValuesError(
                     f"{self.__class__.__name__}._recursive_values_in_time could not find data for requested time."
                 )
 
+        # if data is pandas but no datetime index (constant in time)
         if hasattr(self.data, "values"):
-            return self.value_checker(self.data.values)
+            return self.value_checker(self._universe_subselect(self.data.values))
 
-        return self.value_checker(self.data)
+        # if data is scalar or numpy
+        return self.value_checker(self._universe_subselect(self.data))
 
     def _recursive_values_in_time(self, t, *args, **kwargs):
         """Obtain value of `self.data` at time t or right before.
diff --git a/cvxportfolio/risks.py b/cvxportfolio/risks.py
index 77b83cc3c..b38c5d701 100644
--- a/cvxportfolio/risks.py
+++ b/cvxportfolio/risks.py
@@ -281,7 +281,7 @@ class FactorModelCovariance(BaseRiskModel):
     def __init__(self, F=None, d=None, Sigma_F=None, num_factors=1, kelly=True):
         self.F = F if F is None else DataEstimator(F, compile_parameter=True)
         self.d = d if d is None else DataEstimator(d)
-        self.Sigma_F = Sigma_F if Sigma_F is None else DataEstimator(Sigma_F)
+        self.Sigma_F = Sigma_F if Sigma_F is None else DataEstimator(Sigma_F, ignore_shape_check=True)
         if (self.F is None) or (self.d is None):
             self._fit = True
             self.Sigma = HistoricalFactorizedCovariance(kelly=kelly)
diff --git a/cvxportfolio/tests/test_estimator.py b/cvxportfolio/tests/test_estimator.py
index acfa3698c..254595580 100644
--- a/cvxportfolio/tests/test_estimator.py
+++ b/cvxportfolio/tests/test_estimator.py
@@ -165,10 +165,11 @@ def test_parameter_estimator(self):
         second_level = ["hello", "ciao", "hola"]
         index = pd.MultiIndex.from_product([timeindex, second_level])
         data = pd.DataFrame(np.random.randn(len(index), 10), index=index)
-        estimator = DataEstimator(data, compile_parameter=True)
+        estimator = DataEstimator(data, compile_parameter=True, 
+            data_includes_cash=True)
         self.assertTrue(not hasattr(estimator, "parameter"))
         estimator._recursive_pre_evaluation(
-            universe=None, backtest_times=timeindex)
+            universe=data.columns, backtest_times=timeindex)
         # assert hasattr(estimator, 'parameter')
         self.assertTrue(hasattr(estimator, "parameter"))
         estimator._recursive_values_in_time("2022-01-05")

From 85c3c6b17a256da7ac878930b72399720aa11786 Mon Sep 17 00:00:00 2001
From: Enzo Busseti <enzo.busseti@me.com>
Date: Tue, 5 Sep 2023 02:33:54 +0400
Subject: [PATCH 8/8] minor

---
 cvxportfolio/estimator.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/cvxportfolio/estimator.py b/cvxportfolio/estimator.py
index f4fad23ba..3302c3e7b 100644
--- a/cvxportfolio/estimator.py
+++ b/cvxportfolio/estimator.py
@@ -250,8 +250,8 @@ def _universe_subselect(self, data):
             except KeyError:
                 raise MissingValuesError(
                 f"The pandas Series found by {self.__class__.__name__} has index {self.data.index}"
-                f" while the current universe (minus cash) is {self.universe_maybe_noncash}."
-                " It was not possibly to reconcile the two.")
+                f" while the current universe {'minus cash' if not self.data_includes_cash else ''}"
+                f" is {self.universe_maybe_noncash}. It was not possibly to reconcile the two.")
         
         if isinstance(data, pd.DataFrame):
             try:
@@ -267,15 +267,16 @@ def _universe_subselect(self, data):
             raise MissingValuesError(
                 f"The pandas DataFrame found by {self.__class__.__name__} has index {self.data.index}"
                 f" and columns {self.data.columns}"
-                f" while the current universe (minus cash) is {self.universe_maybe_noncash}."
-                " It was not possibly to reconcile the two.")
+                f" while the current universe {'minus cash' if not self.data_includes_cash else ''}"
+                f" is {self.universe_maybe_noncash}. It was not possibly to reconcile the two.")
         
         if isinstance(data, np.ndarray):
             dimensions = data.shape
             if not len(self.universe_maybe_noncash) in dimensions:
                 raise MissingValuesError(
                     f"The numpy array found by {self.__class__.__name__} has dimensions {self.data.shape}"
-                    f" while the current universe (minus cash) has size {len(self.universe_maybe_noncash)}.")
+                    f" while the current universe {'minus cash' if not self.data_includes_cash else ''}" 
+                    f" has size {len(self.universe_maybe_noncash)}.")
             return data
         
         # scalar