Skip to content

Commit

Permalink
Added log property to BacktestResult,
Browse files Browse the repository at this point in the history
which contains all logs produced in the process that ran that back-test.
Level and formatting are configurable.
Improved logging calls from the rest of the library.
  • Loading branch information
enzbus committed Dec 28, 2023
1 parent 5f1337c commit e53bb0e
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 43 deletions.
19 changes: 10 additions & 9 deletions cvxportfolio/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os
import pickle

logger = logging.getLogger(__name__)

def _mp_init(l):
"""Shared lock to disk access for multiprocessing."""
Expand All @@ -43,42 +44,42 @@ def cache_name(signature, base_location):
def _load_cache(signature, base_location):
    """Load a market-data cache dictionary from disk.

    :param signature: Unique signature of the market data; used to derive
        the on-disk file name via ``cache_name``. If ``None`` there is
        nothing to load and an empty dict is returned.
    :param base_location: Base directory in which cache files are stored.

    :returns: The un-pickled cache dictionary, or ``{}`` when the
        signature is missing or no cache file exists yet.
    """
    if signature is None:
        logger.info('Market data has no signature!')
        return {}
    name = cache_name(signature, base_location)
    # LOCK is only present when running under multiprocessing (set up by
    # _mp_init), hence the guarded access via globals().
    if 'LOCK' in globals():
        logger.debug(  # pragma: no cover
            'Acquiring cache lock from process %s', os.getpid())
        LOCK.acquire()  # pragma: no cover
    try:
        with open(name, 'rb') as f:
            res = pickle.load(f)
            logger.info('Loaded cache %s', name)
            return res
    except FileNotFoundError:
        # First run for this signature: no cache on disk yet.
        logger.info('Cache not found!')
        return {}
    finally:
        # Release in finally so the lock is never leaked, whatever the
        # outcome of the load above.
        if 'LOCK' in globals():
            logger.debug(  # pragma: no cover
                'Releasing cache lock from process %s', os.getpid())
            LOCK.release()  # pragma: no cover

def _store_cache(cache, signature, base_location):
    """Store a market-data cache dictionary to disk.

    :param cache: Dictionary to pickle to disk.
    :param signature: Unique signature of the market data; used to derive
        the on-disk file name via ``cache_name``. If ``None`` nothing is
        stored.
    :param base_location: Base directory in which cache files are stored.
    """
    if signature is None:
        logger.info('Market data has no signature!')
        return
    name = cache_name(signature, base_location)
    # LOCK is only present when running under multiprocessing (set up by
    # _mp_init), hence the guarded access via globals().
    if 'LOCK' in globals():
        logger.debug(  # pragma: no cover
            'Acquiring cache lock from process %s', os.getpid())
        LOCK.acquire()  # pragma: no cover
    try:
        name.parent.mkdir(exist_ok=True)
        with open(name, 'wb') as f:
            logger.info('Storing cache %s', name)
            pickle.dump(cache, f)
    finally:
        # Release in finally (mirroring _load_cache) so an exception in
        # mkdir/open/pickle.dump cannot leak the multiprocessing lock.
        if 'LOCK' in globals():
            logger.debug(  # pragma: no cover
                'Releasing cache lock from process %s', os.getpid())
            LOCK.release()  # pragma: no cover
36 changes: 19 additions & 17 deletions cvxportfolio/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
__all__ = ["YahooFinance", "Fred",
"UserProvidedMarketData", "DownloadedMarketData"]

logger = logging.getLogger(__name__)

BASE_LOCATION = Path.home() / "cvxportfolio_data"

def now_timezoned():
Expand Down Expand Up @@ -132,7 +134,7 @@ def _load_raw(self):
# we could implement multiprocess safety here
loader = globals()['_loader_' + self._storage_backend]
try:
logging.info(
logger.info(
f"{self.__class__.__name__} is trying to load {self.symbol}"
+ f" with {self._storage_backend} backend"
+ f" from {self.storage_location}")
Expand All @@ -156,7 +158,7 @@ def _store(self, data):
"""
# we could implement multiprocess safety here
storer = globals()['_storer_' + self._storage_backend]
logging.info(
logger.info(
f"{self.__class__.__name__} is storing {self.symbol}"
+ f" with {self._storage_backend} backend"
+ f" in {self.storage_location}")
Expand All @@ -178,14 +180,14 @@ def update(self, grace_period):
:type grace_period: pandas.Timedelta
"""
current = self._load_raw()
logging.info(
logger.info(
f"Downloading {self.symbol}"
+ f" from {self.__class__.__name__}")
updated = self._download(
self.symbol, current, grace_period=grace_period)

if np.any(updated.iloc[:-1].isnull()):
logging.warning(
logger.warning(
" cvxportfolio.%s('%s').data contains NaNs."
+ " You may want to inspect it. If you want, you can delete the"
+ " data file in %s to force re-download from the start.",
Expand All @@ -199,25 +201,25 @@ def update(self, grace_period):
np.isclose(updated.loc[current.index[:-1]],
current.iloc[:-1], equal_nan=True,
rtol=1e-08, atol=1e-08)):
logging.error(f"{self.__class__.__name__} update"
logger.error(f"{self.__class__.__name__} update"
+ f" of {self.symbol} is not append-only!")
self._print_difference(current, updated)
if hasattr(current, 'columns'):
# the first column is open price
if not current.iloc[-1, 0] == updated.loc[
current.index[-1]].iloc[0]:
logging.error(
logger.error(
f"{self.__class__.__name__} update "
+ f" of {self.symbol} changed last open price!")
self._print_difference(current, updated)
else:
if not current.iloc[-1] == updated.loc[current.index[-1]]:
logging.error(
logger.error(
f"{self.__class__.__name__} update"
+ f" of {self.symbol} changed last value!")
self._print_difference(current, updated)
except KeyError:
logging.error("%s update of %s could not be checked for"
logger.error("%s update of %s could not be checked for"
+ " append-only edits. Was there a DST change?",
self.__class__.__name__, self.symbol)
self._store(updated)
Expand Down Expand Up @@ -327,7 +329,7 @@ def _clean(data):
# fill open price with close from day(s) before
# repeat as long as it helps (up to 1 year)
for shifter in range(252):
logging.info(
logger.info(
"Filling opens with close from %s days before", shifter)
orig_missing_opens = data['open'].isnull().sum()
data['open'] = data['open'].fillna(data['close'].shift(
Expand Down Expand Up @@ -481,15 +483,15 @@ def _download(self, symbol, current=None,
+ ' could have issues with DST.')
if (current is None) or (len(current) < overlap):
updated = self._get_data_yahoo(symbol, **kwargs)
logging.info('Downloading from the start.')
logger.info('Downloading from the start.')
result = self._clean(updated)
# we remove first row if it contains NaNs
if np.any(result.iloc[0].isnull()):
result = result.iloc[1:]
return result
if (now_timezoned() - current.index[-1]
) < pd.Timedelta(grace_period):
logging.info(
logger.info(
'Skipping download because stored data is recent enough.')
return current
new = self._get_data_yahoo(symbol, start=current.index[-overlap])
Expand Down Expand Up @@ -564,15 +566,15 @@ def _download(
return self._internal_download(symbol)
if (pd.Timestamp.today() - current.index[-1]
) < pd.Timedelta(grace_period):
logging.info(
logger.info(
'Skipping download because stored data is recent enough.')
return current

new = self._internal_download(symbol)
new = new.loc[new.index > current.index[-1]]

if new.empty:
logging.info('New downloaded data is empty!')
logger.info('New downloaded data is empty!')
return current

assert new.index[0] > current.index[-1]
Expand Down Expand Up @@ -833,7 +835,7 @@ def __init__(
def _mask_dataframes(self, mask):
"""Mask internal dataframes if necessary."""
if (self._mask is None) or not np.all(self._mask == mask):
logging.info("Masking internal %s dataframes.",
logger.info("Masking internal %s dataframes.",
self.__class__.__name__)
colmask = self.returns.columns[mask]
# self._masked_returns = self._df_or_ser_set_read_only(
Expand Down Expand Up @@ -1307,7 +1309,7 @@ def _get_market_data(self, universe, grace_period, storage_backend):
sys.stdout.flush()

for stock in universe:
logging.info(
logger.info(
'Updating %s with %s.', stock, self.datasource.__name__)
print('.', end='')
sys.stdout.flush()
Expand Down Expand Up @@ -1342,10 +1344,10 @@ def _remove_missing_recent(self):
"""

if self.prices.iloc[-5:].isnull().any().any():
logging.debug(
logger.debug(
'Removing some recent lines because there are missing values.')
drop_at = self.prices.iloc[-5:].isnull().any(axis=1).idxmax()
logging.debug('Dropping at index %s', drop_at)
logger.debug('Dropping at index %s', drop_at)
self.returns = self.returns.loc[self.returns.index < drop_at]
if self.prices is not None:
self.prices = self.prices.loc[self.prices.index < drop_at]
Expand Down
9 changes: 5 additions & 4 deletions cvxportfolio/forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from .errors import ForecastError
from .estimator import Estimator

logger = logging.getLogger(__name__)

def online_cache(values_in_time):
"""A simple online cache that decorates values_in_time.
Expand Down Expand Up @@ -74,13 +75,13 @@ def wrapped(self, t, cache=None, **kwargs):
cache[self] = {}

if t in cache[self]:
logging.debug(
logger.debug(
'%s.values_in_time at time %s is retrieved from cache.',
self, t)
result = cache[self][t]
else:
result = values_in_time(self, t=t, cache=cache, **kwargs)
logging.debug('%s.values_in_time at time %s is stored in cache.',
logger.debug('%s.values_in_time at time %s is stored in cache.',
self, t)
cache[self][t] = result
return result
Expand All @@ -97,12 +98,12 @@ def _agnostic_update(self, t, past_returns):
"""Choose whether to make forecast from scratch or update last one."""
if (self._last_time is None) or (
self._last_time != past_returns.index[-1]):
logging.debug(
logger.debug(
'%s.values_in_time at time %s is computed from scratch.',
self, t)
self._initial_compute(t=t, past_returns=past_returns)
else:
logging.debug(
logger.debug(
'%s.values_in_time at time %s is updated from previous value.',
self, t)
self._online_update(t=t, past_returns=past_returns)
Expand Down
6 changes: 4 additions & 2 deletions cvxportfolio/policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
"MultiPeriodOpt",
]

logger = logging.getLogger(__name__)

class Policy(Estimator):
"""Base trading policy class, defines execute method."""

Expand Down Expand Up @@ -361,7 +363,7 @@ def values_in_time_recursive(self, t, current_weights, **kwargs):
result = current_weights + pd.Series(
self.trades_weights.current_value, current_weights.index)
except MissingTimesError:
logging.info("%s didn't trade at time %s because it couldn't find"
logger.info("%s didn't trade at time %s because it couldn't find"
+ " trade weights among the provided ones.",
self.__class__.__name__, t)
result = current_weights
Expand Down Expand Up @@ -413,7 +415,7 @@ def values_in_time_recursive(self, t, current_weights, **kwargs):
self.target_weights.current_value, current_weights.index
) - current_weights
except MissingTimesError:
logging.info("%s didn't trade at time %s because it couldn't find"
logger.info("%s didn't trade at time %s because it couldn't find"
+ " target weights among the provided ones.",
self.__class__.__name__, t)
result = current_weights
Expand Down
Loading

0 comments on commit e53bb0e

Please sign in to comment.