diff --git a/cvxportfolio/cache.py b/cvxportfolio/cache.py index 243771ccc..3920c4348 100644 --- a/cvxportfolio/cache.py +++ b/cvxportfolio/cache.py @@ -17,6 +17,7 @@ import os import pickle +logger = logging.getLogger(__name__) def _mp_init(l): """Shared lock to disk access for multiprocessing.""" @@ -43,42 +44,42 @@ def cache_name(signature, base_location): def _load_cache(signature, base_location): """Load cache from disk.""" if signature is None: - logging.info('Market data has no signature!') + logger.info('Market data has no signature!') return {} name = cache_name(signature, base_location) if 'LOCK' in globals(): - logging.debug( # pragma: no cover + logger.debug( # pragma: no cover 'Acquiring cache lock from process %s', os.getpid()) LOCK.acquire() # pragma: no cover try: with open(name, 'rb') as f: res = pickle.load(f) - logging.info('Loaded cache %s', name) + logger.info('Loaded cache %s', name) return res except FileNotFoundError: - logging.info('Cache not found!') + logger.info('Cache not found!') return {} finally: if 'LOCK' in globals(): - logging.debug( # pragma: no cover + logger.debug( # pragma: no cover 'Releasing cache lock from process %s', os.getpid()) LOCK.release() # pragma: no cover def _store_cache(cache, signature, base_location): """Store cache to disk.""" if signature is None: - logging.info('Market data has no signature!') + logger.info('Market data has no signature!') return name = cache_name(signature, base_location) if 'LOCK' in globals(): - logging.debug( # pragma: no cover + logger.debug( # pragma: no cover 'Acquiring cache lock from process %s', os.getpid()) LOCK.acquire() # pragma: no cover name.parent.mkdir(exist_ok=True) with open(name, 'wb') as f: - logging.info('Storing cache %s', name) + logger.info('Storing cache %s', name) pickle.dump(cache, f) if 'LOCK' in globals(): - logging.debug( # pragma: no cover + logger.debug( # pragma: no cover 'Releasing cache lock from process %s', os.getpid()) LOCK.release() # pragma: no cover diff --git a/cvxportfolio/data.py b/cvxportfolio/data.py index 11106d411..2367d365d 100644 --- a/cvxportfolio/data.py +++ b/cvxportfolio/data.py @@ -40,6 +40,8 @@ __all__ = ["YahooFinance", "Fred", "UserProvidedMarketData", "DownloadedMarketData"] +logger = logging.getLogger(__name__) + BASE_LOCATION = Path.home() / "cvxportfolio_data" def now_timezoned(): @@ -132,7 +134,7 @@ def _load_raw(self): # we could implement multiprocess safety here loader = globals()['_loader_' + self._storage_backend] try: - logging.info( + logger.info( f"{self.__class__.__name__} is trying to load {self.symbol}" + f" with {self._storage_backend} backend" + f" from {self.storage_location}") @@ -156,7 +158,7 @@ def _store(self, data): """ # we could implement multiprocess safety here storer = globals()['_storer_' + self._storage_backend] - logging.info( + logger.info( f"{self.__class__.__name__} is storing {self.symbol}" + f" with {self._storage_backend} backend" + f" in {self.storage_location}") @@ -178,14 +180,14 @@ def update(self, grace_period): :type grace_period: pandas.Timedelta """ current = self._load_raw() - logging.info( + logger.info( f"Downloading {self.symbol}" + f" from {self.__class__.__name__}") updated = self._download( self.symbol, current, grace_period=grace_period) if np.any(updated.iloc[:-1].isnull()): - logging.warning( + logger.warning( " cvxportfolio.%s('%s').data contains NaNs." + " You may want to inspect it. If you want, you can delete the" + " data file in %s to force re-download from the start.", @@ -199,25 +201,25 @@ def update(self, grace_period): np.isclose(updated.loc[current.index[:-1]], current.iloc[:-1], equal_nan=True, rtol=1e-08, atol=1e-08)): - logging.error(f"{self.__class__.__name__} update" + logger.error(f"{self.__class__.__name__} update" + f" of {self.symbol} is not append-only!") self._print_difference(current, updated) if hasattr(current, 'columns'): # the first column is open price if not current.iloc[-1, 0] == updated.loc[ current.index[-1]].iloc[0]: - logging.error( + logger.error( f"{self.__class__.__name__} update " + f" of {self.symbol} changed last open price!") self._print_difference(current, updated) else: if not current.iloc[-1] == updated.loc[current.index[-1]]: - logging.error( + logger.error( f"{self.__class__.__name__} update" + f" of {self.symbol} changed last value!") self._print_difference(current, updated) except KeyError: - logging.error("%s update of %s could not be checked for" + logger.error("%s update of %s could not be checked for" + " append-only edits. Was there a DST change?", self.__class__.__name__, self.symbol) self._store(updated) @@ -327,7 +329,7 @@ def _clean(data): # fill open price with close from day(s) before # repeat as long as it helps (up to 1 year) for shifter in range(252): - logging.info( + logger.info( "Filling opens with close from %s days before", shifter) orig_missing_opens = data['open'].isnull().sum() data['open'] = data['open'].fillna(data['close'].shift( @@ -481,7 +483,7 @@ def _download(self, symbol, current=None, + ' could have issues with DST.') if (current is None) or (len(current) < overlap): updated = self._get_data_yahoo(symbol, **kwargs) - logging.info('Downloading from the start.') + logger.info('Downloading from the start.') result = self._clean(updated) # we remove first row if it contains NaNs if np.any(result.iloc[0].isnull()): @@ -489,7 +491,7 @@ def _download(self, symbol, current=None, return result if (now_timezoned() - current.index[-1] ) < pd.Timedelta(grace_period): - logging.info( + logger.info( 'Skipping download because stored data is recent enough.') return current new = self._get_data_yahoo(symbol, start=current.index[-overlap]) @@ -564,7 +566,7 @@ def _download( return self._internal_download(symbol) if (pd.Timestamp.today() - current.index[-1] ) < pd.Timedelta(grace_period): - logging.info( + logger.info( 'Skipping download because stored data is recent enough.') return current @@ -572,7 +574,7 @@ def _download( new = new.loc[new.index > current.index[-1]] if new.empty: - logging.info('New downloaded data is empty!') + logger.info('New downloaded data is empty!') return current assert new.index[0] > current.index[-1] @@ -833,7 +835,7 @@ def __init__( def _mask_dataframes(self, mask): """Mask internal dataframes if necessary.""" if (self._mask is None) or not np.all(self._mask == mask): - logging.info("Masking internal %s dataframes.", + logger.info("Masking internal %s dataframes.", self.__class__.__name__) colmask = self.returns.columns[mask] # self._masked_returns = self._df_or_ser_set_read_only( @@ -1307,7 +1309,7 @@ def _get_market_data(self, universe, grace_period, storage_backend): sys.stdout.flush() for stock in universe: - logging.info( + logger.info( 'Updating %s with %s.', stock, self.datasource.__name__) print('.', end='') sys.stdout.flush() @@ -1342,10 +1344,10 @@ def _remove_missing_recent(self): """ if self.prices.iloc[-5:].isnull().any().any(): - logging.debug( + logger.debug( 'Removing some recent lines because there are missing values.') drop_at = self.prices.iloc[-5:].isnull().any(axis=1).idxmax() - logging.debug('Dropping at index %s', drop_at) + logger.debug('Dropping at index %s', drop_at) self.returns = self.returns.loc[self.returns.index < drop_at] if self.prices is not None: self.prices = self.prices.loc[self.prices.index < drop_at] diff --git a/cvxportfolio/forecast.py b/cvxportfolio/forecast.py index 263aa23d8..03a86e7fd 100644 --- a/cvxportfolio/forecast.py +++ b/cvxportfolio/forecast.py @@ -35,6 +35,7 @@ from .errors import ForecastError from .estimator import Estimator +logger = logging.getLogger(__name__) def online_cache(values_in_time): """A simple online cache that decorates values_in_time. @@ -74,13 +75,13 @@ def wrapped(self, t, cache=None, **kwargs): cache[self] = {} if t in cache[self]: - logging.debug( + logger.debug( '%s.values_in_time at time %s is retrieved from cache.', self, t) result = cache[self][t] else: result = values_in_time(self, t=t, cache=cache, **kwargs) - logging.debug('%s.values_in_time at time %s is stored in cache.', + logger.debug('%s.values_in_time at time %s is stored in cache.', self, t) cache[self][t] = result return result @@ -97,12 +98,12 @@ def _agnostic_update(self, t, past_returns): """Choose whether to make forecast from scratch or update last one.""" if (self._last_time is None) or ( self._last_time != past_returns.index[-1]): - logging.debug( + logger.debug( '%s.values_in_time at time %s is computed from scratch.', self, t) self._initial_compute(t=t, past_returns=past_returns) else: - logging.debug( + logger.debug( '%s.values_in_time at time %s is updated from previous value.', self, t) self._online_update(t=t, past_returns=past_returns) diff --git a/cvxportfolio/policies.py b/cvxportfolio/policies.py index b4c647225..928da5a13 100644 --- a/cvxportfolio/policies.py +++ b/cvxportfolio/policies.py @@ -46,6 +46,8 @@ "MultiPeriodOpt", ] +logger = logging.getLogger(__name__) + class Policy(Estimator): """Base trading policy class, defines execute method.""" @@ -361,7 +363,7 @@ def values_in_time_recursive(self, t, current_weights, **kwargs): result = current_weights + pd.Series( self.trades_weights.current_value, current_weights.index) except MissingTimesError: - logging.info("%s didn't trade at time %s because it couldn't find" + logger.info("%s didn't trade at time %s because it couldn't find" + " trade weights among the provided ones.", self.__class__.__name__, t) result = current_weights @@ -413,7 +415,7 @@ def values_in_time_recursive(self, t, current_weights, **kwargs): self.target_weights.current_value, current_weights.index ) - current_weights except MissingTimesError: - logging.info("%s didn't trade at time %s because it couldn't find" + logger.info("%s didn't trade at time %s because it couldn't find" + " target weights among the provided ones.", self.__class__.__name__, t) result = current_weights diff --git a/cvxportfolio/result.py b/cvxportfolio/result.py index 20ac3d51d..9b9c6c878 100644 --- a/cvxportfolio/result.py +++ b/cvxportfolio/result.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""This module defines :class:`cvxportfolio.BacktestResult`. +"""This module defines :class:`BacktestResult`. This is the object that is returned by the :meth:`cvxportfolio.MarketSimulator.backtest` @@ -22,6 +22,11 @@ :meth:`BacktestResult.plot` method for producing plots and ``__repr__`` magic method, which is invoked when the user prints an instance. + +.. versionadded:: 1.1.0 + The :attr:`BacktestResult.log` property, which returns the logs produced + during the back-test, at level ``INFO`` or higher. It works also for + back-tests run in parallel! """ @@ -29,6 +34,7 @@ import collections import logging +from io import StringIO from typing import Dict import matplotlib.pyplot as plt @@ -39,6 +45,12 @@ __all__ = ['BacktestResult'] +# Module level constants, should be exposed to user (move to configuration.py?) +RECORD_LOGS = 'INFO' +LOG_FORMAT = '| %(asctime)s | %(levelname)s | process:%(process)d | %(pathname)s:%(lineno)s | %(message)s ' + + +logger = logging.getLogger(__name__) # def getFiscalQuarter(dt): # """Convert a time to a fiscal quarter.""" @@ -47,6 +59,7 @@ # return "Q%i %s" % (quarter, year) +# pylint: disable=too-many-public-methods class BacktestResult: """Store the data from a back-test and produce metrics and plots. @@ -76,9 +89,38 @@ def __init__(self, universe, trading_calendar, costs): self._current_universe = pd.Index(universe) self._indexer = np.arange(len(universe), dtype=int) + # record logs + self._log = '' + self._root_logger = logging.getLogger() + + # We modify the root logger to filter at our chosen level (or lower + # if its was lower) and pre-existing handlers (notably the + # stderr one) to filter at the level of the original logger, or theirs + # if higher. We put them back to their initial state at the end. + self._orig_rootlogger_level = self._root_logger.level + self._root_logger.setLevel( + min(logging.getLevelNamesMapping()[RECORD_LOGS], + self._root_logger.level)) + self._orig_loghandlers_levels = [] + for pre_existing_handler in self._root_logger.handlers: + self._orig_loghandlers_levels.append(pre_existing_handler.level) + pre_existing_handler.setLevel( + max(self._orig_rootlogger_level, pre_existing_handler.level)) + + # add stream handler that we use + self._log_stream = StringIO() + self._log_stream_handler = logging.StreamHandler( + stream=self._log_stream) + self._log_stream_handler.setLevel(RECORD_LOGS) + self._root_logger.addHandler(self._log_stream_handler) + + # logs formatting + formatter = logging.Formatter(LOG_FORMAT) + self._log_stream_handler.setFormatter(formatter) + @property def _current_full_universe(self): - """Helper property used by _change_universe. + """Helper property used by ``_change_universe``. :returns: Current full universe (including assets that were seen in the past but have been dropped). @@ -110,7 +152,7 @@ def _change_universe(self, new_universe): # otherwise we lose the ordering :( else: - logging.info( + logger.info( "%s joining new universe with old", self.__class__.__name__) joined = pd.Index( @@ -127,6 +169,7 @@ def _change_universe(self, new_universe): self._current_universe = new_universe self._indexer = self._h.columns.get_indexer(new_universe) + #pylint: disable=too-many-arguments def _log_trading(self, t: pd.Timestamp, h: pd.Series[float], u: pd.Series[float], z: pd.Series[float], costs: Dict[str, float], @@ -168,10 +211,33 @@ def _log_final(self, t, t_next, h, extra_simulator_time): self._cash_returns = self._cash_returns.iloc[:tidx] self._benchmark_returns = self._benchmark_returns.iloc[:tidx] + # logging output + self._log = self._log_stream.getvalue() + + # put back root logger and handlers to initial state + self._root_logger.setLevel(self._orig_rootlogger_level) + self._root_logger.removeHandler(self._log_stream_handler) + for i, handler in enumerate(self._root_logger.handlers): + handler.setLevel(self._orig_loghandlers_levels[i]) + + # delete logging helpers (unnecessary?) + del self._log_stream_handler + self._log_stream.close() + del self._log_stream + # # General backtest information # + @property + def log(self): + """Logs from the policy, simulator, market data server, .... + + :return: Logs produced during the back-test, newline separated. + :rtype: str + """ + return self._log + @property def policy_times(self): """The computation time of the policy object at each period. diff --git a/cvxportfolio/simulator.py b/cvxportfolio/simulator.py index 11feb0a42..0b630788e 100644 --- a/cvxportfolio/simulator.py +++ b/cvxportfolio/simulator.py @@ -39,6 +39,7 @@ PPY = 252 __all__ = ['StockMarketSimulator', 'MarketSimulator'] +logger = logging.getLogger(__name__) class MarketSimulator: """This class is a generic financial market simulator. @@ -227,7 +228,7 @@ def simulate( if not current_volumes is None: non_tradable_stocks = current_volumes[current_volumes <= 0].index if len(non_tradable_stocks): - logging.info( + logger.info( "At time %s the simulator canceled trades on assets %s" + " because their market volumes for the period are zero.", t, non_tradable_stocks) @@ -281,7 +282,7 @@ def _get_initialized_policy(self, orig_policy, universe, trading_calendar): # if policy uses a cache load it from disk if hasattr(policy, '_cache'): - logging.info('Trying to load cache from disk...') + logger.info('Trying to load cache from disk...') policy._cache = _load_cache( signature=self.market_data.partial_universe_signature(universe), base_location=self.base_location) @@ -293,7 +294,7 @@ def _get_initialized_policy(self, orig_policy, universe, trading_calendar): def _finalize_policy(self, policy, universe): if hasattr(policy, '_cache'): - logging.info('Storing cache from policy to disk...') + logger.info('Storing cache from policy to disk...') _store_cache( cache=policy._cache, signature=self.market_data.partial_universe_signature(universe), @@ -361,7 +362,7 @@ def _backtest(self, policy, start_time, end_time, h): h = h_next if sum(h) <= 0.: # bankruptcy - logging.warning('Back-test ended in bankruptcy at time %s!', t) + logger.warning('Back-test ended in bankruptcy at time %s!', t) break self._finalize_policy(used_policy, h.index) @@ -410,12 +411,12 @@ def _adjust_h_new_universe(self, h, new_universe): new_assets = pd.Index(set(new_universe).difference(h.index)) if len(new_assets): - logging.info("Adjusting h vector by adding assets %s", new_assets) + logger.info("Adjusting h vector by adding assets %s", new_assets) remove_assets = pd.Index(set(h.index).difference(new_universe)) if len(remove_assets): total_liquidation = h[remove_assets].sum() - logging.info( + logger.info( "Adjusting h vector by removing assets %s." + " Their current market value of %s is added" + " to the cash account.", remove_assets, total_liquidation) diff --git a/docs/result.rst b/docs/result.rst index 25a7ed26f..386475e66 100644 --- a/docs/result.rst +++ b/docs/result.rst @@ -1,11 +1,11 @@ Back-test result ================ -.. automodule:: cvxportfolio.result - .. py:module:: cvxportfolio :noindex: +.. automodule:: cvxportfolio.result + .. autoclass:: BacktestResult .. autoproperty:: cash_key @@ -88,6 +88,8 @@ Back-test result .. autoproperty:: simulator_times + .. autoproperty:: log + .. automethod:: plot