Skip to content

Commit

Permalink
Added log property to BacktestResult,
Browse files Browse the repository at this point in the history
which contains all logs produced in the process that ran that back-test.
Level and formatting are configurable.
Improved logging calls from the rest of the library.
  • Loading branch information
enzbus committed Dec 28, 2023
1 parent 5f1337c commit e53bb0e
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 43 deletions.
19 changes: 10 additions & 9 deletions cvxportfolio/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os
import pickle

logger = logging.getLogger(__name__)

def _mp_init(l):
"""Shared lock to disk access for multiprocessing."""
Expand All @@ -43,42 +44,42 @@ def cache_name(signature, base_location):
def _load_cache(signature, base_location):
    """Load a market-data cache dictionary from disk.

    :param signature: Unique signature of the market data; used to derive
        the on-disk file name via ``cache_name``. If ``None`` there is
        nothing to load and an empty dict is returned.
    :param base_location: Base directory in which cache files are stored.

    :returns: The un-pickled cache dictionary, or ``{}`` when the
        signature is missing or no cache file exists yet.
    """
    if signature is None:
        logger.info('Market data has no signature!')
        return {}
    name = cache_name(signature, base_location)
    # LOCK is only present when running under multiprocessing (set up by
    # _mp_init), hence the guarded access via globals().
    if 'LOCK' in globals():
        logger.debug(  # pragma: no cover
            'Acquiring cache lock from process %s', os.getpid())
        LOCK.acquire()  # pragma: no cover
    try:
        with open(name, 'rb') as f:
            res = pickle.load(f)
            logger.info('Loaded cache %s', name)
            return res
    except FileNotFoundError:
        # First run for this signature: no cache on disk yet.
        logger.info('Cache not found!')
        return {}
    finally:
        # Release in finally so the lock is never leaked, whatever the
        # outcome of the load above.
        if 'LOCK' in globals():
            logger.debug(  # pragma: no cover
                'Releasing cache lock from process %s', os.getpid())
            LOCK.release()  # pragma: no cover

def _store_cache(cache, signature, base_location):
    """Store a market-data cache dictionary to disk.

    :param cache: Dictionary to pickle to disk.
    :param signature: Unique signature of the market data; used to derive
        the on-disk file name via ``cache_name``. If ``None`` nothing is
        stored.
    :param base_location: Base directory in which cache files are stored.
    """
    if signature is None:
        logger.info('Market data has no signature!')
        return
    name = cache_name(signature, base_location)
    # LOCK is only present when running under multiprocessing (set up by
    # _mp_init), hence the guarded access via globals().
    if 'LOCK' in globals():
        logger.debug(  # pragma: no cover
            'Acquiring cache lock from process %s', os.getpid())
        LOCK.acquire()  # pragma: no cover
    try:
        name.parent.mkdir(exist_ok=True)
        with open(name, 'wb') as f:
            logger.info('Storing cache %s', name)
            pickle.dump(cache, f)
    finally:
        # Release in finally (mirroring _load_cache) so an exception in
        # mkdir/open/pickle.dump cannot leak the multiprocessing lock.
        if 'LOCK' in globals():
            logger.debug(  # pragma: no cover
                'Releasing cache lock from process %s', os.getpid())
            LOCK.release()  # pragma: no cover
36 changes: 19 additions & 17 deletions cvxportfolio/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
__all__ = ["YahooFinance", "Fred",
"UserProvidedMarketData", "DownloadedMarketData"]

logger = logging.getLogger(__name__)

BASE_LOCATION = Path.home() / "cvxportfolio_data"

def now_timezoned():
Expand Down Expand Up @@ -132,7 +134,7 @@ def _load_raw(self):
# we could implement multiprocess safety here
loader = globals()['_loader_' + self._storage_backend]
try:
logging.info(
logger.info(
f"{self.__class__.__name__} is trying to load {self.symbol}"
+ f" with {self._storage_backend} backend"
+ f" from {self.storage_location}")
Expand All @@ -156,7 +158,7 @@ def _store(self, data):
"""
# we could implement multiprocess safety here
storer = globals()['_storer_' + self._storage_backend]
logging.info(
logger.info(
f"{self.__class__.__name__} is storing {self.symbol}"
+ f" with {self._storage_backend} backend"
+ f" in {self.storage_location}")
Expand All @@ -178,14 +180,14 @@ def update(self, grace_period):
:type grace_period: pandas.Timedelta
"""
current = self._load_raw()
logging.info(
logger.info(
f"Downloading {self.symbol}"
+ f" from {self.__class__.__name__}")
updated = self._download(
self.symbol, current, grace_period=grace_period)

if np.any(updated.iloc[:-1].isnull()):
logging.warning(
logger.warning(
" cvxportfolio.%s('%s').data contains NaNs."
+ " You may want to inspect it. If you want, you can delete the"
+ " data file in %s to force re-download from the start.",
Expand All @@ -199,25 +201,25 @@ def update(self, grace_period):
np.isclose(updated.loc[current.index[:-1]],
current.iloc[:-1], equal_nan=True,
rtol=1e-08, atol=1e-08)):
logging.error(f"{self.__class__.__name__} update"
logger.error(f"{self.__class__.__name__} update"
+ f" of {self.symbol} is not append-only!")
self._print_difference(current, updated)
if hasattr(current, 'columns'):
# the first column is open price
if not current.iloc[-1, 0] == updated.loc[
current.index[-1]].iloc[0]:
logging.error(
logger.error(
f"{self.__class__.__name__} update "
+ f" of {self.symbol} changed last open price!")
self._print_difference(current, updated)
else:
if not current.iloc[-1] == updated.loc[current.index[-1]]:
logging.error(
logger.error(
f"{self.__class__.__name__} update"
+ f" of {self.symbol} changed last value!")
self._print_difference(current, updated)
except KeyError:
logging.error("%s update of %s could not be checked for"
logger.error("%s update of %s could not be checked for"
+ " append-only edits. Was there a DST change?",
self.__class__.__name__, self.symbol)
self._store(updated)
Expand Down Expand Up @@ -327,7 +329,7 @@ def _clean(data):
# fill open price with close from day(s) before
# repeat as long as it helps (up to 1 year)
for shifter in range(252):
logging.info(
logger.info(
"Filling opens with close from %s days before", shifter)
orig_missing_opens = data['open'].isnull().sum()
data['open'] = data['open'].fillna(data['close'].shift(
Expand Down Expand Up @@ -481,15 +483,15 @@ def _download(self, symbol, current=None,
+ ' could have issues with DST.')
if (current is None) or (len(current) < overlap):
updated = self._get_data_yahoo(symbol, **kwargs)
logging.info('Downloading from the start.')
logger.info('Downloading from the start.')
result = self._clean(updated)
# we remove first row if it contains NaNs
if np.any(result.iloc[0].isnull()):
result = result.iloc[1:]
return result
if (now_timezoned() - current.index[-1]
) < pd.Timedelta(grace_period):
logging.info(
logger.info(
'Skipping download because stored data is recent enough.')
return current
new = self._get_data_yahoo(symbol, start=current.index[-overlap])
Expand Down Expand Up @@ -564,15 +566,15 @@ def _download(
return self._internal_download(symbol)
if (pd.Timestamp.today() - current.index[-1]
) < pd.Timedelta(grace_period):
logging.info(
logger.info(
'Skipping download because stored data is recent enough.')
return current

new = self._internal_download(symbol)
new = new.loc[new.index > current.index[-1]]

if new.empty:
logging.info('New downloaded data is empty!')
logger.info('New downloaded data is empty!')
return current

assert new.index[0] > current.index[-1]
Expand Down Expand Up @@ -833,7 +835,7 @@ def __init__(
def _mask_dataframes(self, mask):
"""Mask internal dataframes if necessary."""
if (self._mask is None) or not np.all(self._mask == mask):
logging.info("Masking internal %s dataframes.",
logger.info("Masking internal %s dataframes.",
self.__class__.__name__)
colmask = self.returns.columns[mask]
# self._masked_returns = self._df_or_ser_set_read_only(
Expand Down Expand Up @@ -1307,7 +1309,7 @@ def _get_market_data(self, universe, grace_period, storage_backend):
sys.stdout.flush()

for stock in universe:
logging.info(
logger.info(
'Updating %s with %s.', stock, self.datasource.__name__)
print('.', end='')
sys.stdout.flush()
Expand Down Expand Up @@ -1342,10 +1344,10 @@ def _remove_missing_recent(self):
"""

if self.prices.iloc[-5:].isnull().any().any():
logging.debug(
logger.debug(
'Removing some recent lines because there are missing values.')
drop_at = self.prices.iloc[-5:].isnull().any(axis=1).idxmax()
logging.debug('Dropping at index %s', drop_at)
logger.debug('Dropping at index %s', drop_at)
self.returns = self.returns.loc[self.returns.index < drop_at]
if self.prices is not None:
self.prices = self.prices.loc[self.prices.index < drop_at]
Expand Down
9 changes: 5 additions & 4 deletions cvxportfolio/forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from .errors import ForecastError
from .estimator import Estimator

logger = logging.getLogger(__name__)

def online_cache(values_in_time):
"""A simple online cache that decorates values_in_time.
Expand Down Expand Up @@ -74,13 +75,13 @@ def wrapped(self, t, cache=None, **kwargs):
cache[self] = {}

if t in cache[self]:
logging.debug(
logger.debug(
'%s.values_in_time at time %s is retrieved from cache.',
self, t)
result = cache[self][t]
else:
result = values_in_time(self, t=t, cache=cache, **kwargs)
logging.debug('%s.values_in_time at time %s is stored in cache.',
logger.debug('%s.values_in_time at time %s is stored in cache.',
self, t)
cache[self][t] = result
return result
Expand All @@ -97,12 +98,12 @@ def _agnostic_update(self, t, past_returns):
"""Choose whether to make forecast from scratch or update last one."""
if (self._last_time is None) or (
self._last_time != past_returns.index[-1]):
logging.debug(
logger.debug(
'%s.values_in_time at time %s is computed from scratch.',
self, t)
self._initial_compute(t=t, past_returns=past_returns)
else:
logging.debug(
logger.debug(
'%s.values_in_time at time %s is updated from previous value.',
self, t)
self._online_update(t=t, past_returns=past_returns)
Expand Down
6 changes: 4 additions & 2 deletions cvxportfolio/policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
"MultiPeriodOpt",
]

logger = logging.getLogger(__name__)

class Policy(Estimator):
"""Base trading policy class, defines execute method."""

Expand Down Expand Up @@ -361,7 +363,7 @@ def values_in_time_recursive(self, t, current_weights, **kwargs):
result = current_weights + pd.Series(
self.trades_weights.current_value, current_weights.index)
except MissingTimesError:
logging.info("%s didn't trade at time %s because it couldn't find"
logger.info("%s didn't trade at time %s because it couldn't find"
+ " trade weights among the provided ones.",
self.__class__.__name__, t)
result = current_weights
Expand Down Expand Up @@ -413,7 +415,7 @@ def values_in_time_recursive(self, t, current_weights, **kwargs):
self.target_weights.current_value, current_weights.index
) - current_weights
except MissingTimesError:
logging.info("%s didn't trade at time %s because it couldn't find"
logger.info("%s didn't trade at time %s because it couldn't find"
+ " target weights among the provided ones.",
self.__class__.__name__, t)
result = current_weights
Expand Down
Loading

0 comments on commit e53bb0e

Please sign in to comment.