Source code for bt.backtest

"""
Contains backtesting logic and objects.
"""
from __future__ import division
from copy import deepcopy
import bt
import ffn
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import pyprind


def run(*backtests):
    """
    Execute each supplied backtest and wrap them all in a Result.

    Args:
        * backtests (Backtest): Backtests, passed as positional arguments.

    Returns:
        Result built from the same backtests.

    """
    for backtest in backtests:
        backtest.run()

    result = Result(*backtests)
    return result
def benchmark_random(backtest, random_strategy, nsim=100):
    """
    Compare a backtest against a number of randomized backtests.

    The idea is to benchmark your strategy against a set of strategies that
    share its structure but execute some part of the logic randomly, to see
    whether the strategy has any merit: does it beat randomly picking
    weights, or randomly picking the selected securities?

    Note:
        ``random_strategy.name`` is overwritten with ``"random_<i>"`` before
        each simulated backtest is created, and ``backtest.name`` is set to
        ``"original"`` when it is None.

    Args:
        * backtest (Backtest): The backtest you want to benchmark.
        * random_strategy (Strategy): The strategy to benchmark against. It
          should have a random component to emulate skill-less behavior.
        * nsim (int): Number of random backtests to create and run.

    Returns:
        RandomBenchmarkResult holding the original backtest first, followed
        by the random backtests.

    """
    # the original needs a name so it can be told apart from the random runs
    if backtest.name is None:
        backtest.name = "original"

    # make sure the original has been run
    if not backtest.has_run:
        backtest.run()

    # random backtests use the original's data with NaN rows dropped
    universe = backtest.data.dropna()

    all_backtests = [backtest]
    for sim_number in range(nsim):
        random_strategy.name = "random_%s" % sim_number
        simulated = bt.Backtest(random_strategy, universe)
        simulated.run()
        all_backtests.append(simulated)

    return RandomBenchmarkResult(*all_backtests)
class Backtest(object):
    """
    A Backtest combines a Strategy with data to produce a Result.

    A backtest is basically testing a strategy over a data set.

    Note:
        The Strategy will be deepcopied so it is re-usable in other
        backtests. To access the backtested strategy, simply access the
        strategy attribute.

    Args:
        * strategy (Strategy, Node, StrategyBase): The Strategy to be tested.
        * data (DataFrame): DataFrame containing data used in backtest. This
          will be the Strategy's "universe".
        * name (str): Backtest name - defaults to strategy name
        * initial_capital (float): Initial amount of capital passed to
          Strategy.
        * commissions (fn(quantity, price)): The commission function
          to be used. Ex: commissions=lambda q, p: max(1, abs(q) * 0.01)
        * integer_positions (bool): Whether to use integer positions for
          securities in the backtest. This can have unintended consequences
          when prices are high relative to the amount of capital (i.e.
          through split-adjusted prices, or too low a capital amount),
          causing allocated positions to round to zero. While the default is
          True, try setting to False for more robust behavior.
        * progress_bar (bool): Display progress bar while running backtest
        * additional_data (dict): Additional kwargs passed to
          StrategyBase.setup, after preprocessing. This data can be
          retrieved by Algos using StrategyBase.get_data. The data may also
          be used by the Strategy itself, i.e.

          - ``bidoffer``: A DataFrame with the same format as 'data', will
            be used by the strategy for transaction cost modeling
          - ``coupons``: A DataFrame with the same format as 'data', will
            be used by
            :class:`CouponPayingSecurity <bt.core.CouponPayingSecurity>`
            to determine cashflows.
          - ``cost_long``/``cost_short``: A DataFrame with the same format
            as 'data', will be used by
            :class:`CouponPayingSecurity <bt.core.CouponPayingSecurity>`
            to calculate asymmetric holding cost of long (or short)
            positions.

    Attributes:
        * strategy (Strategy): The Backtest's Strategy. This will be a
          deepcopy of the Strategy that was passed in.
        * data (DataFrame): Data passed in, with one all-NaN row prepended
        * dates (DateTimeIndex): Data's index
        * initial_capital (float): Initial capital
        * name (str): Backtest name
        * stats (ffn.PerformanceStats): Performance statistics (an empty
          dict until the backtest has run)
        * has_run (bool): Run flag
        * weights (DataFrame): Weights of each component over time
        * security_weights (DataFrame): Weights of each security as a
          percentage of the whole portfolio over time
        * additional_data (dict): Additional data passed at construction

    """

    def __init__(
        self,
        strategy,
        data,
        name=None,
        initial_capital=1000000.0,
        commissions=None,
        integer_positions=True,
        progress_bar=False,
        additional_data=None,
    ):
        if data.columns.duplicated().any():
            cols = data.columns[data.columns.duplicated().tolist()].tolist()
            raise Exception(
                "data provided has some duplicate column names: \n%s \n"
                "Please remove duplicates!" % cols
            )

        # we want to reuse strategy logic - copy it!
        # basically strategy is a template
        self.strategy = deepcopy(strategy)
        self.strategy.use_integer_positions(integer_positions)

        self._process_data(data, additional_data)

        self.initial_capital = initial_capital
        self.name = name if name is not None else strategy.name
        self.progress_bar = progress_bar

        if commissions is not None:
            self.strategy.set_commissions(commissions)

        self.stats = {}
        self._original_prices = None
        self._weights = None
        self._sweights = None
        self.has_run = False

    @staticmethod
    def _day_before_first(index):
        """Return the first entry of ``index`` shifted back by one day."""
        return index[0] - pd.DateOffset(days=1)

    def _process_data(self, data, additional_data):
        """
        Store ``data`` and ``additional_data`` with a leading all-NaN row.

        Sets ``self.data``, ``self.dates`` and ``self.additional_data``.
        The dict passed as ``additional_data`` is shallow-copied, so the
        caller's dict is not modified.
        """
        # add virtual row at t0-1day with NaNs
        # this is so that any trading action at t0 can be evaluated relative
        # to a clean starting point. This is related to #83. Basically, if
        # you have a big trade / commission on day 0, then the
        # Strategy.prices will be adjusted at 0, and hide the 'total' return.
        # The series should start at 100, but may start at 90, for example.
        # Here, we add a starting point at t0-1day, and this is the reference
        # starting point
        data_new = pd.concat(
            [
                pd.DataFrame(
                    np.nan,
                    columns=data.columns,
                    index=[self._day_before_first(data.index)],
                ),
                data,
            ]
        )

        self.data = data_new
        self.dates = data_new.index
        self.additional_data = (additional_data or {}).copy()

        # Look for data frames / series with the same index as (original)
        # data, and add in the first row as well (i.e. "bidoffer").
        # Only existing keys are reassigned, so iterating the dict is safe.
        for k in self.additional_data:
            old = self.additional_data[k]
            if isinstance(old, pd.DataFrame) and old.index.equals(data.index):
                empty_row = pd.DataFrame(
                    np.nan,
                    columns=old.columns,
                    index=[self._day_before_first(old.index)],
                )
                self.additional_data[k] = pd.concat([empty_row, old])
            elif isinstance(old, pd.Series) and old.index.equals(data.index):
                # give the padding row the same name as the original series;
                # concatenating with an unnamed series would drop the name
                empty_row = pd.Series(
                    np.nan,
                    index=[self._day_before_first(old.index)],
                    name=old.name,
                )
                self.additional_data[k] = pd.concat([empty_row, old])

    def run(self):
        """
        Runs the Backtest.

        Does nothing if the backtest has already been run. Otherwise sets up
        the strategy, steps it through every date, and then stores
        ``self.stats`` and ``self._original_prices``.
        """
        if self.has_run:
            return

        # set run flag to avoid running same test more than once
        self.has_run = True

        # setup strategy
        self.strategy.setup(self.data, **self.additional_data)

        # adjust strategy with initial capital
        self.strategy.adjust(self.initial_capital)

        # loop through dates
        # init progress bar
        if self.progress_bar:
            bar = pyprind.ProgBar(len(self.dates), title=self.name, stream=1)

        # since there is a dummy row at time 0, start backtest at date 1.
        # we must still update for t0
        self.strategy.update(self.dates[0])

        # and for the backtest loop, start at date 1
        for dt in self.dates[1:]:
            # update progress bar
            if self.progress_bar:
                bar.update()

            # update strategy
            self.strategy.update(dt)

            if not self.strategy.bankrupt:
                self.strategy.run()
                # need update after to save weights, values and such
                self.strategy.update(dt)
            else:
                if self.progress_bar:
                    bar.stop()

        self.stats = self.strategy.prices.calc_perf_stats()
        self._original_prices = self.strategy.prices

    @property
    def weights(self):
        """
        DataFrame of each component's weight over time.

        The result is computed on first access and cached afterwards.
        """
        if self._weights is not None:
            return self._weights

        if self.strategy.fixed_income:
            vals = pd.DataFrame(
                {x.full_name: x.notional_values for x in self.strategy.members}
            )
            vals = vals.div(self.strategy.notional_values, axis=0)
        else:
            vals = pd.DataFrame(
                {x.full_name: x.values for x in self.strategy.members}
            )
            vals = vals.div(self.strategy.values, axis=0)

        self._weights = vals
        return vals

    @property
    def positions(self):
        """
        DataFrame of each component's position over time
        """
        return self.strategy.positions

    @property
    def security_weights(self):
        """
        DataFrame containing weights of each security as a
        percentage of the whole portfolio over time.

        Members sharing the same ``name`` are summed into one column. The
        result is computed on first access and cached afterwards.
        """
        if self._sweights is not None:
            return self._sweights

        # get values for all securities in tree and divide by root values
        # for security weights
        vals = {}
        for m in self.strategy.members:
            if not isinstance(m, bt.core.SecurityBase):
                continue

            if self.strategy.fixed_income:
                m_values = m.notional_values
            else:
                m_values = m.values

            if m.name in vals:
                # add out of place: the stored object is whatever the
                # member's property returned, and ``+=`` could modify it
                # in place and leak the sum back into that member
                vals[m.name] = vals[m.name] + m_values
            else:
                vals[m.name] = m_values

        vals = pd.DataFrame(vals)

        # divide by root strategy values
        if self.strategy.fixed_income:
            vals = vals.div(self.strategy.notional_values, axis=0)
        else:
            vals = vals.div(self.strategy.values, axis=0)

        # save for future use
        self._sweights = vals
        return vals

    @property
    def herfindahl_index(self):
        """
        Calculate Herfindahl-Hirschman Index (HHI) for the portfolio.

        For each given day, HHI is defined as a sum of squared weights of
        securities in a portfolio; and varies from 1/N to 1.
        Value of 1/N would correspond to an equally weighted portfolio and
        value of 1 corresponds to an extreme case when all amount is invested
        in a single asset.

        1 / HHI is often considered as "an effective number of assets" in
        a given portfolio
        """
        w = self.security_weights
        return (w**2).sum(axis=1)

    @property
    def turnover(self):
        """
        Calculate the turnover for the backtest.

        This function will calculate the turnover for the strategy. Turnover
        is defined as the lesser of positive or negative outlays divided by
        NAV
        """
        s = self.strategy
        outlays = s.outlays

        # separate positive and negative outlays, sum them up, and keep min
        outlaysp = outlays[outlays >= 0].fillna(value=0).sum(axis=1)
        outlaysn = np.abs(outlays[outlays < 0].fillna(value=0).sum(axis=1))

        # merge and keep minimum
        min_outlay = pd.DataFrame({"pos": outlaysp, "neg": outlaysn}).min(axis=1)

        # turnover is defined as min outlay / nav
        mrg = pd.DataFrame({"outlay": min_outlay, "nav": s.values})
        return mrg["outlay"] / mrg["nav"]
class Result(ffn.GroupStats):
    """
    Based on ffn's GroupStats with a few extra helper methods.

    Args:
        * backtests (list): List of backtests

    Attributes:
        * backtest_list (list): List of backtests in the same order as
          provided
        * backtests (dict): Dict of backtests by name

    """

    def __init__(self, *backtests):
        # one single-column price frame per backtest, keyed by backtest name
        tmp = [pd.DataFrame({x.name: x.strategy.prices}) for x in backtests]
        super(Result, self).__init__(*tmp)
        self.backtest_list = backtests
        self.backtests = {x.name: x for x in backtests}

    def display_monthly_returns(self, backtest=0):
        """
        Display monthly returns for a specific backtest.

        Args:
            * backtest (str, int): Backtest. Can be either a index (int) or
              the name (str)

        """
        key = self._get_backtest(backtest)
        self[key].display_monthly_returns()

    def get_weights(self, backtest=0, filter=None):
        """
        :param backtest: (str, int) Backtest can be either a index (int) or
            the name (str)
        :param filter: (list, str) filter columns for specific columns.
            Filter is simply passed as is to DataFrame[filter], so use
            something that makes sense with a DataFrame.
        :return: (pd.DataFrame) DataFrame of weights
        """
        key = self._get_backtest(backtest)

        if filter is not None:
            data = self.backtests[key].weights[filter]
        else:
            data = self.backtests[key].weights

        return data

    def plot_weights(self, backtest=0, filter=None, figsize=(15, 5), **kwds):
        """
        Plots the weights of a given backtest over time.

        Args:
            * backtest (str, int): Backtest can be either a index (int) or
              the name (str)
            * filter (list, str): filter columns for specific columns. Filter
              is simply passed as is to DataFrame[filter], so use something
              that makes sense with a DataFrame.
            * figsize ((width, height)): figure size
            * kwds (dict): Keywords passed to plot

        """
        data = self.get_weights(backtest, filter)
        data.plot(figsize=figsize, **kwds)

    def get_security_weights(self, backtest=0, filter=None):
        """
        :param backtest: (str, int) Backtest can be either a index (int) or
            the name (str)
        :param filter: (list, str) filter columns for specific columns.
            Filter is simply passed as is to DataFrame[filter], so use
            something that makes sense with a DataFrame.
        :return: (pd.DataFrame) DataFrame of security weights
        """
        key = self._get_backtest(backtest)

        if filter is not None:
            data = self.backtests[key].security_weights[filter]
        else:
            data = self.backtests[key].security_weights

        return data

    def plot_security_weights(self, backtest=0, filter=None, figsize=(15, 5), **kwds):
        """
        Plots the security weights of a given backtest over time.

        Args:
            * backtest (str, int): Backtest. Can be either a index (int) or
              the name (str)
            * filter (list, str): filter columns for specific columns. Filter
              is simply passed as is to DataFrame[filter], so use something
              that makes sense with a DataFrame.
            * figsize ((width, height)): figure size
            * kwds (dict): Keywords passed to plot

        """
        data = self.get_security_weights(backtest, filter)
        data.plot(figsize=figsize, **kwds)

    def plot_histogram(self, backtest=0, **kwds):
        """
        Plots the return histogram of a given backtest over time.

        Args:
            * backtest (str, int): Backtest. Can be either a index (int) or
              the name (str)
            * kwds (dict): Keywords passed to plot_histogram

        """
        key = self._get_backtest(backtest)
        self[key].plot_histogram(**kwds)

    def _get_backtest(self, backtest):
        """
        Resolve a backtest reference to a backtest name.

        Integers (including NumPy integer scalars) are treated as positions
        in ``self.backtest_list``; anything else is returned unchanged and
        assumed to already be a name.
        """
        # based on input order; bool is excluded so True/False are never
        # silently used as positions 1/0
        is_position = isinstance(backtest, (int, np.integer)) and not isinstance(
            backtest, bool
        )
        if is_position:
            return self.backtest_list[backtest].name

        # default case assume ok
        return backtest

    def get_transactions(self, strategy_name=None):
        """
        Helper function that returns the transactions in the following
        format:

            Date, Security | quantity, price

        The result is a MultiIndex DataFrame.

        Args:
            * strategy_name (str): If none, it will take the first backtest's
              strategy (self.backtest_list[0].name)

        """
        if strategy_name is None:
            strategy_name = self.backtest_list[0].name

        # extract strategy given strategy_name
        return self.backtests[strategy_name].strategy.get_transactions()
class RandomBenchmarkResult(Result):
    """
    RandomBenchmarkResult expands on Result to add methods specific to random
    strategy benchmarking.

    Args:
        * backtests (list): List of backtests. The first one is taken to be
          the backtest being benchmarked.

    Attributes:
        * base_name (str): Name of backtest being benchmarked
        * r_stats (Result): Stats for random strategies
        * b_stats (Result): Stats for benchmarked strategy

    """

    def __init__(self, *backtests):
        super(RandomBenchmarkResult, self).__init__(*backtests)
        self.base_name = backtests[0].name
        # separate the benchmarked strategy's stats from the random ones
        self.r_stats = self.stats.drop(self.base_name, axis=1)
        self.b_stats = self.stats[self.base_name]

    def plot_histogram(
        self, statistic="monthly_sharpe", figsize=(15, 5), title=None, bins=20, **kwargs
    ):
        """
        Plots the distribution of a given statistic. The histogram represents
        the distribution of the random strategies' statistic and the vertical
        line is the value of the benchmarked strategy's statistic.

        This helps you determine if your strategy is statistically 'better'
        than the random versions.

        Args:
            * statistic (str): Statistic - any numeric statistic in
              Result is valid.
            * figsize ((x, y)): Figure size
            * title (str): Chart title
            * bins (int): Number of bins
            * kwargs (dict): Passed to pandas hist function.

        Raises:
            ValueError: If ``statistic`` is not in the index of
            ``self.r_stats``.

        """
        if statistic not in self.r_stats.index:
            # note the trailing space: adjacent literals are concatenated
            raise ValueError(
                "Invalid statistic. Valid statistics "
                "are the statistics in self.stats"
            )

        if title is None:
            title = "%s histogram" % statistic

        plt.figure(figsize=figsize)

        ser = self.r_stats.loc[statistic]

        ax = ser.hist(bins=bins, figsize=figsize, density=True, **kwargs)
        ax.set_title(title)
        # vertical line marks the benchmarked strategy's value
        plt.axvline(self.b_stats[statistic], linewidth=4, color="r")
        ser.plot(kind="kde")
class RenormalizedFixedIncomeResult(Result):
    """
    A result type to help compare results generated from
    :class:`FixedIncomeStrategy <bt.core.FixedIncomeStrategy>`.

    Recall that in a fixed income strategy, the normalized prices are
    computed using additive returns expressed as a percentage of current
    outstanding notional (i.e. fixed-notional equivalent). In strategies
    where the notional is varying, this may lead to counter-intuitive
    results because the different terms in the sum are being scaled by
    different notionals in the denominator (i.e. price could be below par,
    but overall change in value is positive).

    This class provides a way to "renormalize" the results with a different
    denominator value or series, i.e. using max or average notional
    exposure, or the risk exposure of the strategy.

    Args:
        * normalizing_value: pd.Series, float or dict thereof (by strategy
          name)
        * backtests (list): List of backtests (i.e. from
          Result.backtest_list)

    """

    def __init__(self, normalizing_value, *backtests):
        # every backtest must be on a fixed income strategy
        for candidate in backtests:
            if candidate.strategy.fixed_income:
                continue
            raise ValueError(
                "Cannot apply RenormalizedFixedIncomeResult "
                "because backtest %s is not on a fixed income "
                "strategy" % candidate.name
            )

        # a single value or series is applied to every backtest by name
        if isinstance(normalizing_value, dict):
            by_name = normalizing_value
        else:
            by_name = {x.name: normalizing_value for x in backtests}

        frames = []
        for x in backtests:
            renormalized = self._price(x.strategy, by_name[x.name])
            frames.append(pd.DataFrame({x.name: renormalized}))

        # initialise from the class after Result in the MRO, so that
        # Result.__init__ (which uses strategy.prices) is bypassed
        super(Result, self).__init__(*frames)
        self.backtest_list = backtests
        self.backtests = {x.name: x for x in backtests}

    def _price(self, s, v):
        """
        Compute the new price series from the strategy (s) and the
        normalizing value (v): par times one plus the cumulative sum of the
        flow-adjusted value changes divided by v, with the first entry set
        to par.
        """
        # additive returns net of flows
        net_change = s.values.diff() - s.flows
        scaled_cumulative = (net_change / v).cumsum()
        renormalized = bt.core.PAR * (1.0 + scaled_cumulative)
        renormalized.iloc[0] = bt.core.PAR
        return renormalized