-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
Copy pathlib.py
467 lines (377 loc) · 16.5 KB
/
lib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
"""
Collection of common building blocks, helper auxiliary functions and
composable strategy classes for reuse.
Intended for simple missing-link procedures, not for reinventing
better-suited, state-of-the-art, fast libraries,
such as TA-Lib, Tulipy, PyAlgoTrade, NumPy, SciPy ...
Please raise ideas for additions to this collection on the [issue tracker].
[issue tracker]: https://github.com/kernc/backtesting.py
"""
from collections import OrderedDict
from itertools import compress
from numbers import Number
from inspect import currentframe
from typing import Sequence, Optional, Union, Callable
import numpy as np
import pandas as pd
from .backtesting import Strategy
from ._plotting import plot_heatmaps as _plot_heatmaps
from ._util import _Array, _as_str
# Per-name documentation overrides; filled in near the end of the module
# to hide selected members from pdoc3.
__pdoc__ = {}

OHLCV_AGG = OrderedDict(
    Open='first',
    High='max',
    Low='min',
    Close='last',
    Volume='sum',
)
"""Mapping of OHLCV column name to the aggregation rule to use when
resampling OHLCV data frames to a coarser time frame, e.g.

    df.resample('4H', label='right').agg(OHLCV_AGG)
"""
TRADES_AGG = OrderedDict(
    Size='sum',
    EntryBar='first',
    ExitBar='last',
    EntryPrice='mean',
    ExitPrice='mean',
    PnL='sum',
    ReturnPct='mean',
    EntryTime='first',
    ExitTime='last',
    Duration='sum',
)
"""Mapping of trades-table column name to the aggregation rule to use
when resampling trades data, e.g.

    stats['_trades'].resample('1D', on='ExitTime',
                              label='right').agg(TRADES_AGG)
"""
# Aggregation rule per equity-curve column. Not referenced in the visible
# part of this module -- presumably consumed by sibling modules when
# resampling the equity curve (TODO confirm against callers).
_EQUITY_AGG = dict(
    Equity='last',
    DrawdownPct='max',
    DrawdownDuration='max',
)
def barssince(condition: Sequence[bool], default=np.inf) -> int:
    """
    Count how many bars ago the `condition` sequence was last truthy.

    The most recent bar counts as `0`. If `condition` holds no truthy
    value at all (or is empty), `default` is returned instead.

        >>> barssince(self.data.Close > self.data.Open)
        3
    """
    # Walk the sequence newest-to-oldest; the first hit is the answer.
    for bars_ago, is_set in enumerate(reversed(condition)):
        if is_set:
            return bars_ago
    return default
def cross(series1: Sequence, series2: Sequence) -> bool:
    """
    Tell whether `series1` and `series2` have just crossed each other,
    regardless of direction.

    This is `crossover()` evaluated both ways round.

        >>> cross(self.data.Close, self.sma)
        True
    """
    upward = crossover(series1, series2)
    if upward:
        return upward
    # No upward cross; report whether there was a downward one.
    return crossover(series2, series1)
def crossover(series1: Sequence, series2: Sequence) -> bool:
    """
    Tell whether `series1` has just crossed over (above) `series2`,
    i.e. it was below `series2` on the previous bar and is above it
    on the latest bar.

    Either argument may also be a plain number, which is treated as a
    constant level. If fewer than two values are available, the result
    is `False`.

        >>> crossover(self.data.Close, self.sma)
        True
    """
    def _indexable(operand):
        # pandas Series -> raw values so that [-1]/[-2] are positional.
        if isinstance(operand, pd.Series):
            return operand.values
        # A scalar stands in for a flat line: same value on both bars.
        if isinstance(operand, Number):
            return operand, operand
        return operand

    left = _indexable(series1)
    right = _indexable(series2)
    try:
        was_below = left[-2] < right[-2]
        return was_below and left[-1] > right[-1]
    except IndexError:
        # Not enough history to compare two consecutive bars.
        return False
def plot_heatmaps(heatmap: pd.Series,
                  agg: Union[str, Callable] = 'max',
                  *,
                  ncols: int = 3,
                  plot_width: int = 1200,
                  filename: str = '',
                  open_browser: bool = True):
    """
    Plot a grid of heatmaps, one per pair of parameters found in `heatmap`.

    `heatmap` is the Series returned by
    `backtesting.backtesting.Backtest.optimize` when called with
    `return_heatmap=True`.

    Projecting the n-dimensional heatmap onto two dimensions requires
    aggregating the remaining ones; `agg` selects how and defaults to
    `'max'`. Anything pandas knows how to aggregate by is accepted.

    `ncols`, `plot_width`, `filename` and `open_browser` are handed
    over unchanged to the underlying plotting routine.

    .. todo::
        Lay heatmaps out lower-triangular instead of in a simple grid.
        Like [`skopt.plots.plot_objective()`][plot_objective] does.

    [plot_objective]: \
        https://scikit-optimize.github.io/stable/modules/plots.html#plot-objective
    """
    # NOTE: the plotting backend takes `filename` *before* `plot_width`.
    return _plot_heatmaps(
        heatmap,
        agg,
        ncols,
        filename,
        plot_width,
        open_browser,
    )
def quantile(series: Sequence, quantile: Union[None, float] = None):
    """
    If `quantile` is `None`, return the quantile _rank_ of the last
    value of `series` wrt former series values, i.e. the fraction of
    former values that are strictly smaller than the last one. If
    `series` holds fewer than two values, there is nothing to rank
    against and `nan` is returned.

    If `quantile` is a value between 0 and 1, return the _value_ of
    `series` at this quantile (NaN values are ignored). If used to
    working with percentiles, just divide your percentile amount
    with 100 to obtain quantiles.

    `series` may be any sequence: a list or tuple, a NumPy array, or
    a pandas Series (whatever its index).

        >>> quantile(self.data.Close[-20:], .1)
        162.130
        >>> quantile(self.data.Close)
        0.13
    """
    # Work on a plain ndarray so that "last" and "former" are always
    # positional (a pandas Series would look `-1` up as an index label)
    # and the comparison below is element-wise (a list would raise).
    values = np.asarray(series)
    if quantile is None:
        if values.size < 2:
            return np.nan
        last, former = values[-1], values[:-1]
        return np.mean(former < last)
    assert 0 <= quantile <= 1, "quantile must be within [0, 1]"
    return np.nanpercentile(values, quantile * 100)
def resample_apply(rule: str,
                   func: Optional[Callable[..., Sequence]],
                   series: Union[pd.Series, pd.DataFrame, _Array],
                   *args,
                   agg: Optional[Union[str, dict]] = None,
                   **kwargs):
    """
    Apply `func` (such as an indicator) to `series`, resampled to
    a time frame specified by `rule`. When called from inside
    `backtesting.backtesting.Strategy.init`,
    the result (returned) series will be automatically wrapped in
    `backtesting.backtesting.Strategy.I`
    wrapper method.

    `rule` is a valid [Pandas offset string] indicating
    a time frame to resample `series` to.

    [Pandas offset string]: \
http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases

    `func` is the indicator function to apply on the resampled series.
    If `None`, the resampled series itself is used.

    `series` is a data series (or array), such as any of the
    `backtesting.backtesting.Strategy.data` series. Due to pandas
    resampling limitations, this only works when input series
    has a datetime index.

    `agg` is the aggregation function to use on resampled groups of data.
    Valid values are anything accepted by `pandas/resample/.agg()`.
    Default value for dataframe input is `OHLCV_AGG` dictionary.
    Default value for series input is the appropriate entry from `OHLCV_AGG`
    if series has a matching name, or otherwise the value `"last"`,
    which is suitable for closing prices,
    but you might prefer another (e.g. `"max"` for peaks, or similar).

    Finally, any `*args` and `**kwargs` that are not already eaten by
    implicit `backtesting.backtesting.Strategy.I` call
    are passed to `func`.

    For example, if we have a typical moving average function
    `SMA(values, lookback_period)`, _hourly_ data source, and need to
    apply the moving average MA(10) on a _daily_ time frame,
    but don't want to plot the resulting indicator, we can do:

        class System(Strategy):
            def init(self):
                self.sma = resample_apply(
                    'D', SMA, self.data.Close, 10, plot=False)

    The above short snippet is roughly equivalent to:

        class System(Strategy):
            def init(self):
                # Strategy exposes `self.data` as raw NumPy arrays.
                # Let's convert closing prices back to pandas Series.
                close = self.data.Close.s

                # Resample to daily resolution. Aggregate groups
                # using their last value (i.e. closing price at the end
                # of the day). Notice `label='right'`. If it were set to
                # 'left' (default), the strategy would exhibit
                # look-ahead bias.
                daily = close.resample('D', label='right').agg('last')

                # We apply SMA(10) to daily close prices,
                # then reindex it back to original hourly index,
                # forward-filling the missing values in each day.
                # We make a separate function that returns the final
                # indicator array.
                def SMA(series, n):
                    from backtesting.test import SMA
                    return SMA(series, n).reindex(close.index).ffill()

                # The result equivalent to the short example above:
                self.sma = self.I(SMA, daily, 10, plot=False)
    """
    if func is None:
        def func(x, *_, **__):
            return x

    if not isinstance(series, (pd.Series, pd.DataFrame)):
        assert isinstance(series, _Array), \
            'resample_apply() takes either a `pd.Series`, `pd.DataFrame`, ' \
            'or a `Strategy.data.*` array'
        series = series.s

    if agg is None:
        if isinstance(series, pd.DataFrame):
            # Decide per column. Don't consult `series.name` here: on a
            # data frame it would resolve to a column called "name",
            # which is not usable as a dictionary key.
            agg = {column: OHLCV_AGG.get(column, 'last')
                   for column in series.columns}
        else:
            agg = OHLCV_AGG.get(getattr(series, 'name', None), 'last')

    resampled = series.resample(rule, label='right').agg(agg).dropna()
    resampled.name = _as_str(series) + '[' + rule + ']'

    # Check first few stack frames if we are being called from
    # inside Strategy.init, and if so, extract Strategy.I wrapper.
    frame, level = currentframe(), 0
    while frame is not None and level <= 3:
        frame = frame.f_back
        level += 1
        # `f_back` is None once the outermost frame has been passed
        # (e.g. when called from the top level of a script).
        if frame is not None and \
                isinstance(frame.f_locals.get('self'), Strategy):
            strategy_I = frame.f_locals['self'].I
            break
    else:
        # Not called from within a Strategy: just call the function.
        def strategy_I(func, *args, **kwargs):
            return func(*args, **kwargs)

    def wrap_func(resampled, *args, **kwargs):
        result = func(resampled, *args, **kwargs)
        if not isinstance(result, pd.DataFrame) and not isinstance(result, pd.Series):
            result = np.asarray(result)
            if result.ndim == 1:
                result = pd.Series(result, name=resampled.name)
            elif result.ndim == 2:
                result = pd.DataFrame(result.T)
        # Resample back to data index
        if not isinstance(result.index, pd.DatetimeIndex):
            result.index = resampled.index
        result = result.reindex(index=series.index.union(resampled.index),
                                method='ffill').reindex(series.index)
        return result

    # Carry over the name of the wrapped function. Callables such as
    # `functools.partial` objects have no `__name__`; fall back to
    # their type name instead of failing.
    wrap_func.__name__ = getattr(func, '__name__', type(func).__name__)

    array = strategy_I(wrap_func, resampled, *args, **kwargs)
    return array
def random_ohlc_data(example_data: pd.DataFrame, *,
                     frac=1., random_state: int = None) -> pd.DataFrame:
    """
    Endless generator of random OHLC data frames whose basic
    [descriptive statistics](https://en.wikipedia.org/wiki/Descriptive_statistics)
    resemble those of the provided `example_data`.

    `frac` is the fraction of rows to sample. Sampling is done with
    replacement only when `frac` is greater than 1 (oversampling).

    `random_state` is passed on to pandas' sampling routine.

    Such random data can be effectively used for stress testing trading
    strategy robustness, Monte Carlo simulations, significance testing, etc.

    Raises `ValueError` (on first iteration) if `example_data` lacks any
    of the columns 'Open', 'High', 'Low', 'Close'.

        >>> from backtesting.test import EURUSD
        >>> ohlc_generator = random_ohlc_data(EURUSD)
        >>> next(ohlc_generator)  # returns new random data
        ...
        >>> next(ohlc_generator)  # returns new random data
        ...
    """
    price_columns = ('Open', 'High', 'Low', 'Close')

    present = example_data.columns.intersection({'Open', 'High', 'Low', 'Close'})
    if len(present) != 4:
        raise ValueError("`data` must be a pandas.DataFrame with columns "
                         "'Open', 'High', 'Low', 'Close'")

    # One set of sampling options shared by every draw below.
    sampling = dict(frac=frac, replace=frac > 1, random_state=random_state)

    while True:
        # Shuffle the bars, but keep the original time index.
        sampled = example_data.sample(**sampling)
        sampled.index = example_data.index

        # Distance from each bar's close to the next bar's open ...
        padding = sampled.Close - sampled.Open.shift(-1)
        # ... plus a sampled open-to-previous-close gap from the example.
        gaps = (example_data.Open.shift(-1) - example_data.Close).sample(**sampling)

        # Accumulated price offset that stitches the bars together.
        offsets = (padding + gaps).shift(1).fillna(0).cumsum()
        for column in price_columns:
            sampled[column] += offsets
        yield sampled
class SignalStrategy(Strategy):
    """
    Helper strategy driven entirely by position entry/exit signals.
    This makes the backtest of the strategy simulate a [vectorized backtest].
    See [tutorials] for usage examples.

    [vectorized backtest]: https://www.google.com/search?q=vectorized+backtest
    [tutorials]: index.html#tutorials

    To use this helper strategy, subclass it, override its
    `backtesting.backtesting.Strategy.init` method,
    and set the signal vector by calling
    `backtesting.lib.SignalStrategy.set_signal` method from within it.

        class ExampleStrategy(SignalStrategy):
            def init(self):
                super().init()
                self.set_signal(sma1 > sma2, sma1 < sma2)

    Remember to call `super().init()` and `super().next()` in your
    overridden methods.
    """
    # Defaults used until `set_signal()` is called: no entry, no exit.
    __entry_signal = (0,)
    __exit_signal = (False,)

    def set_signal(self, entry_size: Sequence[float],
                   exit_portion: Sequence[float] = None,
                   *,
                   plot: bool = True):
        """
        Set entry/exit signal vectors (arrays).

        A long entry signal is considered present wherever `entry_size`
        is greater than zero, and a short signal wherever `entry_size`
        is less than zero, following `backtesting.backtesting.Order.size`
        semantics.

        If `exit_portion` is provided, a nonzero value closes that portion
        of the position (see `backtesting.backtesting.Trade.close()`) in
        the respective direction (positive values close long trades,
        negative short).

        If `plot` is `True`, the signal entry/exit indicators are plotted
        when `backtesting.backtesting.Backtest.plot` is called.
        """
        def as_signal(values):
            # Zeros mean "no signal"; turn them into NaN.
            return pd.Series(values, dtype=float).replace(0, np.nan)

        # Display options shared by both indicators.
        display = dict(plot=plot, overlay=False, scatter=True, color='black')

        self.__entry_signal = self.I(  # type: ignore
            lambda: as_signal(entry_size),
            name='entry size', **display)

        if exit_portion is not None:
            self.__exit_signal = self.I(  # type: ignore
                lambda: as_signal(exit_portion),
                name='exit portion', **display)

    def next(self):
        super().next()

        # Exits first: positive portion closes longs, negative closes shorts.
        portion = self.__exit_signal[-1]
        if portion > 0:
            for trade in self.trades:
                if not trade.is_long:
                    continue
                trade.close(portion)
        elif portion < 0:
            for trade in self.trades:
                if not trade.is_short:
                    continue
                trade.close(-portion)

        # Then entries: positive size buys, negative size sells.
        size = self.__entry_signal[-1]
        if size > 0:
            self.buy(size=size)
        elif size < 0:
            self.sell(size=-size)
class TrailingStrategy(Strategy):
    """
    A strategy with automatic trailing stop-loss, trailing the current
    price at distance of some multiple of average true range (ATR). Call
    `TrailingStrategy.set_trailing_sl()` to set said multiple
    (`6` by default). See [tutorials] for usage examples.

    [tutorials]: index.html#tutorials

    Remember to call `super().init()` and `super().next()` in your
    overridden methods.
    """
    # Stop-loss distance, in multiples of ATR.
    __n_atr = 6.
    # ATR values, one per bar of the data; computed in `set_atr_periods()`.
    __atr = None

    def init(self):
        super().init()
        self.set_atr_periods()

    def set_atr_periods(self, periods: int = 100):
        """
        Set the lookback period for computing ATR. The default value
        of 100 ensures a _stable_ ATR.
        """
        high, low = self.data.High, self.data.Low
        prev_close = pd.Series(self.data.Close).shift(1)
        # True range: the largest of the bar's own range and its
        # distances from the previous close.
        tr = np.max([high - low,
                     (prev_close - high).abs(),
                     (prev_close - low).abs()], axis=0)
        # Leading values (before a full window is available) are
        # back-filled from the first computed average.
        atr = pd.Series(tr).rolling(periods).mean().bfill().values
        self.__atr = atr

    def set_trailing_sl(self, n_atr: float = 6):
        """
        Sets the future trailing stop-loss as some multiple (`n_atr`)
        average true bar ranges away from the current price.
        """
        self.__n_atr = n_atr

    def next(self):
        super().next()
        # `self.__atr` is a plain array rather than an indicator, so
        # index `-1` would always read the ATR of the very last bar of
        # the data set. Index it by the position of the current bar
        # instead (assumes `set_atr_periods()` ran in `init()` over the
        # full-length data).
        index = len(self.data.Close) - 1
        distance = self.__atr[index] * self.__n_atr
        for trade in self.trades:
            # Only ever tighten the stop: raise it for longs, lower
            # it for shorts.
            if trade.is_long:
                trade.sl = max(trade.sl or -np.inf,
                               self.data.Close[-1] - distance)
            else:
                trade.sl = min(trade.sl or np.inf,
                               self.data.Close[-1] + distance)
# Prevent pdoc3 documenting __init__ signature of Strategy subclasses.
# A comprehension is used instead of a module-level `for` loop so that
# the loop variable does not end up in the module namespace, where the
# `__all__` computation below would pick it up and export the object it
# refers to a second time.
__pdoc__.update({
    f'{_obj.__name__}.__init__': False
    for _obj in list(globals().values())
    if isinstance(_obj, type) and issubclass(_obj, Strategy)
})

# NOTE: Don't put anything below this __all__ list

# Public API: callables defined in this module plus UPPER_CASE constants,
# excluding anything whose name marks it as internal.
__all__ = [getattr(v, '__name__', k)
           for k, v in globals().items()                          # export
           if (((callable(v) and
                 getattr(v, '__module__', None) == __name__) or   # callables from this module
                k.isupper()) and                                  # or CONSTANTS
               not getattr(v, '__name__', k).startswith('_'))]    # neither marked internal

# NOTE: Don't put anything below here. See above.