Skip to content

Commit 8858f87

Browse files
committed
Add backtest data retrieval functionality
1 parent cb4bc82 commit 8858f87

6 files changed

Lines changed: 205 additions & 31 deletions

File tree

investing_algorithm_framework/app/app.py

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,9 +1246,28 @@ def run_vector_backtest(
12461246
show_progress (bool): Whether to show progress bars during
12471247
data source initialization. This is useful for long-running
12481248
initialization processes.
1249+
dynamic_position_sizing (bool): Whether to use dynamic position
1250+
sizing based on volatility or other factors. Defaults to False.
12491251
12501252
Returns:
12511253
Backtest: Instance of Backtest
1254+
1255+
Examples:
1256+
# Basic usage
1257+
backtest = app.run_vector_backtest(
1258+
strategy=my_strategy,
1259+
backtest_date_range=my_date_range,
1260+
initial_amount=1000
1261+
)
1262+
1263+
# With custom storage and checkpoints
1264+
backtest = app.run_vector_backtest(
1265+
strategy=my_strategy,
1266+
backtest_date_range=my_date_range,
1267+
initial_amount=1000,
1268+
use_checkpoints=True,
1269+
backtest_storage_directory="./backtest_results"
1270+
)
12521271
"""
12531272
# Use registered strategy if none provided
12541273
if strategy is None:
@@ -1479,6 +1498,107 @@ def run_backtests(
14791498

14801499
return backtests
14811500

1501+
def get_backtest_data(
    self,
    strategy: TradingStrategy,
    backtest_date_range: BacktestDateRange,
    show_progress: bool = True,
    fill_missing_data: bool = True,
) -> Dict[str, Any]:
    """
    Collect the data of every data source of a strategy for a
    backtest window.

    The data sources are read from ``strategy.data_sources``. The
    backtest data providers are first initialized for these data
    sources and the given date range, after which the vectorized
    backtest data is requested from the data provider service for the
    start and end date of the backtest date range. The warmup window
    of each data source is taken into account when retrieving its
    data.

    Args:
        strategy (TradingStrategy): The strategy whose data sources
            are used to retrieve the data.
        backtest_date_range (BacktestDateRange): The date range of
            the backtest window.
        show_progress (bool): Whether to show progress bars while the
            data sources are initialized. Defaults to True.
        fill_missing_data (bool): If True, missing time series data
            entries are filled automatically. Defaults to True.

    Returns:
        Dict[str, Any]: A dictionary that maps data source
            identifiers (e.g., "BTC/EUR_ohlcv") to the corresponding
            data (typically pandas DataFrames).

    Raises:
        OperationalException: If the strategy has no data sources
            defined.

    Example:
        ```python
        from investing_algorithm_framework import (
            create_app, TradingStrategy, BacktestDateRange, DataSource
        )
        from datetime import datetime, timezone

        class MyStrategy(TradingStrategy):
            data_sources = [
                DataSource(
                    identifier="btc_data",
                    symbol="BTC/EUR",
                    time_frame="1h",
                    warmup_window=100,
                    market="BITVAVO"
                )
            ]
            # ... strategy implementation

        app = create_app()
        app.add_strategy(MyStrategy)

        backtest_range = BacktestDateRange(
            start_date=datetime(2024, 1, 1, tzinfo=timezone.utc),
            end_date=datetime(2024, 6, 1, tzinfo=timezone.utc)
        )

        # Get all data for the strategy
        data = app.get_backtest_data(
            strategy=MyStrategy(),
            backtest_date_range=backtest_range
        )

        # Access data by identifier
        btc_df = data["btc_data"]
        ```
    """
    sources = strategy.data_sources
    has_sources = sources is not None and len(sources) != 0

    # Without data sources there is nothing to retrieve
    if not has_sources:
        raise OperationalException(
            "No data sources defined in the strategy. "
            "Please define data sources to retrieve backtest data."
        )

    # The backtest data providers must be set up before the data
    # provider service is queried
    self.initialize_data_sources_backtest(
        data_sources=sources,
        backtest_date_range=backtest_date_range,
        show_progress=show_progress,
        fill_missing_data=fill_missing_data,
    )

    provider_service = self.container.data_provider_service()
    return provider_service.get_vectorized_backtest_data(
        data_sources=sources,
        start_date=backtest_date_range.start_date,
        end_date=backtest_date_range.end_date,
    )
1601+
14821602
def run_backtest(
14831603
self,
14841604
backtest_date_range: BacktestDateRange,
@@ -1545,10 +1665,10 @@ def run_backtest(
15451665
the backtest. This is useful for long-running backtests.
15461666
market (str): The market to use for the backtest. This is used
15471667
to create a portfolio configuration if no portfolio
1548-
configuration is provided.
1668+
configuration is provided in the strategy.
15491669
trading_symbol (str): The trading symbol to use for the backtest.
15501670
This is used to create a portfolio configuration if no
1551-
portfolio configuration is provided.
1671+
portfolio configuration is provided in the strategy.
15521672
fill_missing_data (bool): If True (default), missing time series
15531673
data entries will be filled automatically before running the
15541674
backtest.

investing_algorithm_framework/domain/data_provider.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@ class DataProvider(ABC):
3232
time_frame (Optional[str]): The time frame for the data. This is
3333
useful for data providers that support multiple time frames.
3434
Example: "1m", "5m", "1h", "1d"
35-
window_size (Optional[int]): The window size for the data. This is
36-
useful for data providers that support multiple window sizes.
37-
Example: 100, 200, 500
35+
warmup_window (Optional[int]): The warmup window size for the data.
36+
This is useful for data providers that support multiple window
37+
sizes. Example: 100, 200, 500
38+
window_size (Optional[int]): Deprecated. Use warmup_window instead.
39+
Will be removed in release 0.8.0.
3840
storage_path (Optional[str]): The path to the storage location
3941
for the data. This is useful for data providers that support
4042
saving data to a file
@@ -84,9 +86,12 @@ def __init__(
8486
can be set later. This is useful for data providers
8587
that support multiple time frames.
8688
Example: "1m", "5m", "1h", "1d"
87-
window_size (int): The window size for the data. This is optional
88-
and can be set later. This is useful for data providers that
89-
support multiple window sizes. Example: 100, 200, 500
89+
warmup_window (int): The warmup window size for the data. This is
90+
optional and can be set later. This is useful for data
91+
providers that support multiple window sizes.
92+
Example: 100, 200, 500
93+
window_size (int): Deprecated. Use warmup_window instead.
94+
Will be removed in release 0.8.0.
9095
storage_path (str): The path to the storage location for the data.
9196
This is optional and can be set later. This is useful for data
9297
providers that support saving data to a file.

investing_algorithm_framework/domain/models/data/data_source.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import warnings
12
from dataclasses import dataclass
23
from datetime import datetime, timezone, timedelta
34
from typing import Union
@@ -16,15 +17,21 @@ class DataSource:
1617
DataSource(
1718
symbol="BTC/EUR",
1819
data_type="ohlcv",
19-
window_size=200,
20+
warmup_window=200,
2021
market="BITVAVO",
2122
identifier="BTC/EUR_ohlcv"
2223
)
24+
25+
.. deprecated::
26+
The `window_size` parameter is deprecated and will be removed in
27+
release 0.8.0. Please use `warmup_window` instead.
2328
"""
2429
identifier: str = None
2530
data_provider_identifier: str = None
2631
data_type: Union[DataType, str] = None
2732
symbol: str = None
33+
warmup_window: int = None
34+
# Deprecated: use warmup_window instead. Will be removed in release 0.8.0
2835
window_size: int = None
2936
time_frame: Union[TimeFrame, str] = None
3037
market: str = None
@@ -36,6 +43,19 @@ class DataSource:
3643
save: bool = False
3744

3845
def __post_init__(self):
46+
# Handle backward compatibility for window_size -> warmup_window
47+
# window_size is deprecated and will be removed in release 0.8.0
48+
if self.window_size is not None:
49+
warnings.warn(
50+
"The 'window_size' parameter is deprecated and will be "
51+
"removed in release 0.8.0. Please use 'warmup_window' instead.",
52+
DeprecationWarning,
53+
stacklevel=2
54+
)
55+
# If warmup_window is not set, use window_size value
56+
if self.warmup_window is None:
57+
object.__setattr__(self, 'warmup_window', self.window_size)
58+
3959
# Convert data_type and time_frame to their respective enums if needed
4060
if isinstance(self.data_type, str):
4161
object.__setattr__(self, 'data_type',
@@ -107,8 +127,8 @@ def get_identifier(self):
107127
if self.market is not None:
108128
identifier += f"_{self.market}"
109129

110-
if self.window_size is not None:
111-
identifier += f"_{self.window_size}"
130+
if self.warmup_window is not None:
131+
identifier += f"_{self.warmup_window}"
112132

113133
return identifier
114134

@@ -133,7 +153,7 @@ def __repr__(self):
133153
f"DataSource(identifier={self.identifier}, "
134154
f"data_provider_identifier={self.data_provider_identifier}, "
135155
f"data_type={self.data_type}, symbol={self.symbol}, "
136-
f"window_size={self.window_size}, time_frame={self.time_frame}, "
156+
f"warmup_window={self.warmup_window}, time_frame={self.time_frame}, "
137157
f"market={self.market}, storage_path={self.storage_path}, "
138158
f"pandas={self.pandas}, date={self.date}, "
139159
f"start_date={self.start_date}, end_date={self.end_date}, "
@@ -152,10 +172,10 @@ def __eq__(self, other):
152172
"""
153173
if DataType.OHLCV.equals(self.data_type):
154174

155-
if other.time_frame is None and other.window_size is None:
175+
if other.time_frame is None and other.warmup_window is None:
156176
return (self.data_type == other.data_type and
157177
self.symbol == other.symbol)
158-
elif self.time_frame is None and self.window_size is None:
178+
elif self.time_frame is None and self.warmup_window is None:
159179
return (self.data_type == other.data_type and
160180
self.symbol == other.symbol)
161181

@@ -167,7 +187,7 @@ def __eq__(self, other):
167187
elif DataType.CUSTOM.equals(self.data_type):
168188
return (self.data_type == other.data_type and
169189
self.symbol == other.symbol and
170-
self.window_size == other.window_size and
190+
self.warmup_window == other.warmup_window and
171191
self.time_frame == other.time_frame and
172192
self.market == other.market)
173193

@@ -180,11 +200,11 @@ def __eq__(self, other):
180200

181201
def create_start_date_data(self, index_date: datetime) -> datetime:
182202

183-
if self.window_size is None or self.time_frame is None:
203+
if self.warmup_window is None or self.time_frame is None:
184204
return index_date
185205

186206
return index_date - \
187-
(self.window_size * timedelta(
207+
(self.warmup_window * timedelta(
188208
minutes=self.time_frame.amount_of_minutes
189209
))
190210

@@ -216,7 +236,7 @@ def get_number_of_required_data_points(
216236
total_minutes = delta.total_seconds() / 60
217237
data_points = total_minutes / self.time_frame.amount_of_minutes
218238

219-
if self.window_size is not None:
220-
data_points += self.window_size
239+
if self.warmup_window is not None:
240+
data_points += self.warmup_window
221241

222242
return int(data_points)

investing_algorithm_framework/download_data.py

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ def download(
7373
data_type: Union[str, DataType] = DataType.OHLCV,
7474
start_date: Union[datetime, str] = None,
7575
end_date: Union[datetime, str] = None,
76-
window_size: int = 200,
76+
warmup_window: int = 200,
77+
window_size: int = None, # Deprecated: use warmup_window instead
7778
pandas: bool = True,
7879
save: bool = True,
7980
storage_path: Union[str, Path] = None,
@@ -91,20 +92,32 @@ def download(
9192
download (e.g., "ohlcv", "ticker").
9293
start_date (str): The start date for the data download.
9394
end_date (str): The end date for the data download.
94-
window_size (int): The size of the data window.
95+
warmup_window (int): The size of the warmup window.
96+
window_size (int): Deprecated. Use warmup_window instead.
97+
Will be removed in release 0.8.0.
9598
pandas (bool): Whether to return the data as a pandas DataFrame.
9699
save (bool): Whether to save the downloaded data.
97100
storage_path (str): The directory to save the downloaded data.
98101
save_to (str): The path to save the downloaded data. If provided,
99102
it overrides storage_path.
100103
time_frame (str): The time frame for the data download.
101104
date (str): The date for the data download.
102-
window_size (int): The size of the data window.
103-
pandas (bool): Whether to return the data as a pandas DataFrame.
104105
105106
Returns:
106107
None
107108
"""
109+
import warnings
110+
111+
# Handle backward compatibility for window_size
112+
if window_size is not None:
113+
warnings.warn(
114+
"The 'window_size' parameter is deprecated and will be "
115+
"removed in release 0.8.0. Please use 'warmup_window' instead.",
116+
DeprecationWarning,
117+
stacklevel=2
118+
)
119+
if warmup_window == 200: # Default value, use window_size
120+
warmup_window = window_size
108121
configuration_service = ConfigurationService()
109122
market_credential_service = MarketCredentialService()
110123
data_provider_service = DataProviderService(
@@ -148,7 +161,7 @@ def download(
148161
date=date,
149162
start_date=start_date,
150163
end_date=end_date,
151-
window_size=window_size,
164+
warmup_window=warmup_window,
152165
pandas=pandas,
153166
save=save,
154167
storage_path=storage_path
@@ -172,7 +185,8 @@ def download_v2(
172185
data_type: Union[str, DataType] = DataType.OHLCV,
173186
start_date: Union[datetime, str] = None,
174187
end_date: Union[datetime, str] = None,
175-
window_size: int = 200,
188+
warmup_window: int = 200,
189+
window_size: int = None, # Deprecated: use warmup_window instead
176190
pandas: bool = True,
177191
save: bool = True,
178192
storage_path: Union[str, Path] = None,
@@ -191,14 +205,29 @@ def download_v2(
191205
start_date: Start date for data range
192206
end_date: End date for data range
193207
date: Specific date for data download
194-
window_size: Size of the data window
208+
warmup_window: Size of the warmup window
209+
window_size: Deprecated. Use warmup_window instead.
210+
Will be removed in release 0.8.0.
195211
pandas: Whether to return the data as a pandas DataFrame
196212
save: Whether to save the data to disk
197213
storage_path: Base directory for storing files
198214
199215
Returns:
200216
DownloadResult with .data (DataFrame) and .path (Path or None)
201217
"""
218+
import warnings
219+
220+
# Handle backward compatibility for window_size
221+
if window_size is not None:
222+
warnings.warn(
223+
"The 'window_size' parameter is deprecated and will be "
224+
"removed in release 0.8.0. Please use 'warmup_window' instead.",
225+
DeprecationWarning,
226+
stacklevel=2
227+
)
228+
if warmup_window == 200: # Default value, use window_size
229+
warmup_window = window_size
230+
202231
# Parse dates if they are strings
203232
parsed_start_date = start_date
204233
parsed_end_date = end_date
@@ -220,7 +249,7 @@ def download_v2(
220249
data_type=data_type,
221250
start_date=start_date,
222251
end_date=end_date,
223-
window_size=window_size,
252+
warmup_window=warmup_window,
224253
pandas=pandas,
225254
save=save,
226255
storage_path=storage_path,

0 commit comments

Comments
 (0)