Skip to content

Commit 0c56940

Browse files
committed
feat(backtesting): single-bundle binary persistence format (#487)
Introduce .iafbt bundle format (zstd + msgpack) and parallel I/O for backtest persistence, addressing the linear-scaling pain points in issue #487. Changes: - New domain/backtesting/bundle.py with save_bundle/open_bundle and IAFB magic + format-version header. - from_dict() classmethods on Backtest, BacktestRun, BacktestMetrics, BacktestSummaryMetrics, BacktestPermutationTest. open() delegates. - Backtest.save() / open() auto-detect .iafbt files (and sibling bundles) for full backward compatibility. - save_backtests_to_directory() / load_backtests_from_directory() now default to bundle format with ProcessPoolExecutor parallelism. - New BacktestIndex helper backed by index.parquet for filtering large batches without opening any bundles. - migrate_backtests() API + 'iaf migrate-backtests' CLI command. - Optional parallel workers= flag on recalculate_backtests(). - 9 new tests in tests/domain/backtests/test_bundle.py. Benchmarked on a real 6-strategy batch (11 runs each): size : bundle 21x smaller than legacy directory format files : 27x fewer files per backtest save : 3.6x faster at N=200 load : 2.4x faster at N=200 Full suite: 1642 tests pass (1633 existing + 9 new).
1 parent 0d0e99c commit 0c56940

16 files changed

Lines changed: 3185 additions & 1949 deletions

File tree

investing_algorithm_framework/cli/cli.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,47 @@ def mcp(directory):
250250

251251

252252
cli.add_command(mcp)
253+
254+
255+
@click.command(name="migrate-backtests")
256+
@click.option(
257+
"--src", "-s",
258+
required=True,
259+
type=click.Path(exists=True, file_okay=False, dir_okay=True),
260+
help="Source directory containing legacy backtest sub-directories.",
261+
)
262+
@click.option(
263+
"--dst", "-d",
264+
required=True,
265+
type=click.Path(file_okay=False, dir_okay=True),
266+
help="Destination directory for the new ``.iafbt`` bundle files.",
267+
)
268+
@click.option(
269+
"--workers", "-w", type=int, default=None,
270+
help="Number of parallel workers (default: min(8, CPU count)).",
271+
)
272+
@click.option(
273+
"--no-index", is_flag=True, default=False,
274+
help="Skip writing index.parquet at the destination.",
275+
)
276+
def migrate_backtests_cmd(src, dst, workers, no_index):
277+
"""Convert a directory of legacy backtest folders into the bundled
278+
binary format introduced in issue #487.
279+
280+
The new ``.iafbt`` format is a single zstd-compressed MessagePack
281+
file per backtest. Loading bundled directories is dramatically
282+
faster than the legacy multi-file layout for large batches.
283+
"""
284+
from investing_algorithm_framework.domain import migrate_backtests
285+
286+
n = migrate_backtests(
287+
src,
288+
dst,
289+
workers=workers,
290+
show_progress=True,
291+
write_index=not no_index,
292+
)
293+
click.echo(f"Migrated {n} backtest(s) from {src} to {dst}")
294+
295+
296+
cli.add_command(migrate_backtests_cmd)

investing_algorithm_framework/domain/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,10 @@
4444
BacktestDateRange, Backtest, BacktestMetrics, combine_backtests, \
4545
BacktestPermutationTest, BacktestEvaluationFocus, \
4646
generate_backtest_summary_metrics, load_backtests_from_directory, \
47-
save_backtests_to_directory, retag_backtests
47+
save_backtests_to_directory, retag_backtests, migrate_backtests, \
48+
BacktestIndex, save_bundle, open_bundle, BUNDLE_EXT, \
49+
BUNDLE_FORMAT_VERSION
50+
from .backtesting.backtest_utils import resolve_backtest_path
4851
from .algorithm_id import generate_algorithm_id
4952
from .pipeline import (
5053
Pipeline,
@@ -173,6 +176,13 @@
173176
"load_backtests_from_directory",
174177
"save_backtests_to_directory",
175178
"retag_backtests",
179+
"migrate_backtests",
180+
"resolve_backtest_path",
181+
"BacktestIndex",
182+
"save_bundle",
183+
"open_bundle",
184+
"BUNDLE_EXT",
185+
"BUNDLE_FORMAT_VERSION",
176186
"generate_algorithm_id",
177187
# Pipeline API (Phase 1, see docs/design/pipeline-api.md)
178188
"Pipeline",

investing_algorithm_framework/domain/backtesting/__init__.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,19 @@
77
from .backtest_evaluation_focuss import BacktestEvaluationFocus
88
from .combine_backtests import combine_backtests, \
99
generate_backtest_summary_metrics
10-
from .backtest_utils import load_backtests_from_directory, \
11-
save_backtests_to_directory, retag_backtests
10+
from .backtest_utils import (
11+
load_backtests_from_directory,
12+
save_backtests_to_directory,
13+
retag_backtests,
14+
migrate_backtests,
15+
BacktestIndex,
16+
)
17+
from .bundle import (
18+
save_bundle,
19+
open_bundle,
20+
BUNDLE_EXT,
21+
BUNDLE_FORMAT_VERSION,
22+
)
1223

1324
__all__ = [
1425
"Backtest",
@@ -18,9 +29,15 @@
1829
"BacktestRun",
1930
"BacktestPermutationTest",
2031
"BacktestEvaluationFocus",
32+
"BacktestIndex",
2133
"combine_backtests",
2234
"generate_backtest_summary_metrics",
2335
"load_backtests_from_directory",
2436
"save_backtests_to_directory",
2537
"retag_backtests",
38+
"migrate_backtests",
39+
"save_bundle",
40+
"open_bundle",
41+
"BUNDLE_EXT",
42+
"BUNDLE_FORMAT_VERSION",
2643
]

investing_algorithm_framework/domain/backtesting/backtest.py

Lines changed: 125 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ class Backtest:
5757
strategy_ids: List[int] = field(default_factory=list)
5858
parameters: Dict = field(default_factory=dict)
5959
tag: str = None
60+
# OHLCV payload optionally attached for save_bundle(include_ohlcv=True).
61+
# Keys are conventionally "<symbol>@<timeframe>" (e.g.
62+
# "BTC/EUR@1h"), values are pandas DataFrames or any object that
63+
# has ``to_pandas()``. See issue #487.
64+
ohlcv: Dict[str, object] = field(default_factory=dict, repr=False)
6065

6166
def get_all_backtest_runs(
6267
self, backtest_date_ranges=None
@@ -194,17 +199,58 @@ def to_dict(self) -> dict:
194199
"tag": self.tag,
195200
}
196201

202+
@classmethod
203+
def from_dict(cls, data: dict) -> 'Backtest':
204+
"""
205+
Reconstruct a ``Backtest`` from a plain dict produced by
206+
:py:meth:`to_dict`. Used by the binary bundle loader (#487).
207+
208+
Permutation-test entries that were serialized via
209+
:py:meth:`BacktestPermutationTest.to_dict` are accepted, but the
210+
``permutated_dataframes`` field of those tests cannot round-trip
211+
through this path and is left empty (the directory loader has
212+
the same limitation).
213+
"""
214+
if data is None:
215+
return None
216+
217+
runs = []
218+
for r in data.get("backtest_runs") or []:
219+
runs.append(BacktestRun.from_dict(r))
220+
221+
summary_dict = data.get("backtest_summary")
222+
summary = (
223+
BacktestSummaryMetrics.from_dict(summary_dict)
224+
if summary_dict else None
225+
)
226+
227+
perm_tests = []
228+
for pt in data.get("backtest_permutation_tests") or []:
229+
perm_tests.append(BacktestPermutationTest.from_dict(pt))
230+
231+
return cls(
232+
algorithm_id=data.get("algorithm_id"),
233+
backtest_runs=runs,
234+
backtest_summary=summary,
235+
backtest_permutation_tests=perm_tests,
236+
metadata=data.get("metadata") or {},
237+
risk_free_rate=data.get("risk_free_rate"),
238+
strategy_ids=data.get("strategy_ids") or [],
239+
parameters=data.get("parameters") or {},
240+
tag=data.get("tag"),
241+
)
242+
197243
@staticmethod
198244
def open(
199245
directory_path: Union[str, Path],
200246
backtest_date_ranges: List[BacktestDateRange] = None,
201247
) -> 'Backtest':
202248
"""
203-
Open a backtest report from a directory and return a Backtest instance.
249+
Open a backtest report from a directory **or** a ``.iafbt``
250+
bundle file (issue #487) and return a :class:`Backtest`.
204251
205252
Args:
206-
directory_path (str): The path to the directory containing the
207-
backtest report files.
253+
directory_path: Path to the backtest directory or bundle file.
208254
backtest_date_ranges (List[BacktestDateRange], optional): A list of
209255
date ranges to filter the backtest runs. If provided, only
210256
backtest runs matching these date ranges will be loaded.
@@ -217,6 +263,34 @@ def open(
217263
OperationalException: If the directory does not exist or if
218264
there is an error loading the files.
219265
"""
266+
path_str = str(directory_path)
267+
268+
# If the path is a bundle file (or a regular file ending in the
269+
# bundle extension), load via the bundle reader.
270+
if os.path.isfile(path_str):
271+
from .bundle import BUNDLE_EXT, is_bundle_file, open_bundle
272+
if path_str.endswith(BUNDLE_EXT) or is_bundle_file(path_str):
273+
bt = open_bundle(path_str)
274+
if backtest_date_ranges is not None:
275+
bt.backtest_runs = bt.get_all_backtest_runs(
276+
backtest_date_ranges
277+
)
278+
return bt
279+
280+
# Fallback: caller passed a path without extension but a sibling
281+
# bundle file exists (e.g. session_cache stores
282+
# "<storage>/<algorithm_id>" while the new default save format
283+
# writes "<storage>/<algorithm_id>.iafbt").
284+
if not os.path.exists(path_str):
285+
from .bundle import BUNDLE_EXT, open_bundle
286+
candidate = path_str + BUNDLE_EXT
287+
if os.path.isfile(candidate):
288+
bt = open_bundle(candidate)
289+
if backtest_date_ranges is not None:
290+
bt.backtest_runs = bt.get_all_backtest_runs(
291+
backtest_date_ranges
292+
)
293+
return bt
220294
algorithm_id = None
221295
backtest_runs = []
222296
backtest_summary_metrics = None
@@ -399,6 +473,23 @@ def save(
399473
None: This method does not return anything, it saves the
400474
metrics to a file.
401475
"""
476+
# Bundle-format dispatch (issue #487):
477+
# * If the caller passed a path ending in ``.iafbt``, save as
478+
# a bundle file.
479+
# * If the caller passed a base path (no extension) and a
480+
# sibling ``<path>.iafbt`` exists, replace it in place.
481+
# * Otherwise fall through to the legacy directory format
482+
# below (preserved for backward compatibility).
483+
from .bundle import BUNDLE_EXT, save_bundle as _save_bundle
484+
path_str = str(directory_path)
485+
if path_str.endswith(BUNDLE_EXT):
486+
_save_bundle(self, path_str)
487+
return
488+
sibling = path_str + BUNDLE_EXT
489+
if os.path.isfile(sibling):
490+
_save_bundle(self, sibling)
491+
return
492+
402493
if not os.path.exists(directory_path):
403494
os.makedirs(directory_path)
404495

@@ -523,6 +614,37 @@ def __repr__(self):
523614
self.to_dict(), indent=4, sort_keys=True, default=str
524615
)
525616

617+
def save_bundle(
618+
self,
619+
path: Union[str, Path],
620+
*,
621+
include_ohlcv: bool = False,
622+
ohlcv_store: Union[str, Path, None] = None,
623+
) -> Path:
624+
"""Persist this backtest as a single ``.iafbt`` bundle.
625+
626+
See :py:func:`investing_algorithm_framework.domain.backtesting.
627+
bundle.save_bundle` for details. This is a thin convenience
628+
wrapper.
629+
"""
630+
from .bundle import save_bundle as _save_bundle
631+
return _save_bundle(
632+
self,
633+
path,
634+
include_ohlcv=include_ohlcv,
635+
ohlcv_store=ohlcv_store,
636+
)
637+
638+
@staticmethod
639+
def open_bundle(
640+
path: Union[str, Path],
641+
*,
642+
ohlcv_store: Union[str, Path, None] = None,
643+
) -> 'Backtest':
644+
"""Load a :class:`Backtest` from a ``.iafbt`` bundle file."""
645+
from .bundle import open_bundle as _open_bundle
646+
return _open_bundle(path, ohlcv_store=ohlcv_store)
647+
526648
def merge(self, other: 'Backtest') -> 'Backtest':
527649
"""
528650
Function to merge another Backtest instance into this one.

investing_algorithm_framework/domain/backtesting/backtest_metrics.py

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
from pathlib import Path
3-
from dataclasses import dataclass, field
3+
from dataclasses import dataclass, field, fields
44
from logging import getLogger
55
from typing import Tuple, List, Dict
66
from datetime import datetime, date
@@ -448,6 +448,21 @@ def open(file_path: str | Path) -> 'BacktestMetrics':
448448
with open(file_path, 'r') as file:
449449
data = json.load(file)
450450

451+
return BacktestMetrics.from_dict(data)
452+
453+
@classmethod
454+
def from_dict(cls, data: dict) -> 'BacktestMetrics':
455+
"""
456+
Reconstruct a ``BacktestMetrics`` from a plain dict (the inverse
457+
of :py:meth:`to_dict`). Used by the JSON loader and by the
458+
binary bundle loader (issue #487).
459+
"""
460+
if data is None:
461+
return None
462+
463+
# Work on a shallow copy to avoid mutating the caller's dict
464+
data = dict(data)
465+
451466
# Parse datetime fields
452467
data['backtest_start_date'] = datetime.fromisoformat(
453468
data['backtest_start_date']
@@ -457,37 +472,50 @@ def open(file_path: str | Path) -> 'BacktestMetrics':
457472
)
458473

459474
# Parse tuple lists with datetime
460-
data['equity_curve'] = BacktestMetrics._parse_tuple_list_datetime(
475+
data['equity_curve'] = cls._parse_tuple_list_datetime(
461476
data.get('equity_curve', [])
462477
)
463-
data['rolling_sharpe_ratio'] = BacktestMetrics\
464-
._parse_tuple_list_datetime(data.get('rolling_sharpe_ratio', []))
465-
data['monthly_returns'] = BacktestMetrics\
466-
._parse_tuple_list_datetime(data.get('monthly_returns', []))
467-
data['drawdown_series'] = BacktestMetrics\
468-
._parse_tuple_list_datetime(data.get('drawdown_series', []))
478+
data['rolling_sharpe_ratio'] = cls._parse_tuple_list_datetime(
479+
data.get('rolling_sharpe_ratio', [])
480+
)
481+
data['monthly_returns'] = cls._parse_tuple_list_datetime(
482+
data.get('monthly_returns', [])
483+
)
484+
data['drawdown_series'] = cls._parse_tuple_list_datetime(
485+
data.get('drawdown_series', [])
486+
)
487+
data['cumulative_return_series'] = cls._parse_tuple_list_datetime(
488+
data.get('cumulative_return_series', [])
489+
)
469490

470491
# Parse tuple lists with date
471-
data['yearly_returns'] = BacktestMetrics\
472-
._parse_tuple_list_date(data.get('yearly_returns', []))
492+
data['yearly_returns'] = cls._parse_tuple_list_date(
493+
data.get('yearly_returns', [])
494+
)
473495

474496
# Parse single tuples
475-
data['best_month'] = BacktestMetrics\
476-
._parse_tuple_datetime(data.get('best_month'))
477-
data['worst_month'] = BacktestMetrics\
478-
._parse_tuple_datetime(data.get('worst_month'))
479-
data['best_year'] = BacktestMetrics\
480-
._parse_tuple_date(data.get('best_year'))
481-
data['worst_year'] = BacktestMetrics\
482-
._parse_tuple_date(data.get('worst_year'))
497+
data['best_month'] = cls._parse_tuple_datetime(data.get('best_month'))
498+
data['worst_month'] = cls._parse_tuple_datetime(
499+
data.get('worst_month')
500+
)
501+
data['best_year'] = cls._parse_tuple_date(data.get('best_year'))
502+
data['worst_year'] = cls._parse_tuple_date(data.get('worst_year'))
483503

484504
# Parse Trade objects if they exist
485505
if data.get('best_trade'):
486506
data['best_trade'] = Trade.from_dict(data['best_trade'])
487507
if data.get('worst_trade'):
488508
data['worst_trade'] = Trade.from_dict(data['worst_trade'])
489509

490-
return BacktestMetrics(**data)
510+
# Drop fields computed in __post_init__ to avoid duplicate kwargs
511+
data.pop('total_number_of_days', None)
512+
513+
# Drop any unknown keys (forward-compat with new fields written
514+
# by a newer version)
515+
valid = {f.name for f in fields(cls)}
516+
data = {k: v for k, v in data.items() if k in valid}
517+
518+
return cls(**data)
491519

492520
def __repr__(self):
493521
"""

0 commit comments

Comments
 (0)