Skip to content

Commit 60cd09a

Browse files
committed
feat(backtest): make checkpoints content-aware + add force_rerun (#276)
Closes #276 - New checkpoint_manifest module computes a manifest_hash from each strategy's source code, instance parameters, data sources, and backtest date range. - BacktestService.run_vector_backtests and run_backtests now compute manifest hashes for active strategies/algorithms and persist them in checkpoints.json. Strategies whose hash differs from the stored hash are rerun automatically; matching ones are still skipped. - Both methods accept two new explicit, non-interactive override knobs: * force_rerun: bool | 'stale' = False * on_checkpoint_match: 'skip' | 'rerun' | 'warn' = 'skip' These are wired through App.run_vector_backtests and App.run_backtests. - checkpoints.json now supports both legacy list and dict shapes. Old files keep working (hash treated as None = legacy behaviour); new runs migrate the entry to dict {algorithm_id: manifest_hash}. - Adds 22 new unit tests covering hash determinism, parameter changes, date-range changes, legacy/new entry handling, force_rerun=True, on_checkpoint_match='rerun', and dict-format persistence.
1 parent 44c41e4 commit 60cd09a

4 files changed

Lines changed: 912 additions & 80 deletions

File tree

investing_algorithm_framework/app/app.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import threading
55
from datetime import datetime, timezone, timedelta
66
from pathlib import Path
7-
from typing import List, Optional, Any, Dict, Tuple, Callable, Union
7+
from typing import List, Literal, Optional, Any, Dict, Tuple, Callable, Union
88

99
from flask import Flask
1010

@@ -985,6 +985,8 @@ def run_vector_backtests(
985985
] = None,
986986
backtest_storage_directory: Optional[Union[str, Path]] = None,
987987
use_checkpoints: bool = False,
988+
force_rerun: Union[bool, Literal["stale"]] = False,
989+
on_checkpoint_match: Literal["skip", "rerun", "warn"] = "skip",
988990
batch_size: int = 50,
989991
checkpoint_batch_size: int = 25,
990992
n_workers: Optional[int] = None,
@@ -1087,7 +1089,24 @@ def filter_function(
10871089
instead of running a new backtest. This is useful for
10881090
long-running backtests that might take a while to complete.
10891091
When enabled, uses the optimized version with batching and
1090-
optional parallel processing.
1092+
optional parallel processing. Checkpoints are content-aware:
1093+
each entry stores a manifest hash that fingerprints the
1094+
strategy code, parameters, data sources, and date range.
1095+
Strategies whose hash differs from the stored hash are
1096+
rerun automatically.
1097+
force_rerun (Union[bool, Literal["stale"]]): Override checkpoint
1098+
skipping behaviour.
1099+
1100+
- False (default): Skip strategies with a matching checkpoint.
1101+
- "stale": Rerun only strategies whose stored checkpoint hash
1102+
differs from the current manifest hash.
1103+
- True: Ignore checkpoints entirely and rerun all strategies.
1104+
on_checkpoint_match (Literal["skip", "rerun", "warn"]): Behaviour
1105+
when a strategy's checkpoint matches.
1106+
1107+
- "skip" (default): Silently skip the strategy.
1108+
- "warn": Skip but emit a single log line per match batch.
1109+
- "rerun": Rerun the strategy anyway.
10911110
batch_size (int): Number of strategies to process in each batch
10921111
before memory cleanup. Only used when use_checkpoints=True.
10931112
Default: 50. Higher values use more memory but may be faster.
@@ -1204,6 +1223,8 @@ def filter_function(
12041223
checkpoint_batch_size=checkpoint_batch_size,
12051224
n_workers=n_workers,
12061225
use_checkpoints=use_checkpoints,
1226+
force_rerun=force_rerun,
1227+
on_checkpoint_match=on_checkpoint_match,
12071228
dynamic_position_sizing=dynamic_position_sizing,
12081229
fill_missing_data=fill_missing_data,
12091230
iterative_summary_update=iterative_summary_update,
@@ -1381,6 +1402,8 @@ def run_backtests(
13811402
metadata: Optional[Dict[str, str]] = None,
13821403
backtest_storage_directory: Optional[Union[str, Path]] = None,
13831404
use_checkpoints: bool = False,
1405+
force_rerun: Union[bool, Literal["stale"]] = False,
1406+
on_checkpoint_match: Literal["skip", "rerun", "warn"] = "skip",
13841407
show_progress: bool = False,
13851408
continue_on_error: bool = False,
13861409
window_filter_function: Optional[Callable] = None,
@@ -1420,7 +1443,16 @@ def run_backtests(
14201443
backtest_storage_directory (Union[str, Path]): Directory to save
14211444
backtests to.
14221445
use_checkpoints (bool): Whether to use checkpointing to resume
1423-
interrupted backtests.
1446+
interrupted backtests. Checkpoints are content-aware:
1447+
strategies whose code, parameters, data sources, or date range
1448+
have changed since the last run are rerun automatically.
1449+
force_rerun (Union[bool, Literal["stale"]]): Override checkpoint
1450+
skipping behaviour. ``False`` (default) skips matched
1451+
checkpoints, ``"stale"`` reruns only mismatched ones, and
1452+
``True`` reruns everything.
1453+
on_checkpoint_match (Literal["skip", "rerun", "warn"]): Behaviour
1454+
on a matching checkpoint. ``"skip"`` (default) silently
1455+
skips, ``"warn"`` skips and logs, ``"rerun"`` reruns anyway.
14241456
show_progress (bool): Whether to show progress bars.
14251457
continue_on_error (bool): Whether to continue on errors.
14261458
window_filter_function: Filter function applied after each
@@ -1533,6 +1565,8 @@ def run_backtests(
15331565
final_filter_function=final_filter_function,
15341566
backtest_storage_directory=backtest_storage_directory,
15351567
use_checkpoints=use_checkpoints,
1568+
force_rerun=force_rerun,
1569+
on_checkpoint_match=on_checkpoint_match,
15361570
batch_size=batch_size,
15371571
checkpoint_batch_size=checkpoint_batch_size,
15381572
fill_missing_data=fill_missing_data,

0 commit comments

Comments
 (0)