feat: add per-strategy progress tracking for parallel backtests

MDUYN · MDUYN · commit a6f4bff77582 · 2026-04-27T18:11:55.000+02:00
Replace the batch-level progress bar with strategy-level progress
tracking when using parallel workers (n_workers). Workers now increment
a shared counter after each individual strategy finishes (including
strategies that fail while continue_on_error is set), and a monitoring
thread polls that counter every 500 ms to update a tqdm progress bar.

Changes:
- Use multiprocessing.Manager().Value() as shared counter across workers
- Add monitoring thread to poll counter and update tqdm bar
- Show per-strategy throughput (strategies/s) and ETA instead of
  batch-level progress
- Move multiprocessing and concurrent.futures imports to top level and
  add the threading import needed by the monitoring thread
- Order all imports per PEP 8 (stdlib, third-party, local)
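- Remove redundant inline combine_backtests import
- Keep _run_batch_backtest_worker compatible with the old 9-element
  args tuple (progress_counter is treated as None when absent)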
diff --git a/investing_algorithm_framework/infrastructure/services/backtesting/backtest_service.py b/investing_algorithm_framework/infrastructure/services/backtesting/backtest_service.py
@@ -1,14 +1,18 @@
 import gc
 import json
 import logging
+import multiprocessing
 import os
-import numpy as np
-import pandas as pd
-import polars as pl
+import threading
 from collections import defaultdict
+from concurrent.futures import ProcessPoolExecutor, as_completed
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import Dict, List, Union, Optional, Callable
+from typing import Callable, Dict, List, Optional, Union
+
+import numpy as np
+import pandas as pd
+import polars as pl
 
 from investing_algorithm_framework.domain import BacktestRun, TimeUnit, \
     OperationalException, BacktestDateRange, Backtest, combine_backtests, \
@@ -902,10 +906,6 @@ def run_vector_backtests(
 
                 if use_parallel:
                     # Parallel processing of backtests (batches per worker)
-                    import multiprocessing
-                    from concurrent.futures import \
-                        ProcessPoolExecutor, as_completed
-
                     # Determine number of workers
                     if n_workers == -1:
                         n_workers = multiprocessing.cpu_count()
@@ -933,6 +933,12 @@ def run_vector_backtests(
                             show_progress
                         )
 
+                    # Shared counter for strategy-level progress
+                    # across all workers. Use Manager so the proxy
+                    # object can be pickled by ProcessPoolExecutor.
+                    manager = multiprocessing.Manager()
+                    progress_counter = manager.Value('i', 0)
+
                     worker_args = []
 
                     for batch in strategy_batches:
@@ -945,9 +951,34 @@ def run_vector_backtests(
                             continue_on_error,
                             self._data_provider_service.copy(),
                             False,
-                            dynamic_position_sizing
+                            dynamic_position_sizing,
+                            progress_counter,
                         ))
 
+                    # Start a monitoring thread that updates a
+                    # strategy-level progress bar in real time
+                    total_strategies = len(strategies_to_run)
+                    pbar = tqdm(
+                        total=total_strategies,
+                        colour="green",
+                        desc="Running backtests for "
+                             f"{start_date} to {end_date}",
+                        disable=not show_progress,
+                        unit="strategy",
+                    )
+                    stop_event = threading.Event()
+
+                    def _monitor_progress():
+                        while not stop_event.is_set():
+                            pbar.n = progress_counter.value
+                            pbar.refresh()
+                            stop_event.wait(0.5)
+
+                    monitor = threading.Thread(
+                        target=_monitor_progress, daemon=True
+                    )
+                    monitor.start()
+
                     # Execute batches in parallel
                     with ProcessPoolExecutor(max_workers=n_workers) as ex:
                         # Submit all batch tasks
@@ -961,15 +992,8 @@ def run_vector_backtests(
                         # Track completed batches for periodic cleanup
                         completed_count = 0
 
-                        # Collect results with progress bar
-                        for future in tqdm(
-                            as_completed(futures),
-                            total=len(futures),
-                            colour="green",
-                            desc="Running backtests for "
-                                 f"{start_date} to {end_date}",
-                            disable=not show_progress
-                        ):
+                        # Collect results as batches complete
+                        for future in as_completed(futures):
                             try:
                                 batch_result = future.result()
                                 if batch_result:
@@ -1006,6 +1030,15 @@ def run_vector_backtests(
                                 else:
                                     raise
 
+                    # Stop the monitoring thread and finalise
+                    # the progress bar
+                    stop_event.set()
+                    monitor.join()
+                    pbar.n = progress_counter.value
+                    pbar.refresh()
+                    pbar.close()
+                    manager.shutdown()
+
                     # Save remaining batch and create checkpoint files when
                     # storage directory provided
                     if backtest_storage_directory is not None:
@@ -1309,8 +1342,6 @@ def run_vector_backtests(
                     combined_backtests.append(backtests_list[0])
                 else:
                     # Combine multiple backtests for the same algorithm
-                    from investing_algorithm_framework.domain import (
-                        combine_backtests)
                     combined = combine_backtests(backtests_list)
                     combined_backtests.append(combined)
 
@@ -1709,23 +1740,40 @@ def _run_batch_backtest_worker(args):
                 continue_on_error,
                 data_provider_service,
                 show_progress,
-                dynamic_position_sizing
+                dynamic_position_sizing,
+                progress_counter (optional),
             )
 
         Returns:
             List[Backtest]: List of completed backtest results
         """
-        (
-            strategy_batch,
-            backtest_date_range,
-            portfolio_configuration,
-            snapshot_interval,
-            risk_free_rate,
-            continue_on_error,
-            data_provider_service,
-            show_progress,
-            dynamic_position_sizing
-        ) = args
+        # Support both old (9-element) and new (10-element) tuple
+        if len(args) == 10:
+            (
+                strategy_batch,
+                backtest_date_range,
+                portfolio_configuration,
+                snapshot_interval,
+                risk_free_rate,
+                continue_on_error,
+                data_provider_service,
+                show_progress,
+                dynamic_position_sizing,
+                progress_counter,
+            ) = args
+        else:
+            (
+                strategy_batch,
+                backtest_date_range,
+                portfolio_configuration,
+                snapshot_interval,
+                risk_free_rate,
+                continue_on_error,
+                data_provider_service,
+                show_progress,
+                dynamic_position_sizing,
+            ) = args
+            progress_counter = None
 
         vector_backtest_service = VectorBacktestService(
             data_provider_service=data_provider_service
@@ -1768,12 +1816,21 @@ def _run_batch_backtest_worker(args):
                 )
                 batch_results.append(backtest)
 
+                # Increment shared progress counter so the
+                # main process can track per-strategy progress
+                if progress_counter is not None:
+                    progress_counter.value += 1
+
             except Exception as e:
                 if continue_on_error:
                     logger.error(
                         "Worker error for strategy "
                         f"{strategy.algorithm_id}: {e}"
                     )
+                    # Still increment counter for failed strategies
+                    # so progress total stays accurate
+                    if progress_counter is not None:
+                        progress_counter.value += 1
                     continue
                 else:
                     raise
diff --git a/tests/notebook/__init__.py b/tests/notebook/__init__.py
@@ -1 +0,0 @@
-