Skip to content

Commit 1fecaa5

Browse files
committed
feat: add consistency and stability metrics
- Add 8 new fields to BacktestSummaryMetrics (return/win_rate/sharpe consistency + stability, composite scores)
- CV-based consistency: 1 - CV, where CV = std/|mean| (scale-invariant)
- Normalized-std stability: 1 - std/max_std, with domain-specific max_std values
- Add consistency_score and stability_score weights to all 4 BacktestEvaluationFocus presets
1 parent da5565b commit 1fecaa5

3 files changed

Lines changed: 152 additions & 0 deletions

File tree

investing_algorithm_framework/domain/backtesting/backtest_evaluation_focuss.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,10 @@ def get_weights(self):
153153
# Efficiency metrics
154154
"exposure_ratio": 0.5,
155155
"trades_per_year": 0.3,
156+
157+
# Cross-window consistency
158+
"consistency_score": 1.5,
159+
"stability_score": 1.5,
156160
}
157161

158162
elif self == BacktestEvaluationFocus.PROFIT:
@@ -179,6 +183,10 @@ def get_weights(self):
179183
# Monthly/yearly consistency
180184
"percentage_winning_months": 0.8,
181185
"average_monthly_return": 1.0,
186+
187+
# Cross-window consistency
188+
"consistency_score": 1.0,
189+
"stability_score": 1.0,
182190
}
183191

184192
elif self == BacktestEvaluationFocus.FREQUENCY:
@@ -204,6 +212,10 @@ def get_weights(self):
204212

205213
# Duration efficiency
206214
"average_trade_duration": -0.3, # Prefer shorter trades
215+
216+
# Cross-window consistency
217+
"consistency_score": 1.0,
218+
"stability_score": 1.0,
207219
}
208220

209221
elif self == BacktestEvaluationFocus.RISK_ADJUSTED:
@@ -234,6 +246,10 @@ def get_weights(self):
234246
# Downside protection
235247
"average_trade_loss_percentage": -1.0,
236248
"percentage_negative_trades": -1.0,
249+
250+
# Cross-window consistency (critical for risk-adjusted)
251+
"consistency_score": 2.5,
252+
"stability_score": 2.5,
237253
}
238254

239255
# Fallback to balanced if unknown focus

investing_algorithm_framework/domain/backtesting/backtest_summary_metrics.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,14 @@ class BacktestSummaryMetrics:
107107
average_loss_duration: float = None
108108
max_consecutive_wins: int = None
109109
max_consecutive_losses: int = None
110+
return_consistency: float = None
111+
win_rate_consistency: float = None
112+
sharpe_consistency: float = None
113+
consistency_score: float = None
114+
return_stability: float = None
115+
win_rate_stability: float = None
116+
sharpe_stability: float = None
117+
stability_score: float = None
110118

111119
def to_dict(self) -> dict:
112120
"""
@@ -164,6 +172,14 @@ def to_dict(self) -> dict:
164172
"cvar_95": self.cvar_95,
165173
"max_consecutive_wins": self.max_consecutive_wins,
166174
"max_consecutive_losses": self.max_consecutive_losses,
175+
"return_consistency": self.return_consistency,
176+
"win_rate_consistency": self.win_rate_consistency,
177+
"sharpe_consistency": self.sharpe_consistency,
178+
"consistency_score": self.consistency_score,
179+
"return_stability": self.return_stability,
180+
"win_rate_stability": self.win_rate_stability,
181+
"sharpe_stability": self.sharpe_stability,
182+
"stability_score": self.stability_score,
167183
}
168184

169185
def save(self, file_path: str | Path) -> None:

investing_algorithm_framework/domain/backtesting/combine_backtests.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import math
23
from typing import List
34

45
from .backtest_metrics import BacktestMetrics
@@ -439,6 +440,117 @@ def generate_backtest_summary_metrics(
439440
consecutive_losses
440441
) if consecutive_losses else None
441442

443+
# === CONSISTENCY METRICS ===
444+
# Two complementary approaches to measure cross-window stability.
445+
#
446+
# 1) CV-based consistency: 1 - CV (CV = std / |mean|), capped [0, 1].
447+
# Standard statistical measure; scale-invariant.
448+
# Drawback: undefined when mean ≈ 0.
449+
#
450+
# 2) Normalized-std stability: 1 - std/max_std, capped [0, 1].
451+
# Uses a domain-specific max_std for normalization.
452+
# More intuitive for bounded metrics (win rate 0-100,
453+
# Sharpe typically -2 to +4).
454+
455+
return_consistency = None
456+
win_rate_consistency = None
457+
sharpe_consistency = None
458+
consistency_score = None
459+
return_stability = None
460+
win_rate_stability = None
461+
sharpe_stability = None
462+
stability_score = None
463+
464+
def _cv_consistency(values):
    """Score cross-window consistency as ``1 - CV``, clamped to [0, 1].

    CV is the coefficient of variation: the sample standard deviation
    (n - 1 denominator) divided by the absolute value of the mean.

    Args:
        values: Per-window metric values. NaN and +/-inf entries are
            ignored; a NaN would otherwise pass through the min/max
            clamp below and be reported as a perfect score of 1.0.

    Returns:
        A float in [0, 1] (1.0 means no variation across windows),
        0.0 when the mean is approximately zero (CV is undefined
        there), or None when fewer than two finite values remain.
    """
    finite = [x for x in values if math.isfinite(x)]
    if len(finite) < 2:
        return None
    mean = sum(finite) / len(finite)
    if abs(mean) < 1e-9:
        return 0.0  # mean ≈ 0 → CV undefined, treat as unstable
    var = sum((x - mean) ** 2 for x in finite) / (len(finite) - 1)
    cv = math.sqrt(var) / abs(mean)
    return max(0.0, min(1.0, 1.0 - cv))
474+
475+
def _norm_stability(values, max_std):
    """Score cross-window stability as ``1 - std / max_std``, clamped to [0, 1].

    The standard deviation is the sample standard deviation
    (n - 1 denominator).

    Args:
        values: Per-window metric values. NaN and +/-inf entries are
            ignored; a NaN would otherwise pass through the min/max
            clamp below and be reported as a perfect score of 1.0.
        max_std: Standard deviation that maps to a score of 0. Must be
            greater than zero.

    Returns:
        A float in [0, 1] (1.0 means no variation across windows), or
        None when fewer than two finite values remain.

    Raises:
        ValueError: If ``max_std`` is not a positive number and there
            are enough values to compute a score.
    """
    finite = [x for x in values if math.isfinite(x)]
    if len(finite) < 2:
        return None
    # ``not (max_std > 0)`` also rejects NaN, which ``max_std <= 0`` misses.
    if not (max_std > 0):
        raise ValueError(f"max_std must be positive, got {max_std!r}")
    mean = sum(finite) / len(finite)
    var = sum((x - mean) ** 2 for x in finite) / (len(finite) - 1)
    std = math.sqrt(var)
    return max(0.0, min(1.0, 1.0 - std / max_std))
483+
484+
if len(valid_metrics) >= 2:
485+
# --- Per-window returns ---
486+
per_window_returns = [
487+
b.total_net_gain_percentage for b in valid_metrics
488+
if b.total_net_gain_percentage is not None
489+
]
490+
return_consistency = _cv_consistency(per_window_returns)
491+
# max_std = 100: a std of 100% of initial capital → score 0
492+
return_stability = _norm_stability(per_window_returns, 100.0)
493+
494+
# --- Per-window win rates ---
495+
per_window_win_rates = [
496+
b.win_rate for b in valid_metrics
497+
if b.win_rate is not None
498+
and b.number_of_trades_closed is not None
499+
and b.number_of_trades_closed > 0
500+
]
501+
return_consistency = _cv_consistency(per_window_returns)
502+
win_rate_consistency = _cv_consistency(per_window_win_rates)
503+
# max_std = 50: max population std for a [0, 100] range (sample std can exceed it; the score is floored at 0)
504+
win_rate_stability = _norm_stability(per_window_win_rates, 50.0)
505+
506+
# --- Per-window Sharpe ratios ---
507+
per_window_sharpe = [
508+
b.sharpe_ratio for b in valid_metrics
509+
if b.sharpe_ratio is not None
510+
and not math.isnan(b.sharpe_ratio)
511+
and not math.isinf(b.sharpe_ratio)
512+
]
513+
sharpe_consistency = _cv_consistency(per_window_sharpe)
514+
# max_std = 2: Sharpe ratios typically range -2 to +4;
515+
# a std of 2 means wildly inconsistent
516+
sharpe_stability = _norm_stability(per_window_sharpe, 2.0)
517+
518+
# --- Composite scores ---
519+
# Both use the same weighting scheme:
520+
# 35% returns, 25% win rate, 20% Sharpe, 20% profitable
521+
# window ratio.
522+
def _composite(ret_c, wr_c, sh_c):
    """Blend the per-metric scores into one weighted-average score.

    Base weights are 0.35 for returns, 0.25 for win rate, 0.20 for
    Sharpe and 0.20 for the profitable-window ratio. Components that
    are None are left out, and the result is divided by the sum of
    the weights actually used, so the remaining weights are
    renormalized.

    ``number_of_windows`` and ``number_of_profitable_windows`` are
    read from the enclosing scope, not passed as arguments.

    Args:
        ret_c: Return-based score, or None if unavailable.
        wr_c: Win-rate-based score, or None if unavailable.
        sh_c: Sharpe-based score, or None if unavailable.

    Returns:
        The weighted average of the available components, or None
        when no component is available.
    """
    weighted = [
        (score, weight)
        for score, weight in ((ret_c, 0.35), (wr_c, 0.25), (sh_c, 0.20))
        if score is not None
    ]
    # The profitable-window ratio only counts when there is at least
    # one window to divide by.
    if number_of_windows and number_of_windows > 0:
        weighted.append(
            (number_of_profitable_windows / number_of_windows, 0.20)
        )
    if not weighted:
        return None
    weight_sum = sum(weight for _, weight in weighted)
    return sum(score * weight for score, weight in weighted) / weight_sum
546+
547+
consistency_score = _composite(
548+
return_consistency, win_rate_consistency, sharpe_consistency
549+
)
550+
stability_score = _composite(
551+
return_stability, win_rate_stability, sharpe_stability
552+
)
553+
442554
return BacktestSummaryMetrics(
443555
total_net_gain=total_net_gain,
444556
total_net_gain_percentage=total_net_gain_percentage,
@@ -487,4 +599,12 @@ def generate_backtest_summary_metrics(
487599
average_loss_duration=average_loss_duration,
488600
max_consecutive_wins=max_consecutive_wins,
489601
max_consecutive_losses=max_consecutive_losses,
602+
return_consistency=return_consistency,
603+
win_rate_consistency=win_rate_consistency,
604+
sharpe_consistency=sharpe_consistency,
605+
consistency_score=consistency_score,
606+
return_stability=return_stability,
607+
win_rate_stability=win_rate_stability,
608+
sharpe_stability=sharpe_stability,
609+
stability_score=stability_score,
490610
)

0 commit comments

Comments
 (0)