|
1 | 1 | import logging |
| 2 | +import math |
2 | 3 | from typing import List |
3 | 4 |
|
4 | 5 | from .backtest_metrics import BacktestMetrics |
@@ -439,6 +440,117 @@ def generate_backtest_summary_metrics( |
439 | 440 | consecutive_losses |
440 | 441 | ) if consecutive_losses else None |
441 | 442 |
|
| 443 | + # === CONSISTENCY METRICS === |
| 444 | + # Two complementary approaches to measure cross-window stability. |
| 445 | + # |
| 446 | + # 1) CV-based consistency: 1 - CV (CV = std / |mean|), capped [0, 1]. |
| 447 | + # Standard statistical measure; scale-invariant. |
| 448 | + # Drawback: undefined when mean ≈ 0. |
| 449 | + # |
| 450 | + # 2) Normalized-std stability: 1 - std/max_std, capped [0, 1]. |
| 451 | + # Uses a domain-specific max_std for normalization. |
| 452 | + # More intuitive for bounded metrics (win rate 0-100, |
| 453 | + # Sharpe typically -2 to +4). |
| 454 | + |
| 455 | + return_consistency = None |
| 456 | + win_rate_consistency = None |
| 457 | + sharpe_consistency = None |
| 458 | + consistency_score = None |
| 459 | + return_stability = None |
| 460 | + win_rate_stability = None |
| 461 | + sharpe_stability = None |
| 462 | + stability_score = None |
| 463 | + |
def _cv_consistency(values):
    """Score cross-window consistency as ``1 - CV``, clamped to [0, 1].

    CV is the coefficient of variation: the sample standard deviation
    (``n - 1`` denominator) divided by the absolute value of the mean.

    Args:
        values: Per-window numeric observations. NaN and infinite
            entries are ignored.

    Returns:
        A float in [0, 1] where 1.0 means all values are identical;
        0.0 when the mean is within 1e-9 of zero (CV is undefined
        there); or None when fewer than two finite values remain.
    """
    # A NaN would propagate into ``1 - cv``, and ``min(1.0, nan)``
    # evaluates to 1.0, so unfiltered bad data would be reported as
    # perfectly consistent. Drop non-finite entries up front instead.
    finite = [x for x in values if math.isfinite(x)]
    if len(finite) < 2:
        return None
    mean = sum(finite) / len(finite)
    if abs(mean) < 1e-9:
        return 0.0  # mean ≈ 0 → CV undefined; treat as unstable
    var = sum((x - mean) ** 2 for x in finite) / (len(finite) - 1)
    cv = math.sqrt(var) / abs(mean)
    return max(0.0, min(1.0, 1.0 - cv))
| 474 | + |
def _norm_stability(values, max_std):
    """Score stability as ``1 - std / max_std``, clamped to [0, 1].

    ``std`` is the sample standard deviation (``n - 1`` denominator)
    of the finite entries in ``values``.

    Args:
        values: Per-window numeric observations. NaN and infinite
            entries are ignored.
        max_std: Standard deviation that maps to a score of 0. Must be
            a positive number.

    Returns:
        A float in [0, 1] where 1.0 means all values are identical, or
        None when fewer than two finite values remain.

    Raises:
        ValueError: If ``max_std`` is not greater than zero (this also
            rejects NaN).
    """
    # Zero would divide by zero and a negative scale would silently
    # clamp every result to 1.0, so reject both explicitly.
    if not max_std > 0:
        raise ValueError("max_std must be a positive number")
    # ``min(1.0, nan)`` evaluates to 1.0, so a single NaN observation
    # would otherwise be reported as perfect stability.
    finite = [x for x in values if math.isfinite(x)]
    if len(finite) < 2:
        return None
    mean = sum(finite) / len(finite)
    var = sum((x - mean) ** 2 for x in finite) / (len(finite) - 1)
    std = math.sqrt(var)
    return max(0.0, min(1.0, 1.0 - std / max_std))
| 483 | + |
| 484 | + if len(valid_metrics) >= 2: |
| 485 | + # --- Per-window returns --- |
| 486 | + per_window_returns = [ |
| 487 | + b.total_net_gain_percentage for b in valid_metrics |
| 488 | + if b.total_net_gain_percentage is not None |
| 489 | + ] |
| 490 | + return_consistency = _cv_consistency(per_window_returns) |
| 491 | + # max_std = 100: a std of 100% of initial capital → score 0 |
| 492 | + return_stability = _norm_stability(per_window_returns, 100.0) |
| 493 | + |
| 494 | + # --- Per-window win rates --- |
| 495 | + per_window_win_rates = [ |
| 496 | + b.win_rate for b in valid_metrics |
| 497 | + if b.win_rate is not None |
| 498 | + and b.number_of_trades_closed is not None |
| 499 | + and b.number_of_trades_closed > 0 |
| 500 | + ] |
| 501 | + return_consistency = _cv_consistency(per_window_returns) |
| 502 | + win_rate_consistency = _cv_consistency(per_window_win_rates) |
| 503 | + # max_std = 50: max population std for values in [0, 100]; the sample std (n - 1) used here can exceed it, in which case the score clamps to 0
| 504 | + win_rate_stability = _norm_stability(per_window_win_rates, 50.0) |
| 505 | + |
| 506 | + # --- Per-window Sharpe ratios --- |
| 507 | + per_window_sharpe = [ |
| 508 | + b.sharpe_ratio for b in valid_metrics |
| 509 | + if b.sharpe_ratio is not None |
| 510 | + and not math.isnan(b.sharpe_ratio) |
| 511 | + and not math.isinf(b.sharpe_ratio) |
| 512 | + ] |
| 513 | + sharpe_consistency = _cv_consistency(per_window_sharpe) |
| 514 | + # max_std = 2: Sharpe ratios typically range -2 to +4; |
| 515 | + # a std of 2 means wildly inconsistent |
| 516 | + sharpe_stability = _norm_stability(per_window_sharpe, 2.0) |
| 517 | + |
| 518 | + # --- Composite scores --- |
| 519 | + # Both use the same base weights: 35% returns, 25% win rate,
| 520 | + # 20% Sharpe, 20% profitable-window ratio. Components that are
| 521 | + # None are skipped and the remaining weights renormalized.
def _composite(ret_c, wr_c, sh_c):
    """Blend per-metric scores into one weighted average.

    Base weights are 0.35 for ``ret_c``, 0.25 for ``wr_c`` and 0.20 for
    ``sh_c``. The ratio of profitable windows, read from the enclosing
    scope's ``number_of_profitable_windows`` and ``number_of_windows``,
    contributes with weight 0.20 when ``number_of_windows`` is positive.

    Any score that is None is left out, and the result is divided by
    the sum of the weights that were actually used.

    Returns:
        The weighted average, or None when no component is available.
    """
    weighted = [
        (score, weight)
        for score, weight in ((ret_c, 0.35), (wr_c, 0.25), (sh_c, 0.20))
        if score is not None
    ]
    if number_of_windows and number_of_windows > 0:
        weighted.append(
            (number_of_profitable_windows / number_of_windows, 0.20)
        )
    if not weighted:
        return None
    weight_total = sum(weight for _, weight in weighted)
    return sum(score * weight for score, weight in weighted) / weight_total
| 546 | + |
| 547 | + consistency_score = _composite( |
| 548 | + return_consistency, win_rate_consistency, sharpe_consistency |
| 549 | + ) |
| 550 | + stability_score = _composite( |
| 551 | + return_stability, win_rate_stability, sharpe_stability |
| 552 | + ) |
| 553 | + |
442 | 554 | return BacktestSummaryMetrics( |
443 | 555 | total_net_gain=total_net_gain, |
444 | 556 | total_net_gain_percentage=total_net_gain_percentage, |
@@ -487,4 +599,12 @@ def generate_backtest_summary_metrics( |
487 | 599 | average_loss_duration=average_loss_duration, |
488 | 600 | max_consecutive_wins=max_consecutive_wins, |
489 | 601 | max_consecutive_losses=max_consecutive_losses, |
| 602 | + return_consistency=return_consistency, |
| 603 | + win_rate_consistency=win_rate_consistency, |
| 604 | + sharpe_consistency=sharpe_consistency, |
| 605 | + consistency_score=consistency_score, |
| 606 | + return_stability=return_stability, |
| 607 | + win_rate_stability=win_rate_stability, |
| 608 | + sharpe_stability=sharpe_stability, |
| 609 | + stability_score=stability_score, |
490 | 610 | ) |
0 commit comments