@@ -12,11 +12,11 @@
 (sub-expert pathways) for different market regimes.
 
 Architecture:
-  - 256 neurons, 0 hidden layers
-  - 5 input neurons (OHLCV), 10 output neurons (5 means + 5 log-variances)
-  - hebb_type='synapse' -- per-synapse plasticity (2*N^2 learnable params)
-  - 30 log-return lookback + 10 thinking steps = 40 total temporal steps
-  - Heteroscedastic Gaussian NLL: per-channel aleatoric uncertainty
+  - NUM_NEURONS neurons, 0 hidden layers
+  - INPUT_DIM input neurons (OHLCV), OUTPUT_DIM output neurons (means + log-variances)
+  - hebb_type='synapse' -- per-synapse plasticity (2*N^2 learnable params)
+  - WINDOW_SIZE return lookback + THINK_STEPS thinking steps per sample
+  - Heteroscedastic Gaussian NLL: per-channel aleatoric uncertainty
 
 Uncertainty Estimation (Kendall & Gal, NeurIPS 2017):
     The network predicts both mu (expected value) and log(sigma^2) (aleatoric
@@ -162,27 +162,34 @@
 # --- Model ---
 NUM_NEURONS = 256
 INPUT_DIM = 5  # O, H, L, C, V
-OUTPUT_DIM = 10  # 5 means (mu) + 5 log-variances (log sigma^2)
-WINDOW_SIZE = 12  # Input sequence length (12 return steps)
-RAW_WINDOW_SIZE = WINDOW_SIZE + 2  # 12 inputs + 1 target return need 14 raw candles
+OUTPUT_DIM = 10  # Means + log-variances for each OHLCV channel
+WINDOW_SIZE = 12  # Input sequence length in return space
+RAW_WINDOW_SIZE = WINDOW_SIZE + 2  # Need previous + WINDOW_SIZE inputs + next target candle
 THINK_STEPS = 10  # Extra thinking steps after the last candle
-TOTAL_STEPS = WINDOW_SIZE + THINK_STEPS  # 22
+TOTAL_STEPS = WINDOW_SIZE + THINK_STEPS
 
 # --- Training ---
 EPOCHS = 100
-BATCH_SIZE = 256
-EVAL_BATCH = 256  # Larger batch for eval (no gradients)
-LR = 5e-5
+BATCH_SIZE = 32
+EVAL_BATCH = 1024  # Larger batch for eval (no gradients)
+LR = 1e-4
 LR_MIN = 1e-6  # Cosine annealing floor
 VAL_EVERY = 3  # Validate every N epochs (validation is expensive)
 PATIENCE = 20  # Early stopping patience (in validation checks)
-CONF_THRESH = 0.55  # Trade only when P(up) is outside [0.45, 0.55]
+CONF_THRESH = 0.55  # Trade only when directional probability is far enough from 0.5
 MAX_WINDOWS_PER_SOURCE = {
     'train': 6000,
     'val':   1200,
     'test':  2000,
 }
-MAX_ABS_NORMALIZED = 20.0  # Filter pathological spikes after return transform
+
+# Feature scaling multiplier applied to return channels before float32 training.
+# Higher values increase numerical visibility of tiny moves.
+RETURN_SCALE = 100.0
+# Base outlier threshold in unscaled return space.
+# Threshold in feature space is this base multiplied by RETURN_SCALE.
+MAX_ABS_NORMALIZED_BASE = 20.0
+MAX_ABS_NORMALIZED = MAX_ABS_NORMALIZED_BASE * RETURN_SCALE
 
 
 # ============================================================================
@@ -247,8 +254,9 @@ def normalize_window(window: np.ndarray) -> np.ndarray | None:
 
     Input shape: (RAW_WINDOW_SIZE, 5) where RAW_WINDOW_SIZE = WINDOW_SIZE + 2.
     Output shape: (WINDOW_SIZE + 1, 5):
-      - OHLC channels: log returns ln(P_t / P_{t-1})
-      - Volume channel: log-volume differences ln(1+V_t) - ln(1+V_{t-1})
+      - OHLC channels: scaled log returns (ln(P_t / P_{t-1}) * RETURN_SCALE)
+      - Volume channel: scaled log-volume differences
+        ((ln(1+V_t) - ln(1+V_{t-1})) * RETURN_SCALE)
 
     Returns float32 features or None if invalid.
     """
@@ -258,13 +266,13 @@ def normalize_window(window: np.ndarray) -> np.ndarray | None:
     prices = window[:, :4]
     if (prices <= 0).any() or not np.isfinite(prices).all():
         return None
-    price_returns = np.log(prices[1:] / prices[:-1])
+    price_returns = np.log(prices[1:] / prices[:-1]) * RETURN_SCALE
 
     vol = window[:, 4]
     if (vol < 0).any() or not np.isfinite(vol).all():
         return None
     vol_log = np.log1p(vol)
-    vol_returns = (vol_log[1:] - vol_log[:-1]).reshape(-1, 1)
+    vol_returns = ((vol_log[1:] - vol_log[:-1]) * RETURN_SCALE).reshape(-1, 1)
 
     out = np.hstack([price_returns, vol_returns]).astype(np.float32)
     if not np.isfinite(out).all():
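For orientation, a minimal smoke test of normalize_window under the new scaling; a sketch only, assuming the function and constants above are in scope and using synthetic data:

    import numpy as np

    rng = np.random.default_rng(0)
    prices = 100.0 * np.exp(np.cumsum(rng.normal(0.0, 0.001, (14, 4)), axis=0))
    volume = rng.uniform(1e3, 1e5, (14, 1))
    window = np.hstack([prices, volume])    # (RAW_WINDOW_SIZE, 5) raw candles

    feats = normalize_window(window)        # scaled returns, or None if invalid
    assert feats is not None and feats.shape == (13, 5)   # WINDOW_SIZE + 1 rows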
@@ -308,15 +316,23 @@ def build_dataset():
     master_cache_file = CACHE_DIR / "master_dataset.npz"
     today = datetime.now().strftime('%Y-%m-%d')
 
+    # Keep scale=1 payload identical to legacy so old caches/fingerprints match.
+    if float(RETURN_SCALE) == 1.0:
+        feature_mode = 'log_returns_v1'
+    else:
+        feature_mode = 'scaled_log_returns_v2'
+
     config_payload = {
         'instruments': INSTRUMENTS,
         'timeframes': TIMEFRAMES,
         'window_size': WINDOW_SIZE,
         'raw_window_size': RAW_WINDOW_SIZE,
-        'feature_mode': 'log_returns_v1',
+        'feature_mode': feature_mode,
         'max_windows_per_source': MAX_WINDOWS_PER_SOURCE,
         'max_abs_normalized': MAX_ABS_NORMALIZED,
     }
+    if float(RETURN_SCALE) != 1.0:
+        config_payload['return_scale'] = RETURN_SCALE
     config_fingerprint = hashlib.sha256(
         json.dumps(config_payload, sort_keys=True).encode('utf-8')
     ).hexdigest()
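The conditional payload exists so the fingerprint stays backward compatible: at RETURN_SCALE == 1.0 the payload carries neither the new key nor the new mode string, so its SHA-256 matches fingerprints written before this change. A hedged sketch of that invariant (helper names are hypothetical):

    import hashlib, json

    def fp(payload):
        return hashlib.sha256(
            json.dumps(payload, sort_keys=True).encode('utf-8')
        ).hexdigest()

    def payload_for(scale):
        p = {'feature_mode': 'log_returns_v1' if scale == 1.0
             else 'scaled_log_returns_v2', 'window_size': 12}
        if scale != 1.0:
            p['return_scale'] = scale
        return p

    legacy = {'feature_mode': 'log_returns_v1', 'window_size': 12}
    assert fp(payload_for(1.0)) == fp(legacy)    # old caches stay valid at scale=1
    assert fp(payload_for(100.0)) != fp(legacy)  # other scales force a rebuild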
@@ -482,8 +498,8 @@ class HeteroscedasticNLL(nn.Module):
     Kendall & Gal, "What Uncertainties Do We Need in Bayesian Deep
     Learning for Computer Vision?", NeurIPS 2017.
     """
-    LOG_VAR_MIN = -10.0  # sigma_min ~= 0.007
-    LOG_VAR_MAX = 10.0   # sigma_max ~= 148.4
+    LOG_VAR_MIN = -10.0  # sigma_scaled_min ~= 0.007
+    LOG_VAR_MAX = 10.0   # sigma_scaled_max ~= 148.4
 
     def forward(self, predicted, target):
         # predicted: (B, 10) -> 5 mu (OHLCV) + 5 log(sigma^2)
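For reference, a minimal sketch of the loss this class implements with the clamp above (per-channel Gaussian NLL with constants dropped; the file's exact reduction may differ):

    import torch

    def heteroscedastic_nll(predicted, target):
        mu, log_var = predicted[:, :5], predicted[:, 5:]
        log_var = torch.clamp(log_var, -10.0, 10.0)
        # NLL = 0.5 * (log sigma^2 + (target - mu)^2 / sigma^2), averaged
        return (0.5 * (log_var + (target - mu) ** 2 * torch.exp(-log_var))).mean()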
@@ -502,14 +518,19 @@ def gaussian_cdf(x):
     return 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
 
 
+def to_raw_return(x):
+    """Convert scaled return features back to raw log-return units."""
+    return x / RETURN_SCALE
+
+
 # ============================================================================
 # MODEL
 # ============================================================================
 
 def build_model():
-    """Build the Financial Oracle: 256 neurons, synaptic Hebbian, resonant init."""
-    input_ids = list(range(INPUT_DIM))  # [0..4]
-    output_ids = list(range(NUM_NEURONS - OUTPUT_DIM, NUM_NEURONS))  # [246..255]
+    """Build the Financial Oracle with config-driven neuron counts and IO mapping."""
+    input_ids = list(range(INPUT_DIM))
+    output_ids = list(range(NUM_NEURONS - OUTPUT_DIM, NUM_NEURONS))
 
     model = OdyssNet(
         num_neurons=NUM_NEURONS,
@@ -558,8 +579,8 @@ def evaluate(trainer, data_x, data_y):
 
     Returns (avg_mse, avg_sigma_close, direction_accuracy,
     confident_direction_accuracy, coverage).
-      - avg_mse: Mean squared error on mu predictions
-      - avg_sigma_close: Mean predicted sigma for Close channel
+      - avg_mse: Mean squared error on mu predictions (raw log-return)
+      - avg_sigma_close: Mean predicted sigma for Close channel (raw log-return)
       - direction_accuracy: % of correct next-candle Close return sign
       - confident_direction_accuracy: accuracy on confident predictions only
       - coverage: % of samples that triggered a confident signal
@@ -581,12 +602,18 @@ def evaluate(trainer, data_x, data_y):
         pred = trainer.predict(bx, thinking_steps=TOTAL_STEPS, full_sequence=False)
         by_dev = by.to(pred.device)
         mu = pred[:, :5]
-        log_var = torch.clamp(pred[:, 5:], -10.0, 10.0)
+        log_var = torch.clamp(
+            pred[:, 5:], HeteroscedasticNLL.LOG_VAR_MIN, HeteroscedasticNLL.LOG_VAR_MAX
+        )
         sigma = torch.exp(0.5 * log_var)  # (B, 5)
 
-        mse = ((mu - by_dev) ** 2).mean().item()
+        mu_raw = to_raw_return(mu)
+        by_raw = to_raw_return(by_dev)
+        sigma_raw = to_raw_return(sigma)
+
+        mse = ((mu_raw - by_raw) ** 2).mean().item()
         total_mse += mse
-        total_sigma_close += sigma[:, 3].mean().item()
+        total_sigma_close += sigma_raw[:, 3].mean().item()
 
         # Probabilistic next-candle direction from return distribution:
         # P(close_{t+1} > close_t) = P(r_close > 0) = Phi(mu_r / sigma_r)
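A sketch of the confident-direction logic the comment describes (names taken from the surrounding code; the exact lines fall outside this hunk). Note that mu/sigma is invariant to RETURN_SCALE, so P(up) is identical in scaled and raw units:

    p_up = gaussian_cdf(mu[:, 3] / (sigma[:, 3] + 1e-8))     # (B,)
    confident = (p_up - 0.5).abs() >= (CONF_THRESH - 0.5)    # outside [0.45, 0.55]
    correct = (p_up > 0.5) == (by_dev[:, 3] > 0)             # sign of close return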
@@ -716,28 +743,34 @@ def plot_results(history, trainer, test_x, test_y, test_metrics, lr_matrix):
     n_sc = min(800, len(test_x))
     idx = torch.randperm(len(test_x))[:n_sc]
     pred = trainer.predict(test_x[idx], thinking_steps=TOTAL_STEPS, full_sequence=False)
-    mu_close = pred[:, 3].cpu().numpy()
-    t_close = test_y[idx, 3].cpu().numpy()
-    log_var_close = torch.clamp(pred[:, 8], -10.0, 10.0)
-    sigma_close = torch.exp(0.5 * log_var_close).cpu().numpy()
+    mu_close_scaled = pred[:, 3].cpu().numpy()
+    t_close_scaled = test_y[idx, 3].cpu().numpy()
+    log_var_close = torch.clamp(
+        pred[:, 8], HeteroscedasticNLL.LOG_VAR_MIN, HeteroscedasticNLL.LOG_VAR_MAX
+    )
+    sigma_close_scaled = torch.exp(0.5 * log_var_close).cpu().numpy()
+
+    mu_close = to_raw_return(mu_close_scaled)
+    t_close = to_raw_return(t_close_scaled)
+    sigma_close = to_raw_return(sigma_close_scaled)
 
     sc = ax.scatter(t_close, mu_close, c=sigma_close, cmap='RdYlGn_r',
                     alpha=0.4, s=8, vmin=np.percentile(sigma_close, 5),
                     vmax=np.percentile(sigma_close, 95))
     lims = [min(t_close.min(), mu_close.min()), max(t_close.max(), mu_close.max())]
     ax.plot(lims, lims, 'k--', alpha=0.3, linewidth=0.8)
-    ax.set_xlabel('True Close Return (log)')
+    ax.set_xlabel('True Close Return (raw log)')
     ax.set_ylabel('Predicted mu_close_return')
     ax.set_title(f'Close Return Prediction (Dir Acc {test_dir_acc:.1f}%)')
-    plt.colorbar(sc, ax=ax, label='sigma (uncertainty)', shrink=0.8)
+    plt.colorbar(sc, ax=ax, label='sigma (raw log-return)', shrink=0.8)
     ax.grid(True, alpha=0.3)
 
     # 3. Sigma Distribution (predicted uncertainty)
     ax = fig.add_subplot(gs[0, 2])
     ax.hist(sigma_close, bins=50, color='steelblue', alpha=0.7, edgecolor='white', linewidth=0.5)
     ax.axvline(sigma_close.mean(), color='red', linestyle='--', linewidth=1,
                label=f'Mean sigma: {sigma_close.mean():.4f}')
-    ax.set_xlabel('Predicted sigma (Close)')
+    ax.set_xlabel('Predicted sigma (Close, raw log)')
     ax.set_ylabel('Count')
     ax.set_title('Aleatoric Uncertainty Distribution')
     ax.legend(fontsize=8)
@@ -818,8 +851,9 @@ def plot_btc_overlay(trainer, n_candles=1000):
     prediction segment's opacity is proportional to the model's
     confidence (1/sigma -- low sigma = high confidence = solid line).
 
-    The model predicts next-candle close log-returns. Predictions are
-    mapped back to absolute price with: close_hat = close_t * exp(r_hat).
+    The model predicts next-candle close returns in scaled space.
+    Predictions are mapped back to absolute price using raw log-returns:
+    close_hat = close_t * exp(r_hat / RETURN_SCALE).
     """
     if not HAS_MPL:
         print("  WARNING: matplotlib not available, skipping BTC overlay.")
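A hedged sketch of the docstring's price mapping (RETURN_SCALE taken from the config above; the price values are hypothetical):

    import numpy as np

    RETURN_SCALE = 100.0
    r_hat = np.array([0.12, -0.08])              # model outputs, scaled space
    prev_close = np.array([60_000.0, 60_050.0])  # previous close prices
    close_hat = prev_close * np.exp(r_hat / RETURN_SCALE)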
@@ -883,9 +917,14 @@ def plot_btc_overlay(trainer, n_candles=1000):
         )
         pred = trainer.predict(batch, thinking_steps=TOTAL_STEPS, full_sequence=False)
 
-        mu_c = pred[:, 3].cpu().numpy()
-        log_var_c = np.clip(pred[:, 8].cpu().numpy(), -10.0, 10.0)
-        sigma_c = np.exp(0.5 * log_var_c)
+        mu_c_scaled = pred[:, 3].cpu().numpy()
+        log_var_c = np.clip(
+            pred[:, 8].cpu().numpy(), HeteroscedasticNLL.LOG_VAR_MIN, HeteroscedasticNLL.LOG_VAR_MAX
+        )
+        sigma_c_scaled = np.exp(0.5 * log_var_c)
+
+        mu_c = to_raw_return(mu_c_scaled)
+        sigma_c = to_raw_return(sigma_c_scaled)
 
         prevs = prev_close_refs[start:end]
         pred_return[start:end] = mu_c
@@ -971,7 +1010,7 @@ def plot_btc_overlay(trainer, n_candles=1000):
     print(f"  MAE: ${mae:,.2f}")
     print(f"  MAPE: {mape:.2f}%")
     print(f"  Direction Acc: {dir_acc:.1f}%")
-    print(f"  Mean sigma: {pred_sigma.mean():.4f} (close log-return)")
+    print(f"  Mean sigma: {pred_sigma.mean():.4f} (close raw log-return)")
     print(f"  Confidence range: [{alpha_values.min():.2f}, {alpha_values.max():.2f}]")
 
 
@@ -1020,6 +1059,7 @@ def main():
     print(f"  Total Parameters: {n_params:,}")
     print(f"  Hebbian Params: {n_hebb:,} (per-synapse plasticity)")
     print(f"  Window: {WINDOW_SIZE} return steps + {THINK_STEPS} thinking = {TOTAL_STEPS} steps")
+    print(f"  Return Scale: x{RETURN_SCALE:,.0f} (feature multiplier)")
     print(f"  Input Mode: Stream (continuous sequential injection)")
 
     # ----------------------------------------------------------------
@@ -1083,7 +1123,7 @@ def main():
         h, m, s = int(elapsed // 3600), int((elapsed % 3600) // 60), int(elapsed % 60)
         val_str = (
             f"Val MSE {val_mse:.6f} | Dir {val_dir_acc:5.1f}% | "
-            f"Conf {val_conf_acc:5.1f}% | Cov {val_coverage:5.1f}% | sigma {val_sigma:.4f}"
+            f"Conf {val_conf_acc:5.1f}% | Cov {val_coverage:5.1f}% | sigma(raw) {val_sigma:.4f}"
             if do_val else " (skip val) "
         )
         print(
@@ -1117,27 +1157,33 @@ def main():
     print(f"  Direction Accuracy: {test_dir_acc:.1f}%")
     print(f"  Confident Acc: {test_conf_acc:.1f}%")
     print(f"  Coverage: {test_coverage:.1f}%")
-    print(f"  Avg sigma (Close): {test_sigma:.4f}")
+    print(f"  Avg sigma (Close): {test_sigma:.4f} (raw log-return)")
     print(f"  Best Val Epoch: {best_epoch}")
 
     # Sample predictions with full Gaussian uncertainty
     print(f"\nSample Predictions (Test Set):")
-    print(f"  {'#':>3s} {'last_r':>10s} {'mu_r':>10s} {'true_r':>10s} {'|err|':>8s} {'sigma':>8s} {'P(up)':>7s} {'Dir':>3s}")
-    print(f"  {'_' * 74}")
+    print(f"  {'#':>3s} {'last_r':>11s} {'mu_r':>11s} {'true_r':>11s} {'|err|':>10s} {'sigma':>10s} {'P(up)':>7s} {'Dir':>3s}")
+    print(f"  {'_' * 86}")
 
     with torch.no_grad():
         n_show = min(20, len(test_x))
         pred = trainer.predict(test_x[:n_show], thinking_steps=TOTAL_STEPS, full_sequence=False)
         for i in range(n_show):
-            mu_c = pred[i, 3].item()
-            log_var_c = max(-10.0, min(10.0, pred[i, 8].item()))
-            sigma_c = math.exp(0.5 * log_var_c)
-            last_r = test_x[i, -1, 3].item()
-            t_val = test_y[i, 3].item()
+            mu_c_scaled = pred[i, 3].item()
+            log_var_c = max(
+                HeteroscedasticNLL.LOG_VAR_MIN,
+                min(HeteroscedasticNLL.LOG_VAR_MAX, pred[i, 8].item()),
+            )
+            sigma_c_scaled = math.exp(0.5 * log_var_c)
+
+            mu_c = to_raw_return(mu_c_scaled)
+            sigma_c = to_raw_return(sigma_c_scaled)
+            last_r = to_raw_return(test_x[i, -1, 3].item())
+            t_val = to_raw_return(test_y[i, 3].item())
             err = abs(mu_c - t_val)
-            p_up_val = 0.5 * (1.0 + math.erf(mu_c / (sigma_c * math.sqrt(2.0) + 1e-8)))
+            p_up_val = 0.5 * (1.0 + math.erf(mu_c_scaled / (sigma_c_scaled * math.sqrt(2.0) + 1e-8)))
             d = "OK" if (mu_c > 0.0) == (t_val > 0.0) else "--"
-            print(f"  {i + 1:3d} {last_r:+10.6f} {mu_c:+10.6f} {t_val:+10.6f} {err:8.6f} {sigma_c:8.4f} {p_up_val:6.1%} {d}")
+            print(f"  {i + 1:3d} {last_r:+11.3e} {mu_c:+11.3e} {t_val:+11.3e} {err:10.3e} {sigma_c:10.3e} {p_up_val:6.1%} {d}")
 
     # ----------------------------------------------------------------
     # WORKBENCH ANALYSIS