@@ -12,11 +12,11 @@
 (sub-expert pathways) for different market regimes.
 
 Architecture:
-  - 256 neurons, 0 hidden layers
-  - 5 input neurons (OHLCV), 10 output neurons (5 means + 5 log-variances)
-  - hebb_type='synapse' -- per-synapse plasticity (2*N^2 learnable params)
-  - 30 log-return lookback + 10 thinking steps = 40 total temporal steps
-  - Heteroscedastic Gaussian NLL: per-channel aleatoric uncertainty
+  - NUM_NEURONS neurons, 0 hidden layers
+  - INPUT_DIM input neurons (OHLCV), OUTPUT_DIM output neurons (means + log-variances)
+  - hebb_type='synapse' -- per-synapse plasticity (2*N^2 learnable params)
+  - WINDOW_SIZE return lookback + THINK_STEPS thinking steps per sample
+  - Heteroscedastic Gaussian NLL: per-channel aleatoric uncertainty
 
 Uncertainty Estimation (Kendall & Gal, NeurIPS 2017):
     The network predicts both mu (expected value) and log(sigma^2) (aleatoric
@@ -162,27 +162,34 @@
 # --- Model ---
 NUM_NEURONS = 256
 INPUT_DIM = 5  # O, H, L, C, V
-OUTPUT_DIM = 10  # 5 means (mu) + 5 log-variances (log sigma^2)
-WINDOW_SIZE = 12  # Input sequence length (12 return steps)
-RAW_WINDOW_SIZE = WINDOW_SIZE + 2  # 12 inputs + 1 target return need 14 raw candles
+OUTPUT_DIM = 10  # Means + log-variances for each OHLCV channel
+WINDOW_SIZE = 12  # Input sequence length in return space
+RAW_WINDOW_SIZE = WINDOW_SIZE + 2  # Need previous + WINDOW_SIZE inputs + next target candle
 THINK_STEPS = 10  # Extra thinking steps after the last candle
-TOTAL_STEPS = WINDOW_SIZE + THINK_STEPS  # 22
+TOTAL_STEPS = WINDOW_SIZE + THINK_STEPS
 
 # --- Training ---
 EPOCHS = 100
-BATCH_SIZE = 256
-EVAL_BATCH = 256  # Larger batch for eval (no gradients)
-LR = 5e-5
+BATCH_SIZE = 32
+EVAL_BATCH = 1024  # Larger batch for eval (no gradients)
+LR = 1e-4
 LR_MIN = 1e-6  # Cosine annealing floor
 VAL_EVERY = 3  # Validate every N epochs (validation is expensive)
 PATIENCE = 20  # Early stopping patience (in validation checks)
-CONF_THRESH = 0.55  # Trade only when P(up) is outside [0.45, 0.55]
+CONF_THRESH = 0.55  # Trade only when directional probability is far enough from 0.5
 MAX_WINDOWS_PER_SOURCE = {
     'train': 6000,
     'val':   1200,
     'test':  2000,
 }
-MAX_ABS_NORMALIZED = 20.0  # Filter pathological spikes after return transform
+
+# Feature scaling multiplier applied to return channels before float32 training.
+# Higher values increase numerical visibility of tiny moves.
+RETURN_SCALE = 100.0
+# Base outlier threshold in unscaled return space.
+# Threshold in feature space is this base multiplied by RETURN_SCALE.
+MAX_ABS_NORMALIZED_BASE = 20.0
+MAX_ABS_NORMALIZED = MAX_ABS_NORMALIZED_BASE * RETURN_SCALE
 
 
 # ============================================================================
@@ -247,8 +254,9 @@ def normalize_window(window: np.ndarray) -> np.ndarray | None:
 
     Input shape: (RAW_WINDOW_SIZE, 5) where RAW_WINDOW_SIZE = WINDOW_SIZE + 2.
     Output shape: (WINDOW_SIZE + 1, 5):
-      - OHLC channels: log returns ln(P_t / P_{t-1})
-      - Volume channel: log-volume differences ln(1+V_t) - ln(1+V_{t-1})
+      - OHLC channels: scaled log returns (ln(P_t / P_{t-1}) * RETURN_SCALE)
+      - Volume channel: scaled log-volume differences
+        ((ln(1+V_t) - ln(1+V_{t-1})) * RETURN_SCALE)
 
     Returns float32 features or None if invalid.
     """
@@ -258,13 +266,13 @@ def normalize_window(window: np.ndarray) -> np.ndarray | None:
     prices = window[:, :4]
     if (prices <= 0).any() or not np.isfinite(prices).all():
         return None
-    price_returns = np.log(prices[1:] / prices[:-1])
+    price_returns = np.log(prices[1:] / prices[:-1]) * RETURN_SCALE
 
     vol = window[:, 4]
     if (vol < 0).any() or not np.isfinite(vol).all():
         return None
     vol_log = np.log1p(vol)
-    vol_returns = (vol_log[1:] - vol_log[:-1]).reshape(-1, 1)
+    vol_returns = ((vol_log[1:] - vol_log[:-1]) * RETURN_SCALE).reshape(-1, 1)
 
     out = np.hstack([price_returns, vol_returns]).astype(np.float32)
     if not np.isfinite(out).all():
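For orientation, a minimal smoke test of normalize_window under the new scaling; a sketch only, assuming the function and constants above are in scope and using synthetic data:

    import numpy as np

    rng = np.random.default_rng(0)
    prices = 100.0 * np.exp(np.cumsum(rng.normal(0.0, 0.001, (14, 4)), axis=0))
    volume = rng.uniform(1e3, 1e5, (14, 1))
    window = np.hstack([prices, volume])    # (RAW_WINDOW_SIZE, 5) raw candles

    feats = normalize_window(window)        # scaled returns, or None if invalid
    assert feats is not None and feats.shape == (13, 5)   # WINDOW_SIZE + 1 rows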
@@ -308,15 +316,23 @@ def build_dataset():
     master_cache_file = CACHE_DIR / "master_dataset.npz"
     today = datetime.now().strftime('%Y-%m-%d')
 
+    # Keep scale=1 payload identical to legacy so old caches/fingerprints match.
+    if float(RETURN_SCALE) == 1.0:
+        feature_mode = 'log_returns_v1'
+    else:
+        feature_mode = 'scaled_log_returns_v2'
+
     config_payload = {
         'instruments': INSTRUMENTS,
         'timeframes': TIMEFRAMES,
         'window_size': WINDOW_SIZE,
         'raw_window_size': RAW_WINDOW_SIZE,
-        'feature_mode': 'log_returns_v1',
+        'feature_mode': feature_mode,
         'max_windows_per_source': MAX_WINDOWS_PER_SOURCE,
         'max_abs_normalized': MAX_ABS_NORMALIZED,
     }
+    if float(RETURN_SCALE) != 1.0:
+        config_payload['return_scale'] = RETURN_SCALE
     config_fingerprint = hashlib.sha256(
         json.dumps(config_payload, sort_keys=True).encode('utf-8')
     ).hexdigest()
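The conditional payload exists so the fingerprint stays backward compatible: at RETURN_SCALE == 1.0 the payload carries neither the new key nor the new mode string, so its SHA-256 matches fingerprints written before this change. A hedged sketch of that invariant (helper names are hypothetical):

    import hashlib, json

    def fp(payload):
        return hashlib.sha256(
            json.dumps(payload, sort_keys=True).encode('utf-8')
        ).hexdigest()

    def payload_for(scale):
        p = {'feature_mode': 'log_returns_v1' if scale == 1.0
             else 'scaled_log_returns_v2', 'window_size': 12}
        if scale != 1.0:
            p['return_scale'] = scale
        return p

    legacy = {'feature_mode': 'log_returns_v1', 'window_size': 12}
    assert fp(payload_for(1.0)) == fp(legacy)    # old caches stay valid at scale=1
    assert fp(payload_for(100.0)) != fp(legacy)  # other scales force a rebuild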
@@ -482,8 +498,8 @@ class HeteroscedasticNLL(nn.Module):
     Kendall & Gal, "What Uncertainties Do We Need in Bayesian Deep
     Learning for Computer Vision?", NeurIPS 2017.
     """
-    LOG_VAR_MIN = -10.0  # sigma_min ~= 0.007
-    LOG_VAR_MAX = 10.0   # sigma_max ~= 148.4
+    LOG_VAR_MIN = -10.0  # sigma_scaled_min ~= 0.007
+    LOG_VAR_MAX = 10.0   # sigma_scaled_max ~= 148.4
 
     def forward(self, predicted, target):
         # predicted: (B, 10) -> 5 mu (OHLCV) + 5 log(sigma^2)
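For reference, a minimal sketch of the loss this class implements with the clamp above (per-channel Gaussian NLL with constants dropped; the file's exact reduction may differ):

    import torch

    def heteroscedastic_nll(predicted, target):
        mu, log_var = predicted[:, :5], predicted[:, 5:]
        log_var = torch.clamp(log_var, -10.0, 10.0)
        # NLL = 0.5 * (log sigma^2 + (target - mu)^2 / sigma^2), averaged
        return (0.5 * (log_var + (target - mu) ** 2 * torch.exp(-log_var))).mean()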
@@ -502,14 +518,19 @@ def gaussian_cdf(x):
     return 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))
 
 
+def to_raw_return(x):
+    """Convert scaled return features back to raw log-return units."""
+    return x / RETURN_SCALE
+
+
 # ============================================================================
 # MODEL
 # ============================================================================
 
 def build_model():
-    """Build the Financial Oracle: 256 neurons, synaptic Hebbian, resonant init."""
-    input_ids = list(range(INPUT_DIM))  # [0..4]
-    output_ids = list(range(NUM_NEURONS - OUTPUT_DIM, NUM_NEURONS))  # [246..255]
+    """Build the Financial Oracle with config-driven neuron counts and IO mapping."""
+    input_ids = list(range(INPUT_DIM))
+    output_ids = list(range(NUM_NEURONS - OUTPUT_DIM, NUM_NEURONS))
 
     model = OdyssNet(
         num_neurons=NUM_NEURONS,
@@ -558,8 +579,8 @@ def evaluate(trainer, data_x, data_y):
 
     Returns (avg_mse, avg_sigma_close, direction_accuracy,
     confident_direction_accuracy, coverage).
-      - avg_mse: Mean squared error on mu predictions
-      - avg_sigma_close: Mean predicted sigma for Close channel
+      - avg_mse: Mean squared error on mu predictions (raw log-return)
+      - avg_sigma_close: Mean predicted sigma for Close channel (raw log-return)
       - direction_accuracy: % of correct next-candle Close return sign
       - confident_direction_accuracy: accuracy on confident predictions only
       - coverage: % of samples that triggered a confident signal
@@ -581,12 +602,18 @@ def evaluate(trainer, data_x, data_y):
         pred = trainer.predict(bx, thinking_steps=TOTAL_STEPS, full_sequence=False)
         by_dev = by.to(pred.device)
         mu = pred[:, :5]
-        log_var = torch.clamp(pred[:, 5:], -10.0, 10.0)
+        log_var = torch.clamp(
+            pred[:, 5:], HeteroscedasticNLL.LOG_VAR_MIN, HeteroscedasticNLL.LOG_VAR_MAX
+        )
         sigma = torch.exp(0.5 * log_var)  # (B, 5)
 
-        mse = ((mu - by_dev) ** 2).mean().item()
+        mu_raw = to_raw_return(mu)
+        by_raw = to_raw_return(by_dev)
+        sigma_raw = to_raw_return(sigma)
+
+        mse = ((mu_raw - by_raw) ** 2).mean().item()
         total_mse += mse
-        total_sigma_close += sigma[:, 3].mean().item()
+        total_sigma_close += sigma_raw[:, 3].mean().item()
 
         # Probabilistic next-candle direction from return distribution:
         # P(close_{t+1} > close_t) = P(r_close > 0) = Phi(mu_r / sigma_r)
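A sketch of the confident-direction logic the comment describes (names taken from the surrounding code; the exact lines fall outside this hunk). Note that mu/sigma is invariant to RETURN_SCALE, so P(up) is identical in scaled and raw units:

    p_up = gaussian_cdf(mu[:, 3] / (sigma[:, 3] + 1e-8))     # (B,)
    confident = (p_up - 0.5).abs() >= (CONF_THRESH - 0.5)    # outside [0.45, 0.55]
    correct = (p_up > 0.5) == (by_dev[:, 3] > 0)             # sign of close return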
@@ -716,28 +743,34 @@ def plot_results(history, trainer, test_x, test_y, test_metrics, lr_matrix):
     n_sc = min(800, len(test_x))
     idx = torch.randperm(len(test_x))[:n_sc]
     pred = trainer.predict(test_x[idx], thinking_steps=TOTAL_STEPS, full_sequence=False)
-    mu_close = pred[:, 3].cpu().numpy()
-    t_close = test_y[idx, 3].cpu().numpy()
-    log_var_close = torch.clamp(pred[:, 8], -10.0, 10.0)
-    sigma_close = torch.exp(0.5 * log_var_close).cpu().numpy()
+    mu_close_scaled = pred[:, 3].cpu().numpy()
+    t_close_scaled = test_y[idx, 3].cpu().numpy()
+    log_var_close = torch.clamp(
+        pred[:, 8], HeteroscedasticNLL.LOG_VAR_MIN, HeteroscedasticNLL.LOG_VAR_MAX
+    )
+    sigma_close_scaled = torch.exp(0.5 * log_var_close).cpu().numpy()
+
+    mu_close = to_raw_return(mu_close_scaled)
+    t_close = to_raw_return(t_close_scaled)
+    sigma_close = to_raw_return(sigma_close_scaled)
 
     sc = ax.scatter(t_close, mu_close, c=sigma_close, cmap='RdYlGn_r',
                     alpha=0.4, s=8, vmin=np.percentile(sigma_close, 5),
                     vmax=np.percentile(sigma_close, 95))
     lims = [min(t_close.min(), mu_close.min()), max(t_close.max(), mu_close.max())]
     ax.plot(lims, lims, 'k--', alpha=0.3, linewidth=0.8)
-    ax.set_xlabel('True Close Return (log)')
+    ax.set_xlabel('True Close Return (raw log)')
     ax.set_ylabel('Predicted mu_close_return')
     ax.set_title(f'Close Return Prediction (Dir Acc {test_dir_acc:.1f}%)')
-    plt.colorbar(sc, ax=ax, label='sigma (uncertainty)', shrink=0.8)
+    plt.colorbar(sc, ax=ax, label='sigma (raw log-return)', shrink=0.8)
     ax.grid(True, alpha=0.3)
 
     # 3. Sigma Distribution (predicted uncertainty)
     ax = fig.add_subplot(gs[0, 2])
     ax.hist(sigma_close, bins=50, color='steelblue', alpha=0.7, edgecolor='white', linewidth=0.5)
     ax.axvline(sigma_close.mean(), color='red', linestyle='--', linewidth=1,
                label=f'Mean sigma: {sigma_close.mean():.4f}')
-    ax.set_xlabel('Predicted sigma (Close)')
+    ax.set_xlabel('Predicted sigma (Close, raw log)')
     ax.set_ylabel('Count')
     ax.set_title('Aleatoric Uncertainty Distribution')
     ax.legend(fontsize=8)
@@ -818,8 +851,9 @@ def plot_btc_overlay(trainer, n_candles=1000):
     prediction segment's opacity is proportional to the model's
     confidence (1/sigma -- low sigma = high confidence = solid line).
 
-    The model predicts next-candle close log-returns. Predictions are
-    mapped back to absolute price with: close_hat = close_t * exp(r_hat).
+    The model predicts next-candle close returns in scaled space.
+    Predictions are mapped back to absolute price using raw log-returns:
+    close_hat = close_t * exp(r_hat / RETURN_SCALE).
     """
     if not HAS_MPL:
         print("  WARNING: matplotlib not available, skipping BTC overlay.")
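A hedged sketch of the docstring's price mapping (RETURN_SCALE taken from the config above; the price values are hypothetical):

    import numpy as np

    RETURN_SCALE = 100.0
    r_hat = np.array([0.12, -0.08])              # model outputs, scaled space
    prev_close = np.array([60_000.0, 60_050.0])  # previous close prices
    close_hat = prev_close * np.exp(r_hat / RETURN_SCALE)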
@@ -883,9 +917,14 @@ def plot_btc_overlay(trainer, n_candles=1000):
         )
         pred = trainer.predict(batch, thinking_steps=TOTAL_STEPS, full_sequence=False)
 
-        mu_c = pred[:, 3].cpu().numpy()
-        log_var_c = np.clip(pred[:, 8].cpu().numpy(), -10.0, 10.0)
-        sigma_c = np.exp(0.5 * log_var_c)
+        mu_c_scaled = pred[:, 3].cpu().numpy()
+        log_var_c = np.clip(
+            pred[:, 8].cpu().numpy(), HeteroscedasticNLL.LOG_VAR_MIN, HeteroscedasticNLL.LOG_VAR_MAX
+        )
+        sigma_c_scaled = np.exp(0.5 * log_var_c)
+
+        mu_c = to_raw_return(mu_c_scaled)
+        sigma_c = to_raw_return(sigma_c_scaled)
 
         prevs = prev_close_refs[start:end]
         pred_return[start:end] = mu_c
@@ -971,7 +1010,7 @@ def plot_btc_overlay(trainer, n_candles=1000):
     print(f"  MAE: ${mae:,.2f}")
     print(f"  MAPE: {mape:.2f}%")
     print(f"  Direction Acc: {dir_acc:.1f}%")
-    print(f"  Mean sigma: {pred_sigma.mean():.4f} (close log-return)")
+    print(f"  Mean sigma: {pred_sigma.mean():.4f} (close raw log-return)")
     print(f"  Confidence range: [{alpha_values.min():.2f}, {alpha_values.max():.2f}]")
 
 
@@ -1020,6 +1059,7 @@ def main():
     print(f"  Total Parameters: {n_params:,}")
     print(f"  Hebbian Params: {n_hebb:,} (per-synapse plasticity)")
     print(f"  Window: {WINDOW_SIZE} return steps + {THINK_STEPS} thinking = {TOTAL_STEPS} steps")
+    print(f"  Return Scale: x{RETURN_SCALE:,.0f} (feature multiplier)")
     print(f"  Input Mode: Stream (continuous sequential injection)")
 
     # ----------------------------------------------------------------
@@ -1083,7 +1123,7 @@ def main():
         h, m, s = int(elapsed // 3600), int((elapsed % 3600) // 60), int(elapsed % 60)
         val_str = (
             f"Val MSE {val_mse:.6f} | Dir {val_dir_acc:5.1f}% | "
-            f"Conf {val_conf_acc:5.1f}% | Cov {val_coverage:5.1f}% | sigma {val_sigma:.4f}"
+            f"Conf {val_conf_acc:5.1f}% | Cov {val_coverage:5.1f}% | sigma(raw) {val_sigma:.4f}"
             if do_val else " (skip val) "
         )
         print(
@@ -1117,27 +1157,33 @@ def main():
     print(f"  Direction Accuracy: {test_dir_acc:.1f}%")
     print(f"  Confident Acc: {test_conf_acc:.1f}%")
     print(f"  Coverage: {test_coverage:.1f}%")
-    print(f"  Avg sigma (Close): {test_sigma:.4f}")
+    print(f"  Avg sigma (Close): {test_sigma:.4f} (raw log-return)")
     print(f"  Best Val Epoch: {best_epoch}")
 
     # Sample predictions with full Gaussian uncertainty
     print(f"\nSample Predictions (Test Set):")
-    print(f"  {'#':>3s} {'last_r':>10s} {'mu_r':>10s} {'true_r':>10s} {'|err|':>8s} {'sigma':>8s} {'P(up)':>7s} {'Dir':>3s}")
-    print(f"  {'_' * 74}")
+    print(f"  {'#':>3s} {'last_r':>11s} {'mu_r':>11s} {'true_r':>11s} {'|err|':>10s} {'sigma':>10s} {'P(up)':>7s} {'Dir':>3s}")
+    print(f"  {'_' * 86}")
 
     with torch.no_grad():
         n_show = min(20, len(test_x))
         pred = trainer.predict(test_x[:n_show], thinking_steps=TOTAL_STEPS, full_sequence=False)
         for i in range(n_show):
-            mu_c = pred[i, 3].item()
-            log_var_c = max(-10.0, min(10.0, pred[i, 8].item()))
-            sigma_c = math.exp(0.5 * log_var_c)
-            last_r = test_x[i, -1, 3].item()
-            t_val = test_y[i, 3].item()
+            mu_c_scaled = pred[i, 3].item()
+            log_var_c = max(
+                HeteroscedasticNLL.LOG_VAR_MIN,
+                min(HeteroscedasticNLL.LOG_VAR_MAX, pred[i, 8].item()),
+            )
+            sigma_c_scaled = math.exp(0.5 * log_var_c)
+
+            mu_c = to_raw_return(mu_c_scaled)
+            sigma_c = to_raw_return(sigma_c_scaled)
+            last_r = to_raw_return(test_x[i, -1, 3].item())
+            t_val = to_raw_return(test_y[i, 3].item())
             err = abs(mu_c - t_val)
-            p_up_val = 0.5 * (1.0 + math.erf(mu_c / (sigma_c * math.sqrt(2.0) + 1e-8)))
+            p_up_val = 0.5 * (1.0 + math.erf(mu_c_scaled / (sigma_c_scaled * math.sqrt(2.0) + 1e-8)))
             d = "OK" if (mu_c > 0.0) == (t_val > 0.0) else "--"
-            print(f"  {i + 1:3d} {last_r:+10.6f} {mu_c:+10.6f} {t_val:+10.6f} {err:8.6f} {sigma_c:8.4f} {p_up_val:6.1%} {d}")
+            print(f"  {i + 1:3d} {last_r:+11.3e} {mu_c:+11.3e} {t_val:+11.3e} {err:10.3e} {sigma_c:10.3e} {p_up_val:6.1%} {d}")
 
     # ----------------------------------------------------------------
     # WORKBENCH ANALYSIS