
Commit 0f831c8

fix(financial-oracle): make RETURN_SCALE transition safe and comments scale-agnostic
- Preserve legacy behavior when RETURN_SCALE is 1
- Define outlier filtering as the base threshold multiplied by RETURN_SCALE
- Keep cache fingerprint behavior backward-compatible for RETURN_SCALE=1
- Replace scale-specific/hardcoded comment and log wording with parameter-driven text
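
The convention this commit establishes, in one minimal sketch (names taken from the diff below; to_features is a hypothetical stand-in for the OHLC branch of normalize_window): features are log returns multiplied by RETURN_SCALE on the way in, and anything user-facing is divided back down, so RETURN_SCALE = 1.0 reproduces the legacy pipeline.

    import numpy as np

    RETURN_SCALE = 100.0  # 1.0 restores legacy behavior

    def to_features(prices: np.ndarray) -> np.ndarray:
        # Hypothetical stand-in for the OHLC branch of normalize_window().
        return np.log(prices[1:] / prices[:-1]) * RETURN_SCALE

    def to_raw_return(x):
        # Mirrors the helper added in the diff below.
        return x / RETURN_SCALE

    prices = np.array([100.0, 101.0, 99.5])
    feats = to_features(prices)
    # Round trip: reported values come back in raw log-return units.
    np.testing.assert_allclose(to_raw_return(feats), np.log(prices[1:] / prices[:-1]))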
1 parent a247245 commit 0f831c8

3 files changed: 99 additions & 53 deletions

examples/advanced/experiment_financial_oracle.py
@@ -12,11 +12,11 @@
     (sub-expert pathways) for different market regimes.

 Architecture:
-    - 256 neurons, 0 hidden layers
-    - 5 input neurons (OHLCV), 10 output neurons (5 means + 5 log-variances)
-    - hebb_type='synapse' -- per-synapse plasticity (2*N^2 learnable params)
-    - 30 log-return lookback + 10 thinking steps = 40 total temporal steps
-    - Heteroscedastic Gaussian NLL: per-channel aleatoric uncertainty
+    - NUM_NEURONS neurons, 0 hidden layers
+    - INPUT_DIM input neurons (OHLCV), OUTPUT_DIM output neurons (means + log-variances)
+    - hebb_type='synapse' -- per-synapse plasticity (2*N^2 learnable params)
+    - WINDOW_SIZE return lookback + THINK_STEPS thinking steps per sample
+    - Heteroscedastic Gaussian NLL: per-channel aleatoric uncertainty

 Uncertainty Estimation (Kendall & Gal, NeurIPS 2017):
     The network predicts both mu (expected value) and log(sigma^2) (aleatoric
@@ -162,27 +162,34 @@
 # --- Model ---
 NUM_NEURONS = 256
 INPUT_DIM = 5  # O, H, L, C, V
-OUTPUT_DIM = 10  # 5 means (mu) + 5 log-variances (log sigma^2)
-WINDOW_SIZE = 12  # Input sequence length (12 return steps)
-RAW_WINDOW_SIZE = WINDOW_SIZE + 2  # 12 inputs + 1 target returns need 14 raw candles
+OUTPUT_DIM = 10  # Means + log-variances for each OHLCV channel
+WINDOW_SIZE = 12  # Input sequence length in return space
+RAW_WINDOW_SIZE = WINDOW_SIZE + 2  # Need previous + WINDOW_SIZE inputs + next target candle
 THINK_STEPS = 10  # Extra thinking after last candles
-TOTAL_STEPS = WINDOW_SIZE + THINK_STEPS  # 22
+TOTAL_STEPS = WINDOW_SIZE + THINK_STEPS

 # --- Training ---
 EPOCHS = 100
-BATCH_SIZE = 256
-EVAL_BATCH = 256  # Larger batch for eval (no gradients)
-LR = 5e-5
+BATCH_SIZE = 32
+EVAL_BATCH = 1024  # Larger batch for eval (no gradients)
+LR = 1e-4
 LR_MIN = 1e-6  # Cosine annealing floor
 VAL_EVERY = 3  # Validate every N epochs (validation is expensive)
 PATIENCE = 20  # Early stopping patience (in validation checks)
-CONF_THRESH = 0.55  # Trade only when P(up) is outside [0.45, 0.55]
+CONF_THRESH = 0.55  # Trade only when directional probability is far enough from 0.5
 MAX_WINDOWS_PER_SOURCE = {
     'train': 6000,
     'val': 1200,
     'test': 2000,
 }
-MAX_ABS_NORMALIZED = 20.0  # Filter pathological spikes after return transform
+
+# Feature scaling multiplier applied to return channels before float32 training.
+# Higher values increase numerical visibility of tiny moves.
+RETURN_SCALE = 100.0
+# Base outlier threshold in unscaled return space.
+# Threshold in feature space is this base multiplied by RETURN_SCALE.
+MAX_ABS_NORMALIZED_BASE = 20.0
+MAX_ABS_NORMALIZED = MAX_ABS_NORMALIZED_BASE * RETURN_SCALE


 # ============================================================================
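
Why the threshold is defined as base times RETURN_SCALE: the spike filter then gives the same verdict in raw and feature space. A quick check under the values above (raw_return is an arbitrary example):

    RETURN_SCALE = 100.0
    MAX_ABS_NORMALIZED_BASE = 20.0
    MAX_ABS_NORMALIZED = MAX_ABS_NORMALIZED_BASE * RETURN_SCALE

    raw_return = 0.035                    # an arbitrary 3.5% log return
    feature = raw_return * RETURN_SCALE   # 3.5 in feature space
    # Same verdict whether the filter runs on raw or scaled values.
    assert (abs(feature) <= MAX_ABS_NORMALIZED) == (abs(raw_return) <= MAX_ABS_NORMALIZED_BASE)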
@@ -247,8 +254,9 @@ def normalize_window(window: np.ndarray) -> np.ndarray | None:

     Input shape: (RAW_WINDOW_SIZE, 5) where RAW_WINDOW_SIZE = WINDOW_SIZE + 2.
     Output shape: (WINDOW_SIZE + 1, 5):
-        - OHLC channels: log returns ln(P_t / P_{t-1})
-        - Volume channel: log-volume differences ln(1+V_t) - ln(1+V_{t-1})
+        - OHLC channels: scaled log returns (ln(P_t / P_{t-1}) * RETURN_SCALE)
+        - Volume channel: scaled log-volume differences
+          ((ln(1+V_t) - ln(1+V_{t-1})) * RETURN_SCALE)

     Returns float32 features or None if invalid.
     """
@@ -258,13 +266,13 @@ def normalize_window(window: np.ndarray) -> np.ndarray | None:
     prices = window[:, :4]
     if (prices <= 0).any() or not np.isfinite(prices).all():
         return None
-    price_returns = np.log(prices[1:] / prices[:-1])
+    price_returns = np.log(prices[1:] / prices[:-1]) * RETURN_SCALE

     vol = window[:, 4]
     if (vol < 0).any() or not np.isfinite(vol).all():
         return None
     vol_log = np.log1p(vol)
-    vol_returns = (vol_log[1:] - vol_log[:-1]).reshape(-1, 1)
+    vol_returns = ((vol_log[1:] - vol_log[:-1]) * RETURN_SCALE).reshape(-1, 1)

     out = np.hstack([price_returns, vol_returns]).astype(np.float32)
     if not np.isfinite(out).all():
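
For reference, the post-change transform in isolation, assembled from the added lines above; the hunk ends at the isfinite check, so the tail (return None on failure, else return out) is an assumption, as is the usage snippet:

    import numpy as np

    RETURN_SCALE = 100.0

    def normalize_window(window: np.ndarray) -> np.ndarray | None:
        """Sketch of the post-change transform from the hunk above."""
        prices = window[:, :4]
        if (prices <= 0).any() or not np.isfinite(prices).all():
            return None
        price_returns = np.log(prices[1:] / prices[:-1]) * RETURN_SCALE

        vol = window[:, 4]
        if (vol < 0).any() or not np.isfinite(vol).all():
            return None
        vol_log = np.log1p(vol)
        vol_returns = ((vol_log[1:] - vol_log[:-1]) * RETURN_SCALE).reshape(-1, 1)

        out = np.hstack([price_returns, vol_returns]).astype(np.float32)
        if not np.isfinite(out).all():
            return None  # assumed tail: hunk ends at the isfinite check
        return out

    # 14 raw candles (RAW_WINDOW_SIZE) -> 13 return rows (WINDOW_SIZE + 1).
    window = np.abs(np.random.default_rng(0).normal(100.0, 1.0, size=(14, 5)))
    feats = normalize_window(window)
    assert feats is not None and feats.shape == (13, 5) and feats.dtype == np.float32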
@@ -308,15 +316,23 @@ def build_dataset():
     master_cache_file = CACHE_DIR / "master_dataset.npz"
     today = datetime.now().strftime('%Y-%m-%d')

+    # Keep scale=1 payload identical to legacy so old caches/fingerprints match.
+    if float(RETURN_SCALE) == 1.0:
+        feature_mode = 'log_returns_v1'
+    else:
+        feature_mode = 'scaled_log_returns_v2'
+
     config_payload = {
         'instruments': INSTRUMENTS,
         'timeframes': TIMEFRAMES,
         'window_size': WINDOW_SIZE,
         'raw_window_size': RAW_WINDOW_SIZE,
-        'feature_mode': 'log_returns_v1',
+        'feature_mode': feature_mode,
         'max_windows_per_source': MAX_WINDOWS_PER_SOURCE,
         'max_abs_normalized': MAX_ABS_NORMALIZED,
     }
+    if float(RETURN_SCALE) != 1.0:
+        config_payload['return_scale'] = RETURN_SCALE
     config_fingerprint = hashlib.sha256(
         json.dumps(config_payload, sort_keys=True).encode('utf-8')
     ).hexdigest()
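
The cache-compatibility argument in miniature (toy payloads carrying only a subset of the real keys, assuming nothing beyond what the hunk shows): with RETURN_SCALE == 1.0 the payload has the legacy feature_mode and no return_scale key, so its SHA-256 fingerprint is unchanged and old caches still hit; any other scale changes the mode tag and adds the key, forcing a rebuild.

    import hashlib
    import json

    def fingerprint(payload: dict) -> str:
        return hashlib.sha256(json.dumps(payload, sort_keys=True).encode('utf-8')).hexdigest()

    legacy = {'feature_mode': 'log_returns_v1', 'window_size': 12}
    scale_one = {'feature_mode': 'log_returns_v1', 'window_size': 12}  # RETURN_SCALE == 1.0 path
    scaled = {'feature_mode': 'scaled_log_returns_v2', 'window_size': 12,
              'return_scale': 100.0}                                   # any other scale

    assert fingerprint(scale_one) == fingerprint(legacy)  # old cache stays valid
    assert fingerprint(scaled) != fingerprint(legacy)     # dataset gets rebuilt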
@@ -482,8 +498,8 @@ class HeteroscedasticNLL(nn.Module):
     Kendall & Gal, "What Uncertainties Do We Need in Bayesian Deep
     Learning for Computer Vision?", NeurIPS 2017.
     """
-    LOG_VAR_MIN = -10.0  # sigma_min ~= 0.007
-    LOG_VAR_MAX = 10.0   # sigma_max ~= 148.4
+    LOG_VAR_MIN = -10.0  # sigma_scaled_min ~= 0.007
+    LOG_VAR_MAX = 10.0   # sigma_scaled_max ~= 148.4

     def forward(self, predicted, target):
         # predicted: (B, 10) -> 5 mu (OHLCV) + 5 log(sigma^2)
@@ -502,14 +518,19 @@ def gaussian_cdf(x):
     return 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0)))


+def to_raw_return(x):
+    """Convert scaled return features back to raw log-return units."""
+    return x / RETURN_SCALE
+
+
 # ============================================================================
 # MODEL
 # ============================================================================

 def build_model():
-    """Build the Financial Oracle: 256 neurons, synaptic Hebbian, resonant init."""
-    input_ids = list(range(INPUT_DIM))  # [0..4]
-    output_ids = list(range(NUM_NEURONS - OUTPUT_DIM, NUM_NEURONS))  # [246..255]
+    """Build the Financial Oracle with config-driven neuron counts and IO mapping."""
+    input_ids = list(range(INPUT_DIM))
+    output_ids = list(range(NUM_NEURONS - OUTPUT_DIM, NUM_NEURONS))

     model = OdyssNet(
         num_neurons=NUM_NEURONS,
@@ -558,8 +579,8 @@ def evaluate(trainer, data_x, data_y):

     Returns (avg_mse, avg_sigma_close, direction_accuracy,
              confident_direction_accuracy, coverage).
-    - avg_mse: Mean squared error on mu predictions
-    - avg_sigma_close: Mean predicted sigma for Close channel
+    - avg_mse: Mean squared error on mu predictions (raw log-return)
+    - avg_sigma_close: Mean predicted sigma for Close channel (raw log-return)
     - direction_accuracy: % of correct next-candle Close return sign
     - confident_direction_accuracy: accuracy on confident predictions only
     - coverage: % of samples that triggered a confident signal
@@ -581,12 +602,18 @@ def evaluate(trainer, data_x, data_y):
         pred = trainer.predict(bx, thinking_steps=TOTAL_STEPS, full_sequence=False)
         by_dev = by.to(pred.device)
         mu = pred[:, :5]
-        log_var = torch.clamp(pred[:, 5:], -10.0, 10.0)
+        log_var = torch.clamp(
+            pred[:, 5:], HeteroscedasticNLL.LOG_VAR_MIN, HeteroscedasticNLL.LOG_VAR_MAX
+        )
         sigma = torch.exp(0.5 * log_var)  # (B, 5)

-        mse = ((mu - by_dev) ** 2).mean().item()
+        mu_raw = to_raw_return(mu)
+        by_raw = to_raw_return(by_dev)
+        sigma_raw = to_raw_return(sigma)
+
+        mse = ((mu_raw - by_raw) ** 2).mean().item()
         total_mse += mse
-        total_sigma_close += sigma[:, 3].mean().item()
+        total_sigma_close += sigma_raw[:, 3].mean().item()

         # Probabilistic next-candle direction from return distribution:
         # P(close_{t+1} > close_t) = P(r_close > 0) = Phi(mu_r / sigma_r)
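
One consequence of converting before the metric, worth keeping in mind when comparing runs across commits (not stated in the diff itself): dividing mu and target by RETURN_SCALE shrinks the reported MSE by RETURN_SCALE squared relative to the scaled training loss. A self-contained check with stand-in tensors:

    import torch

    RETURN_SCALE = 100.0

    def to_raw_return(x):
        return x / RETURN_SCALE

    mu = torch.randn(8, 5)      # stand-ins for scaled predictions and targets
    target = torch.randn(8, 5)

    mse_scaled = ((mu - target) ** 2).mean()
    mse_raw = ((to_raw_return(mu) - to_raw_return(target)) ** 2).mean()
    torch.testing.assert_close(mse_raw, mse_scaled / RETURN_SCALE ** 2)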
@@ -716,28 +743,34 @@ def plot_results(history, trainer, test_x, test_y, test_metrics, lr_matrix):
     n_sc = min(800, len(test_x))
     idx = torch.randperm(len(test_x))[:n_sc]
     pred = trainer.predict(test_x[idx], thinking_steps=TOTAL_STEPS, full_sequence=False)
-    mu_close = pred[:, 3].cpu().numpy()
-    t_close = test_y[idx, 3].cpu().numpy()
-    log_var_close = torch.clamp(pred[:, 8], -10.0, 10.0)
-    sigma_close = torch.exp(0.5 * log_var_close).cpu().numpy()
+    mu_close_scaled = pred[:, 3].cpu().numpy()
+    t_close_scaled = test_y[idx, 3].cpu().numpy()
+    log_var_close = torch.clamp(
+        pred[:, 8], HeteroscedasticNLL.LOG_VAR_MIN, HeteroscedasticNLL.LOG_VAR_MAX
+    )
+    sigma_close_scaled = torch.exp(0.5 * log_var_close).cpu().numpy()
+
+    mu_close = to_raw_return(mu_close_scaled)
+    t_close = to_raw_return(t_close_scaled)
+    sigma_close = to_raw_return(sigma_close_scaled)

     sc = ax.scatter(t_close, mu_close, c=sigma_close, cmap='RdYlGn_r',
                     alpha=0.4, s=8, vmin=np.percentile(sigma_close, 5),
                     vmax=np.percentile(sigma_close, 95))
     lims = [min(t_close.min(), mu_close.min()), max(t_close.max(), mu_close.max())]
     ax.plot(lims, lims, 'k--', alpha=0.3, linewidth=0.8)
-    ax.set_xlabel('True Close Return (log)')
+    ax.set_xlabel('True Close Return (raw log)')
     ax.set_ylabel('Predicted mu_close_return')
     ax.set_title(f'Close Return Prediction (Dir Acc {test_dir_acc:.1f}%)')
-    plt.colorbar(sc, ax=ax, label='sigma (uncertainty)', shrink=0.8)
+    plt.colorbar(sc, ax=ax, label='sigma (raw log-return)', shrink=0.8)
     ax.grid(True, alpha=0.3)

     # 3. Sigma Distribution (predicted uncertainty)
     ax = fig.add_subplot(gs[0, 2])
     ax.hist(sigma_close, bins=50, color='steelblue', alpha=0.7, edgecolor='white', linewidth=0.5)
     ax.axvline(sigma_close.mean(), color='red', linestyle='--', linewidth=1,
                label=f'Mean sigma: {sigma_close.mean():.4f}')
-    ax.set_xlabel('Predicted sigma (Close)')
+    ax.set_xlabel('Predicted sigma (Close, raw log)')
     ax.set_ylabel('Count')
     ax.set_title('Aleatoric Uncertainty Distribution')
     ax.legend(fontsize=8)
@@ -818,8 +851,9 @@ def plot_btc_overlay(trainer, n_candles=1000):
     prediction segment's opacity is proportional to the model's
     confidence (1/sigma -- low sigma = high confidence = solid line).

-    The model predicts next-candle close log-returns. Predictions are
-    mapped back to absolute price with: close_hat = close_t * exp(r_hat).
+    The model predicts next-candle close returns in scaled space.
+    Predictions are mapped back to absolute price using raw log-returns:
+    close_hat = close_t * exp(r_hat / RETURN_SCALE).
     """
     if not HAS_MPL:
         print("  WARNING: matplotlib not available, skipping BTC overlay.")
@@ -883,9 +917,14 @@ def plot_btc_overlay(trainer, n_candles=1000):
         )
         pred = trainer.predict(batch, thinking_steps=TOTAL_STEPS, full_sequence=False)

-        mu_c = pred[:, 3].cpu().numpy()
-        log_var_c = np.clip(pred[:, 8].cpu().numpy(), -10.0, 10.0)
-        sigma_c = np.exp(0.5 * log_var_c)
+        mu_c_scaled = pred[:, 3].cpu().numpy()
+        log_var_c = np.clip(
+            pred[:, 8].cpu().numpy(), HeteroscedasticNLL.LOG_VAR_MIN, HeteroscedasticNLL.LOG_VAR_MAX
+        )
+        sigma_c_scaled = np.exp(0.5 * log_var_c)
+
+        mu_c = to_raw_return(mu_c_scaled)
+        sigma_c = to_raw_return(sigma_c_scaled)

         prevs = prev_close_refs[start:end]
         pred_return[start:end] = mu_c
@@ -971,7 +1010,7 @@ def plot_btc_overlay(trainer, n_candles=1000):
     print(f"  MAE: ${mae:,.2f}")
     print(f"  MAPE: {mape:.2f}%")
     print(f"  Direction Acc: {dir_acc:.1f}%")
-    print(f"  Mean sigma: {pred_sigma.mean():.4f} (close log-return)")
+    print(f"  Mean sigma: {pred_sigma.mean():.4f} (close raw log-return)")
     print(f"  Confidence range: [{alpha_values.min():.2f}, {alpha_values.max():.2f}]")

@@ -1020,6 +1059,7 @@ def main():
     print(f"  Total Parameters: {n_params:,}")
     print(f"  Hebbian Params: {n_hebb:,} (per-synapse plasticity)")
     print(f"  Window: {WINDOW_SIZE} return steps + {THINK_STEPS} thinking = {TOTAL_STEPS} steps")
+    print(f"  Return Scale: x{RETURN_SCALE:,.0f} (feature multiplier)")
     print(f"  Input Mode: Stream (continuous sequential injection)")

     # ----------------------------------------------------------------
@@ -1083,7 +1123,7 @@ def main():
         h, m, s = int(elapsed // 3600), int((elapsed % 3600) // 60), int(elapsed % 60)
         val_str = (
             f"Val MSE {val_mse:.6f} | Dir {val_dir_acc:5.1f}% | "
-            f"Conf {val_conf_acc:5.1f}% | Cov {val_coverage:5.1f}% | sigma {val_sigma:.4f}"
+            f"Conf {val_conf_acc:5.1f}% | Cov {val_coverage:5.1f}% | sigma(raw) {val_sigma:.4f}"
             if do_val else " (skip val) "
         )
         print(
@@ -1117,27 +1157,33 @@ def main():
     print(f"  Direction Accuracy: {test_dir_acc:.1f}%")
     print(f"  Confident Acc: {test_conf_acc:.1f}%")
     print(f"  Coverage: {test_coverage:.1f}%")
-    print(f"  Avg sigma (Close): {test_sigma:.4f}")
+    print(f"  Avg sigma (Close): {test_sigma:.4f} (raw log-return)")
     print(f"  Best Val Epoch: {best_epoch}")

     # Sample predictions with full Gaussian uncertainty
     print(f"\n  Sample Predictions (Test Set):")
-    print(f"  {'#':>3s} {'last_r':>10s} {'mu_r':>10s} {'true_r':>10s} {'|err|':>8s} {'sigma':>8s} {'P(up)':>7s} {'Dir':>3s}")
-    print(f"  {'_'*74}")
+    print(f"  {'#':>3s} {'last_r':>11s} {'mu_r':>11s} {'true_r':>11s} {'|err|':>10s} {'sigma':>10s} {'P(up)':>7s} {'Dir':>3s}")
+    print(f"  {'_'*86}")

     with torch.no_grad():
         n_show = min(20, len(test_x))
         pred = trainer.predict(test_x[:n_show], thinking_steps=TOTAL_STEPS, full_sequence=False)
         for i in range(n_show):
-            mu_c = pred[i, 3].item()
-            log_var_c = max(-10.0, min(10.0, pred[i, 8].item()))
-            sigma_c = math.exp(0.5 * log_var_c)
-            last_r = test_x[i, -1, 3].item()
-            t_val = test_y[i, 3].item()
+            mu_c_scaled = pred[i, 3].item()
+            log_var_c = max(
+                HeteroscedasticNLL.LOG_VAR_MIN,
+                min(HeteroscedasticNLL.LOG_VAR_MAX, pred[i, 8].item()),
+            )
+            sigma_c_scaled = math.exp(0.5 * log_var_c)
+
+            mu_c = to_raw_return(mu_c_scaled)
+            sigma_c = to_raw_return(sigma_c_scaled)
+            last_r = to_raw_return(test_x[i, -1, 3].item())
+            t_val = to_raw_return(test_y[i, 3].item())
             err = abs(mu_c - t_val)
-            p_up_val = 0.5 * (1.0 + math.erf(mu_c / (sigma_c * math.sqrt(2.0) + 1e-8)))
+            p_up_val = 0.5 * (1.0 + math.erf(mu_c_scaled / (sigma_c_scaled * math.sqrt(2.0) + 1e-8)))
             d = "OK" if (mu_c > 0.0) == (t_val > 0.0) else "--"
-            print(f"  {i+1:3d} {last_r:+10.6f} {mu_c:+10.6f} {t_val:+10.6f} {err:8.6f} {sigma_c:8.4f} {p_up_val:6.1%} {d}")
+            print(f"  {i+1:3d} {last_r:+11.3e} {mu_c:+11.3e} {t_val:+11.3e} {err:10.3e} {sigma_c:10.3e} {p_up_val:6.1%} {d}")

     # ----------------------------------------------------------------
     # WORKBENCH ANALYSIS
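
Why the last hunk keeps the scaled pair for p_up_val: the scale cancels in mu/sigma, so Phi(mu/sigma) is, up to the 1e-8 guard term, identical in scaled and raw units, and the scaled values skip two divisions. A quick check with arbitrary example numbers:

    import math

    RETURN_SCALE = 100.0

    def p_up(mu, sigma):
        return 0.5 * (1.0 + math.erf(mu / (sigma * math.sqrt(2.0) + 1e-8)))

    mu_scaled, sigma_scaled = 0.12, 0.80  # arbitrary example values
    # The ratio mu/sigma is scale-free, so both computations agree closely.
    assert abs(p_up(mu_scaled, sigma_scaled)
               - p_up(mu_scaled / RETURN_SCALE, sigma_scaled / RETURN_SCALE)) < 1e-6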
[2 binary files changed (not rendered): -329 KB and 76.1 KB]
