
Commit b93d683

franklinic and claude committed
fix: two more PR #1155 review comments on NBEATSBlock
1. (Major) Interpretable theta sizes weren't validated against polynomialDegree + 1. ComputeBasisTensor and ApplyBasisExpansion both cap usable rows at polynomialDegree + 1, so oversized thetaSizeBackcast / thetaSizeForecast silently allocated dead trainable weights that couldn't influence the output. Added explicit checks for interpretable mode.

2. (Critical) ForwardInternal's generic branch in ApplyBasisExpansion returned theta directly instead of multiplying by the learned V_b / V_f basis tensors. With the Phase 1 fix that made those bases round-trip through GetParameters/SetParameters, PredictSingle would diverge from both Forward() (which uses _basisBackcast / _basisForecast via matmul) and from loaded-model state. Changed ApplyBasisExpansion to take a basis tensor argument and multiply by it in the generic branch, matching the training and tape paths per Oreshkin et al. 2020 §3.2.

Build: net10.0 clean, 0 errors.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
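As a rough illustration of point 1 (a standalone sketch with plain doubles and illustrative names, not the repository's code): a polynomial trend basis of degree p can only ever read the first p + 1 theta coefficients, so any weights allocated beyond that are trainable but unreachable.

```csharp
using System;

// Sketch: why theta entries past degree + 1 are dead weights under a
// polynomial (trend) basis. Illustrative only, not NBEATSBlock's code.
static double[] ExpandTrend(double[] theta, int degree, int outputLength)
{
    var output = new double[outputLength];
    int usable = Math.Min(theta.Length, degree + 1); // basis has only degree + 1 terms
    for (int t = 0; t < outputLength; t++)
    {
        double time = (double)t / outputLength;        // normalized time step
        for (int k = 0; k < usable; k++)
        {
            output[t] += theta[k] * Math.Pow(time, k); // theta[k] scales t^k
        }
        // theta[usable..] is never read: it can be trained, serialized, and
        // round-tripped, yet it cannot change the output.
    }
    return output;
}
```

With degree = 2 and a five-element theta, the last two coefficients never appear in any output term; that silent case is what the new constructor checks reject up front.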
1 parent 9087cc7 · commit b93d683

1 file changed

Lines changed: 47 additions & 6 deletions

File tree

src/TimeSeries/NBEATSBlock.cs

@@ -158,6 +158,24 @@ public NBEATSBlock(
         {
             throw new ArgumentException("Polynomial degree must be non-negative for interpretable basis.", nameof(polynomialDegree));
         }
+        // Interpretable-basis builders cap usable theta at polynomialDegree + 1
+        // (ComputeBasisTensor populates only that many rows; ApplyBasisExpansion
+        // slices to the same count). Silently accepting oversized theta sizes
+        // would allocate trainable weights that are mathematically disconnected
+        // from the output — dead parameters that waste memory and mask bugs
+        // during gradient checks.
+        if (useInterpretableBasis && thetaSizeBackcast > polynomialDegree + 1)
+        {
+            throw new ArgumentException(
+                $"Backcast theta size ({thetaSizeBackcast}) cannot exceed polynomialDegree + 1 ({polynomialDegree + 1}) for interpretable basis.",
+                nameof(thetaSizeBackcast));
+        }
+        if (useInterpretableBasis && thetaSizeForecast > polynomialDegree + 1)
+        {
+            throw new ArgumentException(
+                $"Forecast theta size ({thetaSizeForecast}) cannot exceed polynomialDegree + 1 ({polynomialDegree + 1}) for interpretable basis.",
+                nameof(thetaSizeForecast));
+        }
 
         _lookbackWindow = lookbackWindow;
         _forecastHorizon = forecastHorizon;
@@ -458,9 +476,12 @@ public override void UpdateParameters(T learningRate)
             thetaForecast[i] = NumOps.Add(fcBiasVec[i], fcWx[i, 0]);
         }
 
-        // Apply basis expansion
-        Vector<T> backcast = ApplyBasisExpansion(thetaBackcast, _lookbackWindow);
-        Vector<T> forecast = ApplyBasisExpansion(thetaForecast, _forecastHorizon);
+        // Apply basis expansion. Pass the matching basis tensor so generic
+        // blocks multiply by their learned V_b / V_f matrices (keeping this
+        // path consistent with Forward() / ForwardTape() and with the
+        // parameter export/import of _basisBackcast / _basisForecast).
+        Vector<T> backcast = ApplyBasisExpansion(thetaBackcast, _basisBackcast, _lookbackWindow);
+        Vector<T> forecast = ApplyBasisExpansion(thetaForecast, _basisForecast, _forecastHorizon);
 
         return (backcast, forecast);
     }
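For reference, the linear-basis picture behind both call sites, in the notation of the paper the comments cite (Oreshkin et al. 2020; summarized here from the commit message, with bias terms and equation numbers elided):

$$\hat{x}_\ell = V_\ell^{b}\,\theta_\ell^{b}, \qquad \hat{y}_\ell = V_\ell^{f}\,\theta_\ell^{f},$$

where $V_\ell^{b} \in \mathbb{R}^{L \times \dim\theta^{b}}$ and $V_\ell^{f} \in \mathbb{R}^{H \times \dim\theta^{f}}$ are trainable in the generic architecture (lookback length $L$, forecast horizon $H$). The interpretable trend block instead fixes the basis to the polynomial matrix $T = [\mathbf{1}, t, t^{2}, \ldots, t^{p}]$, which has only $p + 1$ columns; that is the same $\dim\theta \le p + 1$ cap the constructor checks above enforce.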
@@ -537,7 +558,17 @@ private Matrix<T> ComputeBasisMatrix(int thetaSize, int outputLength)
         return basis;
     }
 
-    private Vector<T> ApplyBasisExpansion(Vector<T> theta, int outputLength)
+    /// <summary>
+    /// Expands the theta coefficients into an output time series of the requested length.
+    /// </summary>
+    /// <param name="theta">The theta coefficient vector produced by the fc head.</param>
+    /// <param name="basis">
+    /// The basis matrix for the generic branch — shape [outputLength, theta.Length].
+    /// Ignored when <see cref="_useInterpretableBasis"/> is <c>true</c> (the closed-form
+    /// polynomial basis is computed on the fly from <see cref="_polynomialDegree"/>).
+    /// </param>
+    /// <param name="outputLength">Length of the expanded output vector.</param>
+    private Vector<T> ApplyBasisExpansion(Vector<T> theta, Tensor<T> basis, int outputLength)
     {
         Vector<T> output = new Vector<T>(outputLength);
 
@@ -563,10 +594,20 @@ private Vector<T> ApplyBasisExpansion(Vector<T> theta, int outputLength)
         }
         else
         {
-            // Generic basis: identity — theta[t] maps directly to output[t]
+            // Generic basis: output = basis · theta. Must use the learned V_b/V_f
+            // matrices per Oreshkin et al. 2020 Section 3.2 — they round-trip through
+            // GetParameters/SetParameters as trainable weights, and the tape-based
+            // Forward path multiplies by the same tensors. Returning theta directly
+            // here (as the pre-fix code did) made PredictSingle diverge from both
+            // training and model-load state.
             for (int t = 0; t < outputLength; t++)
             {
-                output[t] = (t < theta.Length) ? theta[t] : NumOps.Zero;
+                T value = NumOps.Zero;
+                for (int k = 0; k < theta.Length; k++)
+                {
+                    value = NumOps.Add(value, NumOps.Multiply(basis[t, k], theta[k]));
+                }
+                output[t] = value;
             }
         }
 