Skip to content

Commit 159db1b

Browse files
authored
fix(#1407): rbf scaling-equivariance — deterministic k-means++ center seeding (#1410)
`RadialBasisFunctionRegressionTests.ScalingEquivariance_ScalingTargets_ScalesPredictions` was flaky in CI: the test creates two model instances on the same X with y scaled by k=100 and asserts pred2/pred1 ≈ k. The reported failure had ratio = 49.66 (pred_original=8.7966, pred_scaled=436.7967 — the scaled model's prediction was roughly half of the expected k·pred_original ≈ 879.66, so the ratio came out near 50 instead of 100). Root cause: SelectCenters created its RNG via `_options.Seed.HasValue ? CreateSeededRandom : CreateSecureRandom()`, i.e. new Random()-style unseeded randomness whenever no seed was configured, so two RBF models built from the same X picked different starting centers, ran k-means to different local minima, and produced different weights. That broke the algebra: with a deterministic feature matrix Φ the solve w = (ΦᵀΦ + λI)⁻¹ Φᵀy is linear in y, so pred2 = k·pred1 exactly. When Φ differs between the two models, that linearity no longer holds. Fix: replace the random init + random empty-cluster fallback with a purely deterministic k-means++ farthest-point seeding (centers[0]=x[0], each next center = argmax of min-distance² to existing centers), and make the empty-cluster fallback also a farthest-point pick. The whole SelectCenters path is now a pure function of X, so two models trained on the same X get identical Φ and identical-up-to-y-scale weights. This also removes the hidden non-determinism that made debugging RBF behavior frustrating in unrelated tests. Closes #1407
1 parent 2ce0b30 commit 159db1b

1 file changed

Lines changed: 74 additions & 11 deletions

File tree

src/Regression/RadialBasisFunctionRegression.cs

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -270,21 +270,64 @@ protected override T PredictSingle(Vector<T> input)
270270
private Matrix<T> SelectCenters(Matrix<T> x)
271271
{
272272
int numCenters = Math.Min(_options.NumberOfCenters, x.Rows);
273-
var random = _options.Seed.HasValue ? RandomHelper.CreateSeededRandom(_options.Seed.Value) : RandomHelper.CreateSecureRandom();
274273

275-
// Initialize centers randomly
274+
// Deterministic k-means++ farthest-point seeding: centers are a pure
275+
// function of X (no RNG). This is essential for invariants like
276+
// scaling/translation-equivariance, where two model instances trained
277+
// on the same X with differently-scaled y must produce predictions
278+
// that scale linearly with y. Random init breaks that property
279+
// because two separate models pick different starting centers and
280+
// converge to different local minima, even though the math says
281+
// weights = (ΦᵀΦ + λI)⁻¹ Φᵀy is linear in y for any *fixed* X-derived
282+
// feature matrix Φ.
283+
//
284+
// Algorithm: first center is x[0]; each subsequent center is the
285+
// point with the largest min-distance to the centers selected so far
286+
// (argmax of d²-min over current centers). This is the deterministic
287+
// farthest-point variant of k-means++ initialization.
276288
var centers = new Matrix<T>(numCenters, x.Columns);
277-
var selectedIndices = new HashSet<int>();
278-
while (selectedIndices.Count < numCenters)
289+
centers.SetRow(0, x.GetRow(0));
290+
291+
if (numCenters > 1)
279292
{
280-
int index = random.Next(x.Rows);
281-
if (selectedIndices.Add(index))
293+
// minDistSq[i] = squared distance from x[i] to its closest current center.
294+
var minDistSq = new double[x.Rows];
295+
for (int i = 0; i < x.Rows; i++)
282296
{
283-
centers.SetRow(selectedIndices.Count - 1, x.GetRow(index));
297+
T d = VectorHelper.EuclideanDistance(x.GetRow(i), centers.GetRow(0));
298+
double dd = NumOps.ToDouble(d);
299+
minDistSq[i] = dd * dd;
300+
}
301+
302+
for (int c = 1; c < numCenters; c++)
303+
{
304+
int farthest = 0;
305+
double farthestDist = minDistSq[0];
306+
for (int i = 1; i < x.Rows; i++)
307+
{
308+
if (minDistSq[i] > farthestDist)
309+
{
310+
farthestDist = minDistSq[i];
311+
farthest = i;
312+
}
313+
}
314+
centers.SetRow(c, x.GetRow(farthest));
315+
316+
// Update minDistSq with the new center's contribution.
317+
for (int i = 0; i < x.Rows; i++)
318+
{
319+
T d = VectorHelper.EuclideanDistance(x.GetRow(i), centers.GetRow(c));
320+
double dd = NumOps.ToDouble(d);
321+
double dSq = dd * dd;
322+
if (dSq < minDistSq[i])
323+
minDistSq[i] = dSq;
324+
}
284325
}
285326
}
286327

287-
// Perform K-means clustering
328+
// Perform K-means clustering. With deterministic init above and
329+
// deterministic empty-cluster fallback below, the entire SelectCenters
330+
// path is now a pure function of X.
288331
const int maxIterations = 100;
289332
var assignments = new int[x.Rows];
290333
var newCenters = new Matrix<T>(numCenters, x.Columns);
@@ -343,9 +386,29 @@ private Matrix<T> SelectCenters(Matrix<T> x)
343386
}
344387
else
345388
{
346-
// If a center has no assigned points, reinitialize it randomly
347-
int randomIndex = random.Next(x.Rows);
348-
newCenters.SetRow(i, x.GetRow(randomIndex));
389+
// Deterministic empty-cluster fallback: reseed with the
390+
// point whose nearest non-empty center is farthest away, which
391+
// mirrors the k-means++ seeding above and keeps training
392+
// a pure function of X.
393+
int farthest = 0;
394+
double farthestDist = -1;
395+
for (int rowIdx = 0; rowIdx < x.Rows; rowIdx++)
396+
{
397+
double minDistToCenter = double.MaxValue;
398+
for (int c = 0; c < numCenters; c++)
399+
{
400+
if (c == i || counts[c] == 0) continue;
401+
T d = VectorHelper.EuclideanDistance(x.GetRow(rowIdx), newCenters.GetRow(c));
402+
double dd = NumOps.ToDouble(d);
403+
if (dd < minDistToCenter) minDistToCenter = dd;
404+
}
405+
if (minDistToCenter > farthestDist)
406+
{
407+
farthestDist = minDistToCenter;
408+
farthest = rowIdx;
409+
}
410+
}
411+
newCenters.SetRow(i, x.GetRow(farthest));
349412
}
350413
}
351414

0 commit comments

Comments
 (0)