Skip to content

Commit 309bac3

Browse files
rocketmark and claude
committed
mpfit_nan_guard: sanitize non-finite numerical derivatives in mp_fdjac2
mp_fdjac2's finite-difference Jacobian writes were only protected by assert(isfinite(fjac[ij])), which -DNDEBUG (the Pi's Release build) strips entirely. A degenerate residual (e.g. right after a Reinit-triggered scene reset) can make wa[i]/fvec[i] non-finite, writing NaN/Inf straight into fjac. That poisons fnorm/ratio in mpfit()'s outer Levenberg-Marquardt loop; since every comparison against a NaN ratio is false, the iteration counter (gated on the ratio>=p0001 success branch) never advances and the maxiter check never fires. Confirmed via reproduction: ~465k spin iterations/sec with zero progress, matching the gdb captures of agent threads stuck at 95-100% CPU for minutes after Reinit. Sanitize fjac to 0 on a non-finite derivative in both the one-sided and two-sided (which had no assert at all) paths, so the column is treated the same way mp_qrfac already handles a zero-norm column. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent e777d6f commit 309bac3

3 files changed

Lines changed: 121 additions & 1 deletion

File tree

redist/mpfit/mpfit.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1228,6 +1228,23 @@ static int mp_fdjac2(mp_func funct, int m, int n, int *ifree, int npar, FLT *x,
12281228
/* Non-debug path for speed */
12291229
for (i = 0; i < m; i++, ij++) {
12301230
fjac[ij] = (wa[i] - fvec[i]) / h; /* fjac[i+m*j] */
1231+
/* Stagehand patch: the assert below is the only protection
1232+
* against a non-finite derivative here, and asserts are
1233+
* compiled out under -DNDEBUG (the Pi's Release build). A
1234+
* degenerate user function (e.g. a residual that goes Inf
1235+
* right after a Reinit-triggered scene reset) then writes
1236+
* NaN/Inf straight into fjac, which poisons ratio/fnorm in
1237+
* mpfit()'s outer loop. Every comparison against a NaN
1238+
* ratio is false, so the iteration counter -- which only
1239+
* advances inside the ratio>=p0001 success branch -- never
1240+
* increments and the maxiter check never fires: an
1241+
* unbounded CPU-spin hang instead of a clean failure
1242+
* return. Sanitize to zero so the column is treated as
1243+
* having no measurable gradient here, same outcome
1244+
* mp_qrfac already handles safely via its ajnorm==0
1245+
* guard. */
1246+
if (!isfinite(fjac[ij]))
1247+
fjac[ij] = 0;
12311248
assert(isfinite(fjac[ij]));
12321249
}
12331250
} else {
@@ -1264,6 +1281,11 @@ static int mp_fdjac2(mp_func funct, int m, int n, int *ifree, int npar, FLT *x,
12641281
/* Non-debug path for speed */
12651282
for (i = 0; i < m; i++, ij++) {
12661283
fjac[ij] = (fjac[ij] - wa[i]) / (2 * h); /* fjac[i+m*j] */
1284+
/* Stagehand patch: same non-finite-derivative hang as the
1285+
* one-sided path above -- this path had no assert at
1286+
* all, debug or release. */
1287+
if (!isfinite(fjac[ij]))
1288+
fjac[ij] = 0;
12671289
}
12681290
} else {
12691291
/* Debug path for correctness */

src/test_cases/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ SET(SURVIVE_TESTS
77
reproject_residual_props event_queue_props
88
residual_cascade_props
99
variance_gate_props
10-
normal_filter_props)
10+
normal_filter_props mpfit_props)
1111

1212
set(barycentric_svd_ADDITIONAL_SRCS ../barycentric_svd/barycentric_svd.c)
1313
set(reproject_residual_props_ADDITIONAL_SRCS ../barycentric_svd/barycentric_svd.c)

src/test_cases/mpfit_props.c

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Regression test for the mpfit numerical-derivative NaN hang (Stagehand patch).
2+
//
3+
// redist/mpfit/mpfit.c's mp_fdjac2() computes a finite-difference Jacobian
4+
// when no analytic derivative is supplied. Its only protection against a
5+
// degenerate (non-finite) residual poisoning fjac was assert(isfinite(...)),
6+
// which -DNDEBUG (the Pi's Release build) strips entirely. A NaN/Inf in fjac
7+
// poisons fnorm/ratio in mpfit()'s outer Levenberg-Marquardt loop; since the
8+
// iteration counter only advances inside the `ratio >= p0001` success branch,
9+
// and every comparison against a NaN ratio is false, the loop never
10+
// increments `iter`, the maxiter check never fires, and mpfit() spins at
11+
// ~100% CPU forever instead of returning a clean failure status.
12+
//
13+
// This test drives mpfit() through the numerical-derivative path with a user
14+
// function whose residual is non-finite from the very first evaluation (the
15+
// starting p[0] already exceeds the function's threshold), and checks that
// mpfit() returns promptly with a finite
16+
// result. It also confirms a well-conditioned fit on the same code path still
17+
// converges normally (no regression on the happy path).
18+
19+
#include "test_case.h"
20+
#include <math.h>
21+
#include "mpfit/mpfit.h"
22+
#include <stdio.h>
23+
24+
// y = p0 * x, with no analytic derivative supplied (dvec ignored) so mpfit
25+
// falls back to mp_fdjac2's numerical derivative. Once p0 is perturbed past
26+
// a threshold the residual goes non-finite, simulating a degenerate
27+
// real-world residual (e.g. right after a Reinit-triggered scene reset).
28+
// Sample set shared with the residual callbacks: the tests pass its address to
// mpfit() and the callbacks receive it back as their `priv` argument.
struct LinearData {
29+
FLT *x; // abscissa values, read by the callbacks as d->x[i]
30+
FLT *y; // observed values, read by the callbacks as d->y[i]
31+
int n; // sample count set by the tests; the callbacks iterate over `m` instead
32+
};
33+
34+
// Residual callback for the one-parameter model y = p[0] * x that degenerates
// on purpose: whenever p[0] is above 1e10 every residual is reported as
// INFINITY; otherwise dy[i] = y[i] - p[0] * x[i].
//
// m     number of residuals to fill
// n     unused
// p     parameter vector; only p[0] is read
// dy    output residuals, m entries written
// dvec  unused, so no analytic derivative is ever written
// priv  pointer to the struct LinearData holding x and y
//
// Always returns 0.
static int linear_degenerate(int m, int n, FLT *p, FLT *dy, FLT **dvec, void *priv) {
35+
struct LinearData *d = (struct LinearData *)priv;
36+
for (int i = 0; i < m; i++) {
37+
// The threshold test depends only on p[0], so all m residuals flip together.
if (p[0] > 1e10) {
38+
dy[i] = INFINITY;
39+
} else {
40+
dy[i] = d->y[i] - p[0] * d->x[i];
41+
}
42+
}
43+
return 0;
44+
}
45+
46+
// Residual callback for the two-parameter line y = p[0] * x + p[1]:
// dy[i] = y[i] - (p[0] * x[i] + p[1]) for each of the m samples.
//
// n and dvec are unused, so no analytic derivative is ever written.
// priv points to the struct LinearData holding x and y. Always returns 0.
static int linear_well_conditioned(int m, int n, FLT *p, FLT *dy, FLT **dvec, void *priv) {
47+
struct LinearData *d = (struct LinearData *)priv;
48+
for (int i = 0; i < m; i++) {
49+
dy[i] = d->y[i] - (p[0] * d->x[i] + p[1]);
50+
}
51+
return 0;
52+
}
53+
54+
// Runs mpfit() on linear_degenerate with a start value that makes every
// residual Inf, and fails (returns -1) if result.bestnorm or the fitted p[0]
// comes back non-finite. Returns 0 on success.
TEST(MpfitProps, NonFiniteResidualDoesNotHang) {
55+
FLT x[5] = {0, 1, 2, 3, 4};
56+
FLT y[5] = {0, 2, 4, 6, 8};
57+
struct LinearData d = {x, y, 5};
58+
59+
// Start far above linear_degenerate's 1e10 threshold, so the residuals are
60+
// already Inf at the first evaluation, before any finite-difference probe.
61+
FLT p[1] = {1e15};
62+
mp_result result = {0};
63+
mp_config cfg = {0};
64+
cfg.maxiter = 50;
65+
66+
int status = mpfit(linear_degenerate, 5, 1, p, 0, &cfg, &d, &result);
67+
68+
// The point of the patch is that this returns at all (no hang) and
69+
// doesn't leave NaN/Inf in the solver's outputs.
70+
// Note: status is only logged below; any return code passes as long as the
// outputs are finite.
if (!isfinite(result.bestnorm) || !isfinite(p[0])) {
71+
fprintf(stderr, "NonFiniteResidualDoesNotHang FAILED: status=%d bestnorm=%f p0=%f\n", status, result.bestnorm,
72+
p[0]);
73+
return -1;
74+
}
75+
return 0;
76+
}
77+
78+
// Fits six exact samples of y = 2x + 1 with linear_well_conditioned, starting
// from p = {0, 0} and passing 0 where the other test passes &cfg. Fails
// (returns -1) if mpfit() reports status <= 0 or either parameter is more than
// 1e-4 away from {2, 1}. Returns 0 on success.
TEST(MpfitProps, WellConditionedFitStillConverges) {
79+
// y = 2x + 1, same numerical-derivative code path as the test above.
80+
FLT x[6] = {0, 1, 2, 3, 4, 5};
81+
FLT y[6] = {1, 3, 5, 7, 9, 11};
82+
struct LinearData d = {x, y, 6};
83+
84+
FLT p[2] = {0, 0};
85+
mp_result result = {0};
86+
87+
int status = mpfit(linear_well_conditioned, 6, 2, p, 0, 0, &d, &result);
88+
89+
// A non-positive status is treated as a failed fit.
if (status <= 0) {
90+
fprintf(stderr, "WellConditionedFitStillConverges FAILED: mpfit status=%d\n", status);
91+
return -1;
92+
}
93+
// Slope and intercept must both match the generating line to within 1e-4.
if (fabs(p[0] - 2.0) > 1e-4 || fabs(p[1] - 1.0) > 1e-4) {
94+
fprintf(stderr, "WellConditionedFitStillConverges FAILED: p=[%.6f,%.6f], want [2,1]\n", p[0], p[1]);
95+
return -1;
96+
}
97+
return 0;
98+
}

0 commit comments

Comments
 (0)