Skip to content

Commit 0b8a5c1

Browse files
committed
Add regression test for Jenks float32 precision (#1100)
test_natural_breaks_large_offset_1100: five tight clusters offset by 100,000 must be separated into 5 distinct classes. With float32 internals, the variance calculation lost all signal and merged clusters.
1 parent 5a72629 commit 0b8a5c1

File tree

1 file changed

+33
-0
lines changed

1 file changed

+33
-0
lines changed

xrspatial/tests/test_classify.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -488,6 +488,39 @@ def test_natural_breaks_cupy_matches_numpy():
488488
)
489489

490490

491+
def test_natural_breaks_large_offset_1100():
    """Jenks must keep tight clusters apart despite a large data offset.

    Regression test for #1100: with float32 internals the variance formula
    lost every significant digit on offset data, so clusters were merged.
    Five clusters of 200 points each, centred 10 apart starting at 100,000,
    must land in five distinct classes with no cluster split across classes.
    """
    points_per_cluster = 200
    rng = np.random.default_rng(0)
    centers = np.array([100_000, 100_010, 100_020, 100_030, 100_040])

    # Draw the clusters one after another so that each one occupies a
    # contiguous run of the flattened raster (checked further down).
    clusters = []
    for c in centers:
        clusters.append(c + rng.uniform(-1, 1, points_per_cluster))
    agg = xr.DataArray(np.concatenate(clusters).reshape(10, 100))

    labels = natural_breaks(agg, k=5, num_sample=None).data
    if hasattr(labels, 'compute'):
        # The backing array exposes compute(); materialise it before use.
        labels = labels.compute()

    # Every one of the 5 classes has to show up among the non-NaN labels.
    valid = ~np.isnan(labels)
    unique_classes = np.unique(labels[valid])
    assert len(unique_classes) == 5, (
        f"Expected 5 classes, got {len(unique_classes)}: {unique_classes}"
    )

    # The 200 points drawn around a given center must share one class.
    flat_labels = labels.ravel()
    for i, center in enumerate(centers):
        lo = i * points_per_cluster
        chunk_classes = np.unique(flat_labels[lo:lo + points_per_cluster])
        assert len(chunk_classes) == 1, (
            f"Center {center}: expected 1 class, got {len(chunk_classes)}"
        )
522+
523+
491524
@dask_array_available
492525
def test_natural_breaks_dask_matches_numpy():
493526
elevation = np.arange(100, dtype=np.float64).reshape(10, 10)

0 commit comments

Comments
 (0)