Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions xrspatial/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,12 +547,12 @@ def quantile(agg: xr.DataArray,
def _run_numpy_jenks_matrices(data, n_classes):
n_data = data.shape[0]
lower_class_limits = np.zeros(
(n_data + 1, n_classes + 1), dtype=np.float32
(n_data + 1, n_classes + 1), dtype=np.float64
)
lower_class_limits[1, 1:n_classes + 1] = 1.0

var_combinations = np.zeros(
(n_data + 1, n_classes + 1), dtype=np.float32
(n_data + 1, n_classes + 1), dtype=np.float64
)
var_combinations[2:n_data + 1, 1:n_classes + 1] = np.inf

Expand All @@ -568,7 +568,7 @@ def _run_numpy_jenks_matrices(data, n_classes):
lower_class_limit = l - m
i4 = lower_class_limit - 1

val = np.float32(data[i4])
val = data[i4]

# here we're estimating variance for each potential classing
# of the data, for each potential number of classes. `w`
Expand Down Expand Up @@ -610,7 +610,7 @@ def _run_jenks(data, n_classes):
lower_class_limits, _ = _run_numpy_jenks_matrices(data, n_classes)

k = data.shape[0]
kclass = np.zeros(n_classes + 1, dtype=np.float32)
kclass = np.zeros(n_classes + 1, dtype=np.float64)
kclass[0] = data[0]
kclass[-1] = data[-1]
count_num = n_classes
Expand Down
33 changes: 33 additions & 0 deletions xrspatial/tests/test_classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,39 @@ def test_natural_breaks_cupy_matches_numpy():
)


def test_natural_breaks_large_offset_1100():
    """Jenks should separate tight clusters even when data has a large offset.

    Regression test for #1100: float32 internals caused the variance
    formula to lose all significant digits for offset data.
    """
    # Five tight clusters of 200 points each, riding on a ~1e5 offset so
    # that float32 intermediates would destroy the between-cluster signal.
    rng = np.random.default_rng(0)
    centers = np.array([100_000, 100_010, 100_020, 100_030, 100_040])
    samples = np.concatenate([c + rng.uniform(-1, 1, 200) for c in centers])
    agg = xr.DataArray(samples.reshape(10, 100))

    classified = natural_breaks(agg, k=5, num_sample=None)
    values = classified.data
    # Materialize the result when the backend is lazy (e.g. dask).
    if hasattr(values, 'compute'):
        values = values.compute()

    # All 5 classes should be present
    unique_classes = np.unique(values[~np.isnan(values)])
    assert len(unique_classes) == 5, (
        f"Expected 5 classes, got {len(unique_classes)}: {unique_classes}"
    )

    # Each center's 200 points should be in a single class
    flat = values.ravel()
    for i, center in enumerate(centers):
        chunk_classes = np.unique(flat[i * 200:(i + 1) * 200])
        assert len(chunk_classes) == 1, (
            f"Center {center}: expected 1 class, got {len(chunk_classes)}"
        )


@dask_array_available
def test_natural_breaks_dask_matches_numpy():
elevation = np.arange(100, dtype=np.float64).reshape(10, 10)
Expand Down
Loading