Skip to content

Commit 5a72629

Browse files
committed
Use float64 in Jenks natural breaks internals (#1100)
The Jenks matrices and the bin edge array used float32, which caused the naive variance formula (sum_squares - sum*sum/w) to lose all significant digits when the data had a large offset relative to its spread. Changed lower_class_limits, var_combinations, and kclass to float64, and removed the np.float32 cast on val so that it keeps the dtype of the input data instead of being truncated to float32.
1 parent 443ed78 commit 5a72629

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

xrspatial/classify.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -547,12 +547,12 @@ def quantile(agg: xr.DataArray,
547547
def _run_numpy_jenks_matrices(data, n_classes):
548548
n_data = data.shape[0]
549549
lower_class_limits = np.zeros(
550-
(n_data + 1, n_classes + 1), dtype=np.float32
550+
(n_data + 1, n_classes + 1), dtype=np.float64
551551
)
552552
lower_class_limits[1, 1:n_classes + 1] = 1.0
553553

554554
var_combinations = np.zeros(
555-
(n_data + 1, n_classes + 1), dtype=np.float32
555+
(n_data + 1, n_classes + 1), dtype=np.float64
556556
)
557557
var_combinations[2:n_data + 1, 1:n_classes + 1] = np.inf
558558

@@ -568,7 +568,7 @@ def _run_numpy_jenks_matrices(data, n_classes):
568568
lower_class_limit = l - m
569569
i4 = lower_class_limit - 1
570570

571-
val = np.float32(data[i4])
571+
val = data[i4]
572572

573573
# here we're estimating variance for each potential classing
574574
# of the data, for each potential number of classes. `w`
@@ -610,7 +610,7 @@ def _run_jenks(data, n_classes):
610610
lower_class_limits, _ = _run_numpy_jenks_matrices(data, n_classes)
611611

612612
k = data.shape[0]
613-
kclass = np.zeros(n_classes + 1, dtype=np.float32)
613+
kclass = np.zeros(n_classes + 1, dtype=np.float64)
614614
kclass[0] = data[0]
615615
kclass[-1] = data[-1]
616616
count_num = n_classes

0 commit comments

Comments
 (0)