Fixed the unit-mismatch heuristic for cupy- and dask-backed arrays: the vertical-unit check now samples a few windows instead of computing the full array

brendancol · brendancol · commit ccbb894feaa3 · 2026-01-09T11:18:51.000-08:00
diff --git a/xrspatial/utils.py b/xrspatial/utils.py
@@ -452,7 +452,6 @@ def _convert_color(c):
     return tf.Image(f(agg.data))
 
 
-
 def _infer_coord_unit_type(coord: xr.DataArray, cellsize: float) -> str:
     """
     Heuristic to classify a spatial coordinate axis as:
@@ -504,50 +503,40 @@ def _infer_coord_unit_type(coord: xr.DataArray, cellsize: float) -> str:
     return "unknown"
 
 
-def _infer_vertical_unit_type(agg: xr.DataArray) -> str:
-    """
-    Heuristic to classify the DataArray values as:
-    - 'elevation'  (meters/feet etc)
-    - 'angle'      (degrees/radians)
-    - 'unknown'
-    """
+def _infer_vertical_unit_type(agg):
+    """
+    Classify the values of ``agg`` as 'elevation', 'angle' or 'unknown'.
+
+    The ``units`` attribute is consulted first. When it names no recognised
+    unit, the decision falls back to the min/max of a few sampled windows of
+    ``agg.data``. Any exception raised while sampling, or a non-finite
+    min/max, yields 'unknown'.
+    """
     units = str(agg.attrs.get("units", "")).lower()
 
-    # 1) Explicit units
-    if any(k in units for k in ("degree", "deg")):
-        return "angle"
-    if "rad" in units:
+    # Explicit units attribute first: cheap and reliable.
+    # Angle units are matched as substrings, length units by exact name.
+    if any(k in units for k in ("degree", "deg")) or "rad" in units:
         return "angle"
     if units in ("m", "meter", "metre", "meters", "metres",
                  "km", "kilometer", "kilometre", "kilometers", "kilometres",
                  "ft", "foot", "feet"):
         return "elevation"
 
-    # 2) Numeric heuristics on data range
-    data = agg.values
-    if not np.issubdtype(data.dtype, np.number):
+    # Numeric fallback: min/max from a few sampled windows (never a full compute).
+    # NOTE(review): there is no explicit numeric-dtype check here; non-numeric
+    # data presumably fails inside the sampler and lands in the except - confirm.
+    data = agg.data
+    try:
+        vmin, vmax = _sample_windows_min_max(data, max_window_elems=65536, windows=5)
+    except Exception:
         return "unknown"
 
-    finite = np.isfinite(data)
-    if not np.any(finite):
+    # NaN or infinite extremes carry no usable range information.
+    if not np.isfinite(vmin) or not np.isfinite(vmax):
         return "unknown"
 
-    vmin = float(data[finite].min())
-    vmax = float(data[finite].max())
     span = vmax - vmin
 
-    # Elevation-like: tens–thousands of units, typical DEM ranges.
+    # Elevation-like: span between 10 and 20000 with a minimum above -500.
+    # NOTE: checked before the angle rule, so e.g. a 0..360 range is reported
+    # as 'elevation'.
     if 10.0 <= span <= 20000.0 and vmin > -500.0:
         return "elevation"
 
-    # Angle-like: often 0–360, -180–180, or small (-pi, pi)
-    if -360.0 <= vmin <= 360.0 and -360.0 <= vmax <= 360.0:
-        # If the span is not huge, treat as angle-ish
-        if span <= 720.0:
-            return "angle"
+    # Angle-like: both extremes within [-360, 360] and a span of at most 720.
+    if -360.0 <= vmin <= 360.0 and -360.0 <= vmax <= 360.0 and span <= 720.0:
+        return "angle"
 
     return "unknown"
 
+
 def warn_if_unit_mismatch(agg: xr.DataArray) -> None:
     """
     Heuristic check for horizontal vs vertical unit mismatch.
@@ -599,3 +588,106 @@ def warn_if_unit_mismatch(agg: xr.DataArray) -> None:
             "meter-based coordinates before calling `slope`.",
             UserWarning,
         )
+
+
+def _to_float_scalar(x) -> float:
+    """
+    Return ``x`` as a built-in ``float``.
+
+    ``x`` may be a plain Python number, a numpy/cupy scalar or a 0-d array.
+    A ``cupy.ndarray`` is converted with ``cupy.asnumpy`` first; any other
+    object exposing ``.item()`` is unwrapped through that method.
+    """
+    if cupy is not None and isinstance(x, cupy.ndarray):
+        host_value = cupy.asnumpy(x)
+        return float(host_value.item())
+
+    # Covers numpy scalars / 0-d arrays as well as cupy scalar types.
+    unwrapped = x.item() if hasattr(x, "item") else x
+    return float(unwrapped)
+
+
+def _sample_windows_min_max(
+    data,
+    *,
+    max_window_elems: int = 65536,   # e.g. 256x256
+    windows: int = 5,                # corners + center default
+) -> tuple[float, float]:
+    """
+    Estimate (nanmin, nanmax) from a small sample of windows.
+
+    Works for numpy, cupy, dask+numpy, dask+cupy. Only computes on the sampled
+    windows, not the full array.
+
+    Parameters
+    ----------
+    data : array-like with at least 2 dimensions
+        Sampled on its last two axes; for higher-rank input only index 0 of
+        every leading axis is inspected.
+    max_window_elems : int
+        Upper bound on the number of elements per square window; the window
+        side is ``int(sqrt(max_window_elems))`` clamped to ``[1, min(ny, nx)]``.
+    windows : int
+        Number of windows requested. The four corner windows are always
+        sampled; the centre window is added when ``windows >= 5`` and
+        ``windows - 5`` further windows are spread along the diagonal.
+
+    Returns
+    -------
+    tuple of float
+        ``(min, max)`` over the sampled windows, ignoring NaN. Both values are
+        NaN when the array is empty or every sampled value is NaN.
+    """
+    import warnings  # function-scope import keeps this helper self-contained
+
+    # Sample the trailing (y, x) axes; pin every leading axis to index 0.
+    prefix = (0,) * max(0, getattr(data, "ndim", 0) - 2)
+
+    ny, nx = data.shape[-2], data.shape[-1]
+    if ny == 0 or nx == 0:
+        return np.nan, np.nan
+
+    # Square window side, bounded by the array so that w <= min(ny, nx).
+    w = int(np.sqrt(max_window_elems))
+    w = max(1, min(w, ny, nx))
+    y_last, x_last = ny - w, nx - w  # largest valid anchors; never negative
+
+    # Four corners, then the centre, then optional extras along the diagonal.
+    anchors = [(0, 0), (0, x_last), (y_last, 0), (y_last, x_last)]
+    if windows >= 5:
+        anchors.append((ny // 2 - w // 2, nx // 2 - w // 2))
+    if windows > 5:
+        extra = windows - 5
+        ys = np.linspace(0, y_last, extra + 2, dtype=int)[1:-1]
+        xs = np.linspace(0, x_last, extra + 2, dtype=int)[1:-1]
+        anchors.extend((int(y0), int(x0)) for y0, x0 in zip(ys, xs))
+
+    # Small arrays collapse several anchors onto the same window; reduce each
+    # distinct window only once (order is preserved).
+    anchors = list(dict.fromkeys(anchors))
+
+    mins = []
+    maxs = []
+
+    with warnings.catch_warnings():
+        # A window that is entirely NaN (e.g. a nodata corner) makes
+        # nanmin/nanmax emit "All-NaN slice" RuntimeWarnings. The resulting
+        # NaN is dropped by the NaN-ignoring reductions below, so the warning
+        # is only noise for the caller.
+        warnings.simplefilter("ignore", category=RuntimeWarning)
+
+        for y0, x0 in anchors:
+            win = data[prefix + (slice(y0, y0 + w), slice(x0, x0 + w))]
+
+            if da is not None and isinstance(win, da.Array):
+                # Lazy 0-d results; nothing is computed yet.
+                mins.append(da.nanmin(win))
+                maxs.append(da.nanmax(win))
+            elif cupy is not None and isinstance(win, cupy.ndarray):
+                mins.append(cupy.nanmin(win))
+                maxs.append(cupy.nanmax(win))
+            else:
+                mins.append(np.nanmin(win))
+                maxs.append(np.nanmax(win))
+
+        # dask: a single compute for both extrema so each window is loaded once.
+        if da is not None and any(isinstance(m, da.Array) for m in mins):
+            lo_hi = da.stack(
+                [da.nanmin(da.stack(mins)), da.nanmax(da.stack(maxs))]
+            ).compute()
+            return _to_float_scalar(lo_hi[0]), _to_float_scalar(lo_hi[1])
+
+        # cupy: reduce on device with NaN-ignoring reductions. cupy.minimum /
+        # cupy.maximum propagate NaN, so a single all-NaN window would turn the
+        # whole estimate into NaN, unlike the numpy and dask paths.
+        if cupy is not None and any(
+            isinstance(m, cupy.ndarray)
+            or getattr(m.__class__, "__module__", "").startswith("cupy")
+            for m in mins
+        ):
+            mn = cupy.nanmin(cupy.stack([cupy.asarray(m) for m in mins]))
+            mx = cupy.nanmax(cupy.stack([cupy.asarray(m) for m in maxs]))
+            return _to_float_scalar(mn), _to_float_scalar(mx)
+
+        # numpy scalars
+        return (
+            float(np.nanmin(np.array(mins, dtype=float))),
+            float(np.nanmax(np.array(maxs, dtype=float))),
+        )