|
| 1 | +.. _local-migration: |
| 2 | + |
| 3 | +*************************************************** |
| 4 | +Migrating from ``xrspatial.local`` to native xarray |
| 5 | +*************************************************** |
| 6 | + |
| 7 | +The ``xrspatial.local`` module was removed in v0.6. The functions it provided |
| 8 | +(``cell_stats``, ``combine``, ``lesser_frequency``, ``equal_frequency``, |
| 9 | +``greater_frequency``, ``lowest_position``, ``highest_position``, |
| 10 | +``popularity``, ``rank``) were thin wrappers around operations that xarray and |
| 11 | +NumPy already support natively. |
| 12 | + |
| 13 | +The xarray equivalents below are vectorized, support Dask for lazy/parallel |
| 14 | +evaluation, and work with CuPy-backed arrays for GPU acceleration — none of |
| 15 | +which the old ``xrspatial.local`` functions supported. |
| 16 | + |
| 17 | +Setup used in all examples |
| 18 | +========================== |
| 19 | + |
| 20 | +.. code-block:: python |
| 21 | +
|
| 22 | + import numpy as np |
| 23 | + import xarray as xr |
| 24 | +
|
| 25 | + arr1 = xr.DataArray([[np.nan, 4, 2, 0], |
| 26 | + [2, 3, np.nan, 1], |
| 27 | + [5, 1, 2, 0], |
| 28 | + [1, 3, 2, np.nan]], name="arr1") |
| 29 | +
|
| 30 | + arr2 = xr.DataArray([[3, 1, 1, 2], |
| 31 | + [4, 1, 2, 5], |
| 32 | + [0, 0, 0, 0], |
| 33 | + [np.nan, 1, 1, 1]], name="arr2") |
| 34 | +
|
| 35 | + arr3 = xr.DataArray([[3, 3, 2, 0], |
| 36 | + [4, 1, 3, 1], |
| 37 | + [6, 1, 2, 2], |
| 38 | + [0, 0, 1, 1]], name="arr3") |
| 39 | +
|
| 40 | + ds = xr.merge([arr1, arr2, arr3]) |
| 41 | +
|
| 42 | + # Stack all variables into a single DataArray with a "var" dimension. |
| 43 | + # This is the key building block for all the replacements below. |
| 44 | + stacked = ds.to_array(dim="var") |
| 45 | +
|
| 46 | +
|
| 47 | +Cell Statistics |
| 48 | +=============== |
| 49 | + |
| 50 | +``cell_stats(ds, func='sum')`` computed per-cell statistics across variables. |
| 51 | + |
| 52 | +.. code-block:: python |
| 53 | +
|
| 54 | + # sum (default) |
| 55 | + stacked.sum(dim="var") |
| 56 | +
|
| 57 | + # max / mean / median / min / std |
| 58 | + stacked.max(dim="var") |
| 59 | + stacked.mean(dim="var") |
| 60 | + stacked.median(dim="var") |
| 61 | + stacked.min(dim="var") |
| 62 | + stacked.std(dim="var") |
| 63 | +
|
| 64 | +
|
| 65 | +Combine |
| 66 | +======= |
| 67 | + |
| 68 | +``combine(ds)`` assigned a unique integer ID to each distinct combination of |
| 69 | +values across variables. |
| 70 | + |
| 71 | +.. code-block:: python |
| 72 | +
|
| 73 | + import numpy as np |
| 74 | + import xarray as xr |
| 75 | +
|
| 76 | + # Build a structured view of each cell's value-tuple, then use np.unique |
| 77 | + vals = np.stack([ds[v].values for v in ds.data_vars], axis=-1) # (H, W, N) |
| 78 | + shape = vals.shape[:2] |
| 79 | + flat = vals.reshape(-1, vals.shape[-1]) |
| 80 | +
|
| 81 | + # Mask rows containing any NaN |
| 82 | + has_nan = np.isnan(flat).any(axis=1) |
| 83 | + _, inverse = np.unique(flat[~has_nan], axis=0, return_inverse=True) |
| 84 | +
|
| 85 | + result = np.full(flat.shape[0], np.nan) |
| 86 | + result[~has_nan] = inverse + 1 # 1-based IDs, matching old behaviour |
| 87 | +
|
| 88 | + combined = xr.DataArray(result.reshape(shape)) |
| 89 | +
|
| 90 | +
|
| 91 | +Lesser / Equal / Greater Frequency |
| 92 | +=================================== |
| 93 | + |
| 94 | +``lesser_frequency(ds, ref_var)``, ``equal_frequency(ds, ref_var)``, and |
| 95 | +``greater_frequency(ds, ref_var)`` counted how many variables had values less |
| 96 | +than, equal to, or greater than a reference variable at each cell. |
| 97 | + |
| 98 | +.. code-block:: python |
| 99 | +
|
| 100 | + ref = ds["arr1"] |
| 101 | +
|
| 102 | + # lesser_frequency — count of variables whose value < ref |
| 103 | + (stacked < ref).sum(dim="var") |
| 104 | +
|
| 105 | + # equal_frequency |
| 106 | + (stacked == ref).sum(dim="var") |
| 107 | +
|
| 108 | + # greater_frequency |
| 109 | + (stacked > ref).sum(dim="var") |
| 110 | +
|
| 111 | +.. note:: |
| 112 | + |
| 113 | + If ``ref_var`` was one of the ``data_vars``, the old function excluded it |
| 114 | + from the comparison set. To replicate that, drop it from the stack first:: |
| 115 | + |
| 116 | + others = ds.drop_vars("arr1").to_array(dim="var") |
| 117 | + (others < ds["arr1"]).sum(dim="var") |
| 118 | + |
| 119 | + |
| 120 | +Lowest / Highest Position |
| 121 | +========================= |
| 122 | + |
| 123 | +``lowest_position(ds)`` and ``highest_position(ds)`` returned the 1-based |
| 124 | +index of the variable with the minimum or maximum value at each cell. |
| 125 | + |
| 126 | +.. code-block:: python |
| 127 | +
|
| 128 | + # lowest_position (1-based) |
| 129 | + stacked.argmin(dim="var") + 1 |
| 130 | +
|
| 131 | + # highest_position (1-based) |
| 132 | + stacked.argmax(dim="var") + 1 |
| 133 | +
|
| 134 | +
|
| 135 | +Popularity |
| 136 | +========== |
| 137 | + |
| 138 | +``popularity(ds, ref_var)`` returned the *n*-th most common unique value across |
| 139 | +the other variables, where *n* came from the reference variable. |
| 140 | + |
| 141 | +There is no single-expression xarray equivalent — use a small NumPy helper: |
| 142 | + |
| 143 | +.. code-block:: python |
| 144 | +
|
| 145 | + import numpy as np |
| 146 | + import xarray as xr |
| 147 | +
|
| 148 | + def popularity(ds, ref_var, data_vars=None): |
| 149 | + if data_vars is None: |
| 150 | + data_vars = [v for v in ds.data_vars if v != ref_var] |
| 151 | + vals = np.stack([ds[v].values for v in data_vars], axis=-1) |
| 152 | + ref = ds[ref_var].values |
| 153 | + out = np.full(ref.shape, np.nan) |
| 154 | + for idx in np.ndindex(ref.shape): |
| 155 | + cell = vals[idx] |
| 156 | + if np.isnan(cell).any(): |
| 157 | + continue |
| 158 | +            values, counts = np.unique(cell, return_counts=True)
| 159 | +            by_popularity = values[np.argsort(-counts, kind="stable")]
| 160 | +            n = int(ref[idx]) - 1
| 161 | +            if 0 <= n < len(values) and len(values) < len(cell):
| 162 | +                out[idx] = by_popularity[n]
| 162 | + return xr.DataArray(out) |
| 163 | +
|
| 164 | +
|
| 165 | +Rank |
| 166 | +==== |
| 167 | + |
| 168 | +``rank(ds, ref_var)`` returned the value at the *n*-th sorted position across |
| 169 | +the other variables, where *n* came from the reference variable. |
| 170 | + |
| 171 | +.. code-block:: python |
| 172 | +
|
| 173 | + import numpy as np |
| 174 | + import xarray as xr |
| 175 | +
|
| 176 | + def rank(ds, ref_var, data_vars=None): |
| 177 | + if data_vars is None: |
| 178 | + data_vars = [v for v in ds.data_vars if v != ref_var] |
| 179 | + vals = np.stack([ds[v].values for v in data_vars], axis=-1) |
| 180 | + ref = ds[ref_var].values |
| 181 | + out = np.full(ref.shape, np.nan) |
| 182 | + for idx in np.ndindex(ref.shape): |
| 183 | + cell = vals[idx] |
| 184 | + if np.isnan(cell).any(): |
| 185 | + continue |
| 186 | + n = int(ref[idx]) - 1 |
| 187 | + if 0 <= n < len(cell): |
| 188 | + out[idx] = np.sort(cell)[n] |
| 189 | + return xr.DataArray(out) |
0 commit comments