Skip to content

Commit a140823

Browse files
mokashang and dcherian authored
Preserve step in RangeIndex.arange and slicing (#11362)
Co-authored-by: Deepak Cherian <dcherian@users.noreply.github.com>
1 parent 4b22e98 commit a140823

3 files changed

Lines changed: 39 additions & 14 deletions

File tree

doc/whats-new.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@ Bug Fixes
3333
a ``zarr_format=3`` store with ``use_zarr_fill_value_as_mask=False``, so it is no
3434
longer silently lost on round-trip (:issue:`10269`).
3535
By `Davis Bennett <https://github.com/d-v-b>`_.
36+
- :py:meth:`~xarray.indexes.RangeIndex.arange` now preserves the requested
37+
``step`` instead of silently re-deriving it from ``(stop - start) / size``, so
38+
its values match :py:func:`numpy.arange` when ``step`` does not evenly divide
39+
the interval. Strided slicing of a :py:class:`~xarray.indexes.RangeIndex` now
40+
preserves the step as well (:issue:`11325`).
41+
By `mokashang <https://github.com/mokashang>`_.
3642
- Fix :py:func:`decode_cf` failing on integer-encoded time arrays that contain
3743
NaT when running against numpy 2.5+.
3844
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.

xarray/indexes/range_index.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def __init__(
3232
coord_name: Hashable,
3333
dim: str,
3434
dtype: Any = None,
35+
step: float | None = None,
3536
):
3637
if dtype is None:
3738
dtype = np.dtype(np.float64)
@@ -40,7 +41,7 @@ def __init__(
4041

4142
self.start = start
4243
self.stop = stop
43-
self._step = None # Will be calculated by property
44+
self._step = step
4445

4546
@property
4647
def coord_name(self) -> Hashable:
@@ -121,21 +122,23 @@ def slice(self, sl: slice) -> "RangeCoordinateTransform":
121122
new_range = range(self.size)[sl]
122123
new_size = len(new_range)
123124

125+
# A slice scales the spacing by its own step, e.g. ``[::2]`` doubles it.
126+
# Preserve the exact resulting step instead of letting it be re-derived
127+
# from ``(stop - start) / size``, which would be wrong whenever the
128+
# spacing does not evenly divide the interval. See GH11325.
129+
new_step = self.step * new_range.step
124130
new_start = self.start + new_range.start * self.step
125-
new_stop = self.start + new_range.stop * self.step
131+
new_stop = new_start + new_size * new_step
126132

127-
result = type(self)(
133+
return type(self)(
128134
new_start,
129135
new_stop,
130136
new_size,
131137
self.coord_name,
132138
self.dim,
133139
dtype=self.dtype,
140+
step=new_step,
134141
)
135-
if new_size == 0:
136-
# For empty slices, preserve step from parent
137-
result._step = self.step
138-
return result
139142

140143

141144
class RangeIndex(CoordinateTransformIndex):
@@ -278,8 +281,13 @@ def arange(
278281

279282
size = math.ceil((stop - start) / step)
280283

284+
# Snap ``stop`` to ``start + size * step`` and keep the exact ``step`` so
285+
# that the materialized values match ``numpy.arange`` even when ``step``
286+
# does not evenly divide ``stop - start``. See GH11325.
287+
stop = start + size * step
288+
# Snap `stop` to `start + size * step`
281289
transform = RangeCoordinateTransform(
282-
start, stop, size, coord_name, dim, dtype=dtype
290+
start, stop, size, coord_name, dim, dtype=dtype, step=step
283291
)
284292

285293
return cls(transform)

xarray/tests/test_range_index.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,19 @@ def test_range_index_arange_properties() -> None:
6464
assert index.step == 0.1
6565

6666

67+
def test_range_index_arange_step_not_dividing_interval() -> None:
68+
# GH11325: when ``step`` does not evenly divide ``stop - start`` the
69+
# requested step must still be honored and the materialized values must
70+
# match ``numpy.arange`` (previously the step was silently re-derived from
71+
# ``(stop - start) / size``, e.g. 0.25 instead of the requested 0.3).
72+
index = RangeIndex.arange(0.0, 1.0, 0.3, dim="x")
73+
assert index.step == 0.3
74+
assert index.size == 4
75+
actual = xr.Coordinates.from_xindex(index)
76+
expected = xr.Coordinates({"x": np.arange(0.0, 1.0, 0.3)})
77+
assert_equal(actual, expected, check_default_indexes=False)
78+
79+
6780
def test_range_index_linspace() -> None:
6881
index = RangeIndex.linspace(0.0, 1.0, num=10, endpoint=False, dim="x")
6982
actual = xr.Coordinates.from_xindex(index)
@@ -141,7 +154,8 @@ def test_range_index_isel() -> None:
141154
ds2 = create_dataset_arange(0.0, 3.0, 0.1)
142155
actual = ds2.isel(x=slice(4, None, 3))
143156
expected = create_dataset_arange(0.4, 3.0, 0.3)
144-
assert_identical(actual, expected, check_default_indexes=False, check_indexes=True)
157+
assert actual.xindexes["x"].equals(expected.xindexes["x"])
158+
np.testing.assert_allclose(actual["x"].values, np.arange(0.0, 3.0, 0.1)[4::3])
145159

146160
# scalar
147161
actual = ds.isel(x=0)
@@ -372,11 +386,8 @@ def test_range_index_equals_exact() -> None:
372386
# Create an index directly
373387
index1 = RangeIndex.arange(0.0, 0.3, 0.1, dim="x")
374388

375-
# Create the same index by slicing - this accumulates floating point error
376-
index_large = RangeIndex.arange(0.0, 1.0, 0.1, dim="x")
377-
ds_large = xr.Dataset(coords=xr.Coordinates.from_xindex(index_large))
378-
ds_sliced = ds_large.isel(x=slice(3))
379-
index2 = ds_sliced.xindexes["x"]
389+
# Create an index whose start differs by a tiny floating point amount
390+
index2 = RangeIndex.arange(1e-12, 0.3 + 1e-12, 0.1, dim="x")
380391

381392
# Default (exact=False) should be equal due to np.isclose tolerance
382393
assert index1.equals(index2)

0 commit comments

Comments
 (0)