Skip to content

Commit ee9ec9b

Browse files
emmanuel-ferdman authored and jsignell committed
fix: support datetime variables in Dataset.interp (pydata#11081)
`Dataset.interp()` silently dropped `datetime64` and `timedelta64` variables. Now they are interpolated by converting to `float64` and back, with `NaT` handled like `NaN`.
1 parent e6cef1c commit ee9ec9b

4 files changed

Lines changed: 106 additions & 3 deletions

File tree

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ Bug Fixes
4747
By `Ian Hunt-Isaak <https://github.com/ianhi>`_
4848
- Coerce masked dask arrays to filled (:issue:`9374` :pull:`11157`).
4949
By `Julia Signell <https://github.com/jsignell>`_
50+
- Fix :py:meth:`Dataset.interp` silently dropping datetime64 and timedelta64
51+
variables; they are now interpolated instead (:issue:`10900`, :pull:`11081`).
52+
By `Emmanuel Ferdman <https://github.com/emmanuel-ferdman>`_.
5053

5154
Documentation
5255
~~~~~~~~~~~~~

xarray/core/dataarray.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2407,7 +2407,7 @@ def interp(
24072407
* x (x) float64 32B 0.0 0.75 1.25 1.75
24082408
* y (y) int64 24B 11 13 15
24092409
"""
2410-
if self.dtype.kind not in "uifc":
2410+
if self.dtype.kind not in "uifcMm":
24112411
raise TypeError(
24122412
f"interp only works for a numeric type array. Given {self.dtype}."
24132413
)
@@ -2548,7 +2548,7 @@ def interp_like(
25482548
* y (y) int64 24B 70 80 90
25492549
"""
25502550

2551-
if self.dtype.kind not in "uifc":
2551+
if self.dtype.kind not in "uifcMm":
25522552
raise TypeError(
25532553
f"interp only works for a numeric type array. Given {self.dtype}."
25542554
)

xarray/core/dataset.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from xarray.coding.calendar_ops import convert_calendar, interp_calendar
3535
from xarray.coding.cftimeindex import CFTimeIndex, _parse_array_of_cftime_strings
3636
from xarray.compat.array_api_compat import to_like_array
37-
from xarray.computation import ops
37+
from xarray.computation import computation, ops
3838
from xarray.computation.arithmetic import DatasetArithmetic
3939
from xarray.core import dtypes as xrdtypes
4040
from xarray.core import duck_array_ops, formatting, formatting_html, utils
@@ -3940,6 +3940,21 @@ def _validate_interp_indexer(x, new_x):
39403940
# For normal number types do the interpolation:
39413941
var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims}
39423942
variables[name] = missing.interp(var, var_indexers, method, **kwargs)
3943+
elif dtype_kind in "Mm" and (use_indexers.keys() & var.dims):
3944+
# For datetime-like types, interpolate as float64 (NaT <-> NaN; inexact for |int64| > 2**53):
3945+
var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims}
3946+
int_data = var.astype(np.int64)
3947+
nat = np.iinfo(np.int64).min
3948+
as_float = computation.where(
3949+
int_data != nat, int_data.astype(np.float64), np.nan
3950+
)
3951+
result = missing.interp(as_float, var_indexers, method, **kwargs)
3952+
as_int = computation.where(
3953+
~result.isnull(),
3954+
result.fillna(0).round().astype(np.int64),
3955+
nat,
3956+
)
3957+
variables[name] = as_int.astype(var.dtype)
39433958
elif dtype_kind in "ObU" and (use_indexers.keys() & var.dims):
39443959
if all(var.sizes[d] == 1 for d in (use_indexers.keys() & var.dims)):
39453960
# Broadcastable, can be handled quickly without reindex:

xarray/tests/test_interp.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,3 +1189,88 @@ def test_interp_vectorized_shared_dims(chunk: bool) -> None:
11891189
coords={"u": [45, 55], "t": [10, 12], "x": dx, "y": dy},
11901190
)
11911191
assert_identical(actual, expected)
1192+
1193+
1194+
@requires_scipy
1195+
def test_dataset_interp_datetime_variable() -> None:
1196+
# GH#10900
1197+
ds = xr.Dataset(
1198+
data_vars={
1199+
"something": (["x", "y"], np.arange(25, dtype=float).reshape(5, 5)),
1200+
"time": (
1201+
["x", "y"],
1202+
np.datetime64("2024-01-01")
1203+
+ np.arange(25).reshape(5, 5) * np.timedelta64(1, "D"),
1204+
),
1205+
},
1206+
coords={"x": np.arange(5), "y": np.arange(5)},
1207+
)
1208+
1209+
result = ds.interp(x=[0.5, 1.5], y=[0.5, 1.5])
1210+
1211+
assert "time" in result.data_vars
1212+
expected_time = np.datetime64("2024-01-01") + np.timedelta64(3, "D")
1213+
np.testing.assert_equal(result["time"].values[0, 0], expected_time)
1214+
1215+
1216+
@requires_scipy
1217+
def test_dataset_interp_timedelta_variable() -> None:
1218+
# GH#10900
1219+
ds = xr.Dataset(
1220+
data_vars={
1221+
"duration": (["x"], np.array([1, 2, 3, 4, 5], dtype="timedelta64[D]")),
1222+
},
1223+
coords={"x": np.arange(5)},
1224+
)
1225+
1226+
result = ds.interp(x=[0.5, 1.5, 2.5])
1227+
1228+
assert "duration" in result.data_vars
1229+
expected_seconds = np.array([1.5, 2.5, 3.5]) * 86400
1230+
actual_seconds = result["duration"].values.astype("timedelta64[s]").astype(float)
1231+
np.testing.assert_allclose(actual_seconds, expected_seconds, rtol=1e-10)
1232+
1233+
1234+
@requires_scipy
1235+
def test_dataset_interp_datetime_nat() -> None:
1236+
# GH#10900 - NaT propagates like NaN
1237+
time_data = np.array(
1238+
["2024-01-01", "2024-01-02", "NaT", "2024-01-04", "2024-01-05"],
1239+
dtype="datetime64[D]",
1240+
)
1241+
ds = xr.Dataset(
1242+
data_vars={"time": (["x"], time_data)},
1243+
coords={"x": np.arange(5)},
1244+
)
1245+
1246+
result = ds.interp(x=[0.5, 1.5, 2.5, 3.5])
1247+
1248+
assert not np.isnat(result["time"].values[0])
1249+
assert np.isnat(result["time"].values[1])
1250+
assert np.isnat(result["time"].values[2])
1251+
assert not np.isnat(result["time"].values[3])
1252+
1253+
1254+
@requires_scipy
1255+
@requires_dask
1256+
def test_dataset_interp_datetime_dask() -> None:
1257+
# GH#10900
1258+
ds = xr.Dataset(
1259+
data_vars={
1260+
"something": (["x", "y"], np.arange(25, dtype=float).reshape(5, 5)),
1261+
"time": (
1262+
["x", "y"],
1263+
np.datetime64("2024-01-01")
1264+
+ np.arange(25).reshape(5, 5) * np.timedelta64(1, "D"),
1265+
),
1266+
},
1267+
coords={"x": np.arange(5), "y": np.arange(5)},
1268+
).chunk({"x": 2, "y": 2})
1269+
1270+
with raise_if_dask_computes():
1271+
result = ds.interp(x=[0.5, 1.5], y=[0.5, 1.5])
1272+
1273+
assert "time" in result.data_vars
1274+
computed = result.compute()
1275+
expected_time = np.datetime64("2024-01-01") + np.timedelta64(3, "D")
1276+
np.testing.assert_equal(computed["time"].values[0, 0], expected_time)

0 commit comments

Comments
 (0)