Move take_along_axis() to dpctl_ext/tensor and reuse it in dpnp

vlad-perevezentsev · vlad-perevezentsev · commit 6fecefe2ee88 · 2026-02-19T06:37:40.000-08:00
diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py
@@ -46,6 +46,7 @@
     put,
     put_along_axis,
     take,
+    take_along_axis,
 )
 from dpctl_ext.tensor._manipulation_functions import (
     roll,
@@ -66,6 +67,7 @@
     "reshape",
     "roll",
     "take",
+    "take_along_axis",
     "to_numpy",
     "tril",
     "triu",
diff --git a/dpctl_ext/tensor/_copy_utils.py b/dpctl_ext/tensor/_copy_utils.py
@@ -384,6 +384,95 @@ def _put_multi_index(ary, inds, p, vals, mode=0):
     return
 
 
+def _take_multi_index(ary, inds, p, mode=0):
+    """
+    Gathers elements of ``ary`` addressed by the index arrays ``inds``,
+    which index consecutive axes of ``ary`` starting at axis ``p``.
+
+    Args:
+        ary (usm_ndarray):
+            array to take elements from.
+        inds (index array or list/tuple of index arrays):
+            index array(s); a single non-sequence object is wrapped into a
+            one-element tuple. The ``k``-th entry indexes axis ``p + k``.
+        p (int):
+            first indexed axis of ``ary``; normalized against ``ary.ndim``
+            with ``normalize_axis_index``.
+        mode (int, optional):
+            integer code, ``0`` or ``1``, forwarded to ``ti._take`` to
+            select out-of-bounds index handling. Presumably ``0`` is
+            "wrap" and ``1`` is "clip" -- confirm against
+            ``_get_indexing_mode``. Default: ``0``.
+
+    Returns:
+        usm_ndarray:
+            newly allocated array with the data type of ``ary`` and shape
+            ``ary.shape[:p] + inds[0].shape + ary.shape[p + len(inds):]``,
+            where ``inds`` are the index arrays after preparation.
+
+    Raises:
+        TypeError: if ``ary`` is not a ``usm_ndarray``.
+        ValueError: if ``mode`` is neither ``0`` nor ``1``.
+        ExecutionPlacementError: if no common execution queue can be
+            determined for ``ary`` and ``inds``.
+        IndexError: if indices are non-empty while an indexed axis of
+            ``ary`` has zero size.
+    """
+    if not isinstance(ary, dpt.usm_ndarray):
+        raise TypeError(
+            f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
+        )
+    ary_nd = ary.ndim
+    p = normalize_axis_index(operator.index(p), ary_nd)
+    mode = operator.index(mode)
+    if mode not in [0, 1]:
+        raise ValueError(
+            "Invalid value for mode keyword, only 0 or 1 is supported"
+        )
+    if not isinstance(inds, (list, tuple)):
+        inds = (inds,)
+
+    exec_q, res_usm_type = _get_indices_queue_usm_type(
+        inds, ary.sycl_queue, ary.usm_type
+    )
+    if exec_q is None:
+        raise dpctl.utils.ExecutionPlacementError(
+            "Can not automatically determine where to allocate the "
+            "result or perform execution. "
+            "Use `usm_ndarray.to_device` method to migrate data to "
+            "be associated with the same queue."
+        )
+
+    inds = _prepare_indices_arrays(inds, exec_q, res_usm_type)
+
+    # NOTE(review): only the first index array's shape is used below; this
+    # assumes _prepare_indices_arrays returns arrays of a common shape.
+    ind0 = inds[0]
+    ary_sh = ary.shape
+    p_end = p + len(inds)
+    if 0 in ary_sh[p:p_end] and ind0.size != 0:
+        raise IndexError("cannot take non-empty indices from an empty axis")
+    res_shape = ary_sh[:p] + ind0.shape + ary_sh[p_end:]
+    res = dpt.empty(
+        res_shape, dtype=ary.dtype, usm_type=res_usm_type, sycl_queue=exec_q
+    )
+    # Pass the events already tracked for this queue as dependencies, then
+    # record the new event pair with the queue's order manager.
+    _manager = dpctl.utils.SequentialOrderManager[exec_q]
+    dep_ev = _manager.submitted_events
+    hev, take_ev = ti._take(
+        src=ary,
+        ind=inds,
+        dst=res,
+        axis_start=p,
+        mode=mode,
+        sycl_queue=exec_q,
+        depends=dep_ev,
+    )
+    _manager.add_event_pair(hev, take_ev)
+    return res
+
+
 def from_numpy(np_ary, /, *, device=None, usm_type="device", sycl_queue=None):
     """
     from_numpy(arg, device=None, usm_type="device", sycl_queue=None)
diff --git a/dpctl_ext/tensor/_indexing_functions.py b/dpctl_ext/tensor/_indexing_functions.py
@@ -41,6 +41,7 @@
     _extract_impl,
     _nonzero_impl,
     _put_multi_index,
+    _take_multi_index,
 )
 from ._numpy_helper import normalize_axis_index
 
@@ -561,3 +562,80 @@ def take(x, indices, /, *, axis=None, out=None, mode="wrap"):
         out = orig_out
 
     return out
+
+
+def take_along_axis(x, indices, /, *, axis=-1, mode="wrap"):
+    """
+    Selects values of ``x`` along ``axis`` at the positions given by
+    ``indices``.
+
+    Args:
+        x (usm_ndarray):
+            array to select values from. Apart from the dimension given
+            by ``axis``, it must be compatible with ``indices``.
+        indices (usm_ndarray):
+            array of indices with the same number of dimensions as ``x``.
+        axis (int, optional):
+            dimension along which values are selected. A negative value
+            counts from the last dimension. Default: ``-1``.
+        mode (str, optional):
+            policy for indices that fall out of bounds. Possible values
+            are:
+
+            - ``"wrap"``: clamps indices to (``-n <= i < n``), then wraps
+              negative indices.
+            - ``"clip"``: clips indices to (``0 <= i < n``).
+
+            Default: ``"wrap"``.
+
+    Returns:
+        usm_ndarray:
+            array with the data type of ``x`` and the same number of
+            dimensions as ``x``. Its shape follows broadcasting rules,
+            except along ``axis``, where its size equals the size of
+            ``indices`` along that dimension.
+
+    Raises:
+        TypeError: if ``x`` or ``indices`` is not a ``usm_ndarray``.
+        ValueError: if ``x`` and ``indices`` differ in number of
+            dimensions.
+        ExecutionPlacementError: if the execution queue can not be
+            inferred from ``x`` and ``indices``.
+    """
+    for arg in (x, indices):
+        if not isinstance(arg, dpt.usm_ndarray):
+            raise TypeError(
+                f"Expected dpctl.tensor.usm_ndarray, got {type(arg)}"
+            )
+    ndim = x.ndim
+    if indices.ndim != ndim:
+        raise ValueError(
+            "Number of dimensions in the first and the second "
+            "argument arrays must be equal"
+        )
+    ax = normalize_axis_index(operator.index(axis), ndim)
+    res_usm_type = dpctl.utils.get_coerced_usm_type(
+        (x.usm_type, indices.usm_type)
+    )
+    q = dpctl.utils.get_execution_queue((x.sycl_queue, indices.sycl_queue))
+    if q is None:
+        raise dpctl.utils.ExecutionPlacementError(
+            "Execution placement can not be unambiguously inferred "
+            "from input arguments. "
+        )
+    mode_code = _get_indexing_mode(mode)
+    # Generated ranges use uint64 when ``indices`` is uint64, otherwise
+    # the default index type of the execution device.
+    if indices.dtype == dpt.uint64:
+        ind_dt = dpt.uint64
+    else:
+        ind_dt = ti.default_device_index_type(q.sycl_device)
+    # ``indices`` addresses ``axis``; every other dimension gets the
+    # array returned by ``_range`` for its extent.
+    idx_arrays = []
+    for k, extent in enumerate(x.shape):
+        if k == ax:
+            idx_arrays.append(indices)
+        else:
+            idx_arrays.append(_range(extent, k, ndim, q, res_usm_type, ind_dt))
+    return _take_multi_index(x, tuple(idx_arrays), 0, mode=mode_code)
diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py
@@ -2295,7 +2295,7 @@ def take_along_axis(a, indices, axis=-1, mode="wrap"):
     usm_a = dpnp.get_usm_ndarray(a)
     usm_ind = dpnp.get_usm_ndarray(indices)
 
-    usm_res = dpt.take_along_axis(usm_a, usm_ind, axis=axis, mode=mode)
+    usm_res = dpt_ext.take_along_axis(usm_a, usm_ind, axis=axis, mode=mode)
     return dpnp_array._create_from_usm_ndarray(usm_res)