Add dedicated InterpolateFunctor to the kernels

antonwolfy · antonwolfy · commit e7bf84c3d80f · 2026-03-17T12:47:27.000+01:00
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp
@@ -41,12 +41,12 @@
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 
+#include "kernels/elementwise_functions/interpolate.hpp"
+
 // dpctl tensor headers
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
 
-#include "kernels/elementwise_functions/interpolate.hpp"
-
 // utils extension headers
 #include "ext/common.hpp"
 #include "ext/validation_utils.hpp"
@@ -57,7 +57,6 @@ namespace type_utils = dpctl::tensor::type_utils;
 
 using ext::common::value_type_of;
 using ext::validation::array_names;
-using ext::validation::array_ptr;
 
 using ext::common::dtype_from_typenum;
 using ext::validation::check_has_dtype;
@@ -68,7 +67,6 @@ using ext::validation::common_checks;
 
 namespace dpnp::extensions::ufunc
 {
-
 namespace impl
 {
 using ext::common::init_dispatch_vector;
@@ -88,8 +86,10 @@ typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &,
                                             const std::size_t, // xp_size
                                             const std::vector<sycl::event> &);
 
+interpolate_fn_ptr_t interpolate_dispatch_vector[td_ns::num_types];
+
 template <typename T, typename TIdx = std::int64_t>
-sycl::event interpolate_call(sycl::queue &exec_q,
+sycl::event interpolate_impl(sycl::queue &q,
                              const void *vx,
                              const void *vidx,
                              const void *vxp,
@@ -101,6 +101,8 @@ sycl::event interpolate_call(sycl::queue &exec_q,
                              const std::size_t xp_size,
                              const std::vector<sycl::event> &depends)
 {
+    dpctl::tensor::type_utils::validate_type_for_device<T>(q);
+
     using type_utils::is_complex_v;
     using TCoord = std::conditional_t<is_complex_v<T>, value_type_of_t<T>, T>;
 
@@ -112,23 +114,62 @@ sycl::event interpolate_call(sycl::queue &exec_q,
     const T *right = static_cast<const T *>(vright);
     T *out = static_cast<T *>(vout);
 
-    using dpnp::kernels::interpolate::interpolate_impl;
-    sycl::event interpolate_ev = interpolate_impl<TCoord, T>(
-        exec_q, x, idx, xp, fp, left, right, out, n, xp_size, depends);
+    sycl::event interpolate_ev = q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+
+        using InterpolateFunc =
+            dpnp::kernels::interpolate::InterpolateFunctor<TCoord, T>;
+
+        cgh.parallel_for<InterpolateFunc>(
+            sycl::range<1>(n),
+            InterpolateFunc(x, idx, xp, fp, left, right, out, xp_size));
+    });
 
     return interpolate_ev;
 }
 
-interpolate_fn_ptr_t interpolate_dispatch_vector[td_ns::num_types];
+/**
+ * @brief A factory to define pairs of supported types for which
+ * interpolate function is available.
+ *
+ * @tparam T Type of the values array `fp` and of the result array `out`.
+ */
+template <typename T>
+struct InterpolateOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
 
-void common_interpolate_checks(
-    const dpctl::tensor::usm_ndarray &x,
-    const dpctl::tensor::usm_ndarray &idx,
-    const dpctl::tensor::usm_ndarray &xp,
-    const dpctl::tensor::usm_ndarray &fp,
-    const dpctl::tensor::usm_ndarray &out,
-    const std::optional<const dpctl::tensor::usm_ndarray> &left,
-    const std::optional<const dpctl::tensor::usm_ndarray> &right)
+template <typename fnT, typename T>
+struct InterpolateFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename InterpolateOutputType<T>::value_type, void>)
+        {
+            return nullptr;
+        }
+        else {
+            return interpolate_impl<T>;
+        }
+    }
+};
+
+namespace detail
+{
+void validate(const dpctl::tensor::usm_ndarray &x,
+              const dpctl::tensor::usm_ndarray &idx,
+              const dpctl::tensor::usm_ndarray &xp,
+              const dpctl::tensor::usm_ndarray &fp,
+              const dpctl::tensor::usm_ndarray &out,
+              const std::optional<const dpctl::tensor::usm_ndarray> &left,
+              const std::optional<const dpctl::tensor::usm_ndarray> &right)
 {
     array_names names = {{&x, "x"}, {&xp, "xp"}, {&fp, "fp"}, {&out, "out"}};
 
@@ -158,6 +199,7 @@ void common_interpolate_checks(
         throw py::value_error("array of sample points is empty");
     }
 }
+} // namespace detail
 
 std::pair<sycl::event, sycl::event>
     py_interpolate(const dpctl::tensor::usm_ndarray &x,
@@ -170,7 +212,7 @@ std::pair<sycl::event, sycl::event>
                    sycl::queue &exec_q,
                    const std::vector<sycl::event> &depends)
 {
-    common_interpolate_checks(x, idx, xp, fp, out, left, right);
+    detail::validate(x, idx, xp, fp, out, left, right);
 
     int out_typenum = out.get_typenum();
 
@@ -214,56 +256,20 @@ std::pair<sycl::event, sycl::event>
     return std::make_pair(args_ev, ev);
 }
 
-/**
- * @brief A factory to define pairs of supported types for which
- * interpolate function is available.
- *
- * @tparam T Type of input vector `a` and of result vector `y`.
- */
-template <typename T>
-struct InterpolateOutputType
-{
-    using value_type = typename std::disjunction<
-        td_ns::TypeMapResultEntry<T, float>,
-        td_ns::TypeMapResultEntry<T, double>,
-        td_ns::TypeMapResultEntry<T, std::complex<float>>,
-        td_ns::TypeMapResultEntry<T, std::complex<double>>,
-        td_ns::DefaultResultEntry<void>>::result_type;
-};
-
-template <typename fnT, typename T>
-struct InterpolateFactory
-{
-    fnT get()
-    {
-        if constexpr (std::is_same_v<
-                          typename InterpolateOutputType<T>::value_type, void>)
-        {
-            return nullptr;
-        }
-        else {
-            return interpolate_call<T>;
-        }
-    }
-};
-
 static void init_interpolate_dispatch_vectors()
 {
-    init_dispatch_vector<interpolate_fn_ptr_t, InterpolateFactory>(
+    init_dispatch_vector<interpolate_fn_ptr_t, impl::InterpolateFactory>(
         interpolate_dispatch_vector);
 }
-
 } // namespace impl
 
 void init_interpolate(py::module_ m)
 {
     impl::init_interpolate_dispatch_vectors();
 
-    using impl::py_interpolate;
-    m.def("_interpolate", &py_interpolate, "", py::arg("x"), py::arg("idx"),
-          py::arg("xp"), py::arg("fp"), py::arg("left"), py::arg("right"),
-          py::arg("out"), py::arg("sycl_queue"),
+    m.def("_interpolate", &impl::py_interpolate, "", py::arg("x"),
+          py::arg("idx"), py::arg("xp"), py::arg("fp"), py::arg("left"),
+          py::arg("right"), py::arg("out"), py::arg("sycl_queue"),
           py::arg("depends") = py::list());
 }
-
 } // namespace dpnp::extensions::ufunc
diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp
@@ -28,67 +28,79 @@
 
 #pragma once
 
+#include <cstddef>
+#include <cstdint>
+
 #include <sycl/sycl.hpp>
-#include <vector>
 
 #include "ext/common.hpp"
 
-using ext::common::IsNan;
-
 namespace dpnp::kernels::interpolate
 {
+using ext::common::IsNan;
+
 template <typename TCoord, typename TValue, typename TIdx = std::int64_t>
-sycl::event interpolate_impl(sycl::queue &q,
-                             const TCoord *x,
-                             const TIdx *idx,
-                             const TCoord *xp,
-                             const TValue *fp,
-                             const TValue *left,
-                             const TValue *right,
-                             TValue *out,
-                             const std::size_t n,
-                             const std::size_t xp_size,
-                             const std::vector<sycl::event> &depends)
+class InterpolateFunctor
 {
+private:
+    const TCoord *x = nullptr;
+    const TIdx *idx = nullptr;
+    const TCoord *xp = nullptr;
+    const TValue *fp = nullptr;
+    const TValue *left = nullptr;
+    const TValue *right = nullptr;
+    TValue *out = nullptr;
+    const std::size_t xp_size;
+
+public:
+    InterpolateFunctor(const TCoord *x_,
+                       const TIdx *idx_,
+                       const TCoord *xp_,
+                       const TValue *fp_,
+                       const TValue *left_,
+                       const TValue *right_,
+                       TValue *out_,
+                       const std::size_t xp_size_)
+        : x(x_), idx(idx_), xp(xp_), fp(fp_), left(left_), right(right_),
+          out(out_), xp_size(xp_size_)
+    {
+    }
+
     // Selected over the work-group version
     // due to simpler execution and slightly better performance.
-    return q.submit([&](sycl::handler &h) {
-        h.depends_on(depends);
-        h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) {
-            TValue left_val = left ? *left : fp[0];
-            TValue right_val = right ? *right : fp[xp_size - 1];
+    void operator()(sycl::id<1> id) const
+    {
+        TValue left_val = left ? *left : fp[0];
+        TValue right_val = right ? *right : fp[xp_size - 1];
 
-            TCoord x_val = x[i];
-            TIdx x_idx = idx[i] - 1;
+        TCoord x_val = x[id];
+        TIdx x_idx = idx[id] - 1;
 
-            if (IsNan<TCoord>::isnan(x_val)) {
-                out[i] = x_val;
-            }
-            else if (x_idx < 0) {
-                out[i] = left_val;
-            }
-            else if (x_val == xp[xp_size - 1]) {
-                out[i] = fp[xp_size - 1];
-            }
-            else if (x_idx >= static_cast<TIdx>(xp_size - 1)) {
-                out[i] = right_val;
-            }
-            else {
-                TValue slope =
-                    (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]);
-                TValue res = slope * (x_val - xp[x_idx]) + fp[x_idx];
+        if (IsNan<TCoord>::isnan(x_val)) {
+            out[id] = x_val;
+        }
+        else if (x_idx < 0) {
+            out[id] = left_val;
+        }
+        else if (x_val == xp[xp_size - 1]) {
+            out[id] = fp[xp_size - 1];
+        }
+        else if (x_idx >= static_cast<TIdx>(xp_size - 1)) {
+            out[id] = right_val;
+        }
+        else {
+            TValue slope =
+                (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]);
+            TValue res = slope * (x_val - xp[x_idx]) + fp[x_idx];
 
-                if (IsNan<TValue>::isnan(res)) {
-                    res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1];
-                    if (IsNan<TValue>::isnan(res) &&
-                        (fp[x_idx] == fp[x_idx + 1])) {
-                        res = fp[x_idx];
-                    }
+            if (IsNan<TValue>::isnan(res)) {
+                res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1];
+                if (IsNan<TValue>::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) {
+                    res = fp[x_idx];
                 }
-                out[i] = res;
             }
-        });
-    });
-}
-
+            out[id] = res;
+        }
+    }
+};
 } // namespace dpnp::kernels::interpolate