From 57a9b311c803199a59c889450bdb7d02ad29d4db Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 12 Mar 2026 14:11:18 +0100 Subject: [PATCH 01/17] Move ChooseFunctor to dpnp/backend/kernels/indexing/choose.hpp --- .../extensions/indexing/CMakeLists.txt | 2 +- .../extensions/indexing/choose_kernel.hpp | 62 +++---------- dpnp/backend/kernels/indexing/choose.hpp | 87 +++++++++++++++++++ 3 files changed, 98 insertions(+), 53 deletions(-) create mode 100644 dpnp/backend/kernels/indexing/choose.hpp diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index 370d59f95585..e1bc34c9ae8b 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -62,7 +62,7 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ ${CMAKE_CURRENT_SOURCE_DIR}/../common ) # treat below headers as system to suppress the warnings there during the build diff --git a/dpnp/backend/extensions/indexing/choose_kernel.hpp b/dpnp/backend/extensions/indexing/choose_kernel.hpp index 6b1ac8005054..dbee4f1c2172 100644 --- a/dpnp/backend/extensions/indexing/choose_kernel.hpp +++ b/dpnp/backend/extensions/indexing/choose_kernel.hpp @@ -42,7 +42,11 @@ #include "utils/strided_iters.hpp" #include "utils/type_utils.hpp" -namespace dpnp::extensions::indexing::strides_detail +#include "kernels/indexing/choose.hpp" + +namespace dpnp::extensions::indexing +{ +namespace strides_detail { struct NthStrideOffsetUnpacked @@ -78,59 +82,12 @@ struct NthStrideOffsetUnpacked static_assert(sycl::is_device_copyable_v); -} // namespace dpnp::extensions::indexing::strides_detail - -namespace dpnp::extensions::indexing::kernels -{ +} // namespace strides_detail -template -class ChooseFunctor +namespace kernels { -private: - const IndT *ind = nullptr; - T *dst = nullptr; - char **chcs = nullptr; - dpctl::tensor::ssize_t n_chcs; - const IndOutIndexerT ind_out_indexer; - const ChoicesIndexerT chcs_indexer; - -public: - ChooseFunctor(const IndT *ind_, - T *dst_, - char **chcs_, - dpctl::tensor::ssize_t n_chcs_, - const IndOutIndexerT &ind_out_indexer_, - const ChoicesIndexerT &chcs_indexer_) - : ind(ind_), dst(dst_), chcs(chcs_), n_chcs(n_chcs_), - ind_out_indexer(ind_out_indexer_), chcs_indexer(chcs_indexer_) - { - } - - void operator()(sycl::id<1> id) const - { - const ProjectorT proj{}; - - dpctl::tensor::ssize_t i = id[0]; - - auto ind_dst_offsets = ind_out_indexer(i); - dpctl::tensor::ssize_t ind_offset = ind_dst_offsets.get_first_offset(); - dpctl::tensor::ssize_t dst_offset = ind_dst_offsets.get_second_offset(); - - IndT chc_idx = ind[ind_offset]; - // proj produces an index in the range of n_chcs - dpctl::tensor::ssize_t projected_idx = proj(n_chcs, chc_idx); - dpctl::tensor::ssize_t chc_offset = chcs_indexer(i, projected_idx); - - T *chc = reinterpret_cast(chcs[projected_idx]); - - dst[dst_offset] = chc[chc_offset]; - } -}; +using dpnp::kernels::choose::ChooseFunctor; typedef sycl::event (*choose_fn_ptr_t)(sycl::queue &, size_t, @@ -188,4 +145,5 @@ sycl::event choose_impl(sycl::queue &q, return choose_ev; } -} // namespace dpnp::extensions::indexing::kernels +} // namespace kernels +} // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/kernels/indexing/choose.hpp b/dpnp/backend/kernels/indexing/choose.hpp new file mode 100644 index 000000000000..98a7ab4a0f62 --- /dev/null +++ b/dpnp/backend/kernels/indexing/choose.hpp @@ -0,0 +1,87 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "kernels/dpctl_tensor_types.hpp" + +namespace dpnp::kernels::choose +{ +using dpctl::tensor::ssize_t; + +template +class ChooseFunctor +{ +private: + const IndT *ind = nullptr; + T *dst = nullptr; + char **chcs = nullptr; + ssize_t n_chcs; + const IndOutIndexerT ind_out_indexer; + const ChoicesIndexerT chcs_indexer; + +public: + ChooseFunctor(const IndT *ind_, + T *dst_, + char **chcs_, + ssize_t n_chcs_, + const IndOutIndexerT &ind_out_indexer_, + const ChoicesIndexerT &chcs_indexer_) + : ind(ind_), dst(dst_), chcs(chcs_), n_chcs(n_chcs_), + ind_out_indexer(ind_out_indexer_), chcs_indexer(chcs_indexer_) + { + } + + void operator()(sycl::id<1> id) const + { + const ProjectorT proj{}; + + ssize_t i = id[0]; + + auto ind_dst_offsets = ind_out_indexer(i); + ssize_t ind_offset = ind_dst_offsets.get_first_offset(); + ssize_t dst_offset = ind_dst_offsets.get_second_offset(); + + IndT chc_idx = ind[ind_offset]; + // proj produces an index in the range of n_chcs + ssize_t projected_idx = proj(n_chcs, chc_idx); + + ssize_t chc_offset = chcs_indexer(i, projected_idx); + + T *chc = reinterpret_cast(chcs[projected_idx]); + + dst[dst_offset] = chc[chc_offset]; + } +}; +} // namespace dpnp::kernels::choose From 1d468da7da4dce972af15d5e676eedd7bd507ea2 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 12 Mar 2026 15:34:45 +0100 Subject: [PATCH 02/17] Get rid of dpnp/backend/extensions/indexing/choose_kernel.hpp --- dpnp/backend/extensions/indexing/choose.cpp | 118 ++++++++++---- .../extensions/indexing/choose_kernel.hpp | 149 ------------------ dpnp/backend/kernels/indexing/choose.hpp | 39 +++++ 3 files changed, 126 insertions(+), 180 deletions(-) delete mode 100644 dpnp/backend/extensions/indexing/choose_kernel.hpp diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp index 99d91744366f..91dcf6b00213 100644 --- a/dpnp/backend/extensions/indexing/choose.cpp +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -30,41 +30,116 @@ #include #include #include -#include -#include -#include #include #include #include -#include "choose_kernel.hpp" +#include + #include "dpctl4pybind11.hpp" +#include +#include -// utils extension header #include "ext/common.hpp" +#include "kernels/indexing/choose.hpp" // dpctl tensor headers #include "utils/indexing_utils.hpp" #include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" // #include "utils/output_validation.hpp" #include "utils/sycl_alloc_utils.hpp" #include "utils/type_dispatch.hpp" namespace dpnp::extensions::indexing { - +namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -static kernels::choose_fn_ptr_t choose_clip_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static kernels::choose_fn_ptr_t choose_wrap_dispatch_table[td_ns::num_types] - [td_ns::num_types]; +using dpctl::tensor::ssize_t; + +typedef sycl::event (*choose_fn_ptr_t)(sycl::queue &, + size_t, + ssize_t, + int, + const ssize_t *, + const char *, + char *, + char **, + ssize_t, + ssize_t, + const ssize_t *, + const std::vector &); + +static choose_fn_ptr_t choose_clip_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static choose_fn_ptr_t choose_wrap_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +template +sycl::event choose_impl(sycl::queue &q, + size_t nelems, + ssize_t n_chcs, + int nd, + const ssize_t *shape_and_strides, + const char *ind_cp, + char *dst_cp, + char **chcs_cp, + ssize_t ind_offset, + ssize_t dst_offset, + const ssize_t *chc_offsets, + const std::vector &depends) +{ + dpctl::tensor::type_utils::validate_type_for_device(q); + + const indTy *ind_tp = reinterpret_cast(ind_cp); + Ty *dst_tp = reinterpret_cast(dst_cp); -namespace py = pybind11; + sycl::event choose_ev = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); -namespace detail + using InOutIndexerT = + dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + const InOutIndexerT ind_out_indexer{nd, ind_offset, dst_offset, + shape_and_strides}; + + using NthChoiceIndexerT = + dpnp::kernels::choose::strides::NthStrideOffsetUnpacked; + const NthChoiceIndexerT choices_indexer{ + nd, chc_offsets, shape_and_strides, shape_and_strides + 3 * nd}; + + using ChooseFunc = + dpnp::kernels::choose::ChooseFunctor; + + cgh.parallel_for(sycl::range<1>(nelems), + ChooseFunc(ind_tp, dst_tp, chcs_cp, n_chcs, + ind_out_indexer, + choices_indexer)); + }); + + return choose_ev; +} + +template +struct ChooseFactory { + fnT get() + { + if constexpr (std::is_integral::value && + !std::is_same::value) { + fnT fn = choose_impl; + return fn; + } + else { + fnT fn = nullptr; + return fn; + } + } +}; +namespace detail +{ using host_ptrs_allocator_t = dpctl::tensor::alloc_utils::usm_host_allocator; using ptrs_t = std::vector; @@ -191,7 +266,6 @@ std::vector parse_py_chcs(const sycl::queue &q, return res; } - } // namespace detail std::pair @@ -412,23 +486,6 @@ std::pair return std::make_pair(arg_cleanup_ev, choose_generic_ev); } -template -struct ChooseFactory -{ - fnT get() - { - if constexpr (std::is_integral::value && - !std::is_same::value) { - fnT fn = kernels::choose_impl; - return fn; - } - else { - fnT fn = nullptr; - return fn; - } - } -}; - using dpctl::tensor::indexing_utils::ClipIndex; using dpctl::tensor::indexing_utils::WrapIndex; @@ -441,7 +498,6 @@ using ChooseClipFactory = ChooseFactory>; void init_choose_dispatch_tables(void) { using ext::common::init_dispatch_table; - using kernels::choose_fn_ptr_t; init_dispatch_table( choose_clip_dispatch_table); diff --git a/dpnp/backend/extensions/indexing/choose_kernel.hpp b/dpnp/backend/extensions/indexing/choose_kernel.hpp deleted file mode 100644 index dbee4f1c2172..000000000000 --- a/dpnp/backend/extensions/indexing/choose_kernel.hpp +++ /dev/null @@ -1,149 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// - Neither the name of the copyright holder nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#pragma once - -#include -#include -#include -#include -#include - -#include - -#include "kernels/dpctl_tensor_types.hpp" -#include "utils/indexing_utils.hpp" -#include "utils/offset_utils.hpp" -#include "utils/strided_iters.hpp" -#include "utils/type_utils.hpp" - -#include "kernels/indexing/choose.hpp" - -namespace dpnp::extensions::indexing -{ -namespace strides_detail -{ - -struct NthStrideOffsetUnpacked -{ - NthStrideOffsetUnpacked(int common_nd, - dpctl::tensor::ssize_t const *_offsets, - dpctl::tensor::ssize_t const *_shape, - dpctl::tensor::ssize_t const *_strides) - : _ind(common_nd), nd(common_nd), offsets(_offsets), shape(_shape), - strides(_strides) - { - } - - template - size_t operator()(dpctl::tensor::ssize_t gid, nT n) const - { - dpctl::tensor::ssize_t relative_offset(0); - _ind.get_displacement( - gid, shape, strides + (n * nd), relative_offset); - - return relative_offset + offsets[n]; - } - -private: - dpctl::tensor::strides::CIndexer_vector _ind; - - int nd; - dpctl::tensor::ssize_t const *offsets; - dpctl::tensor::ssize_t const *shape; - dpctl::tensor::ssize_t const *strides; -}; - -static_assert(sycl::is_device_copyable_v); - -} // namespace strides_detail - -namespace kernels -{ - -using dpnp::kernels::choose::ChooseFunctor; - -typedef sycl::event (*choose_fn_ptr_t)(sycl::queue &, - size_t, - dpctl::tensor::ssize_t, - int, - const dpctl::tensor::ssize_t *, - const char *, - char *, - char **, - dpctl::tensor::ssize_t, - dpctl::tensor::ssize_t, - const dpctl::tensor::ssize_t *, - const std::vector &); - -template -sycl::event choose_impl(sycl::queue &q, - size_t nelems, - dpctl::tensor::ssize_t n_chcs, - int nd, - const dpctl::tensor::ssize_t *shape_and_strides, - const char *ind_cp, - char *dst_cp, - char **chcs_cp, - dpctl::tensor::ssize_t ind_offset, - dpctl::tensor::ssize_t dst_offset, - const dpctl::tensor::ssize_t *chc_offsets, - const std::vector &depends) -{ - dpctl::tensor::type_utils::validate_type_for_device(q); - - const indTy *ind_tp = reinterpret_cast(ind_cp); - Ty *dst_tp = reinterpret_cast(dst_cp); - - sycl::event choose_ev = q.submit([&](sycl::handler &cgh) { - cgh.depends_on(depends); - - using InOutIndexerT = - dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; - const InOutIndexerT ind_out_indexer{nd, ind_offset, dst_offset, - shape_and_strides}; - - using NthChoiceIndexerT = strides_detail::NthStrideOffsetUnpacked; - const NthChoiceIndexerT choices_indexer{ - nd, chc_offsets, shape_and_strides, shape_and_strides + 3 * nd}; - - using ChooseFunc = ChooseFunctor; - - cgh.parallel_for(sycl::range<1>(nelems), - ChooseFunc(ind_tp, dst_tp, chcs_cp, n_chcs, - ind_out_indexer, - choices_indexer)); - }); - - return choose_ev; -} - -} // namespace kernels -} // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/kernels/indexing/choose.hpp b/dpnp/backend/kernels/indexing/choose.hpp index 98a7ab4a0f62..7129ca7bce1e 100644 --- a/dpnp/backend/kernels/indexing/choose.hpp +++ b/dpnp/backend/kernels/indexing/choose.hpp @@ -31,6 +31,7 @@ #include #include "kernels/dpctl_tensor_types.hpp" +#include "utils/strided_iters.hpp" namespace dpnp::kernels::choose { @@ -84,4 +85,42 @@ class ChooseFunctor dst[dst_offset] = chc[chc_offset]; } }; + +namespace strides +{ +using dpctl::tensor::strides::CIndexer_vector; + +struct NthStrideOffsetUnpacked +{ + NthStrideOffsetUnpacked(int common_nd, + ssize_t const *_offsets, + ssize_t const *_shape, + ssize_t const *_strides) + : _ind(common_nd), nd(common_nd), offsets(_offsets), shape(_shape), + strides(_strides) + { + } + + template + size_t operator()(ssize_t gid, nT n) const + { + ssize_t relative_offset(0); + _ind.get_displacement( + gid, shape, strides + (n * nd), relative_offset); + + return relative_offset + offsets[n]; + } + +private: + CIndexer_vector _ind; + + int nd; + ssize_t const *offsets; + ssize_t const *shape; + ssize_t const *strides; +}; + +static_assert(sycl::is_device_copyable_v); + +} // namespace strides } // namespace dpnp::kernels::choose From f908e3adc6348a9276688d2fd5f7a7d8b77e5f04 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 12 Mar 2026 15:36:23 +0100 Subject: [PATCH 03/17] Disable indexing kernels from coverage report --- scripts/gen_coverage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py index 588345d91b2e..d245c5d31060 100644 --- a/scripts/gen_coverage.py +++ b/scripts/gen_coverage.py @@ -259,6 +259,7 @@ def find_objects(): "-format=lcov", "-ignore-filename-regex=/tmp/icpx*", r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$", + r"-ignore-filename-regex=.*/backend/kernels/indexing/.*\.hpp$", "-instr-profile=" + instr_profile_fn, ] + objects From 0b186561da04631f02c65ec36c6e7fac989142ff Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 16 Mar 2026 13:02:51 +0100 Subject: [PATCH 04/17] Add dedicated InterpolateFunctor to the kernels --- .../elementwise_functions/interpolate.cpp | 122 +++++++++--------- .../elementwise_functions/interpolate.hpp | 108 +++++++++------- 2 files changed, 124 insertions(+), 106 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index fca8c43f816e..021e1941dcaf 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -41,12 +41,12 @@ #include #include +#include "kernels/elementwise_functions/interpolate.hpp" + // dpctl tensor headers #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" -#include "kernels/elementwise_functions/interpolate.hpp" - // utils extension headers #include "ext/common.hpp" #include "ext/validation_utils.hpp" @@ -57,7 +57,6 @@ namespace type_utils = dpctl::tensor::type_utils; using ext::common::value_type_of; using ext::validation::array_names; -using ext::validation::array_ptr; using ext::common::dtype_from_typenum; using ext::validation::check_has_dtype; @@ -68,7 +67,6 @@ using ext::validation::common_checks; namespace dpnp::extensions::ufunc { - namespace impl { using ext::common::init_dispatch_vector; @@ -88,8 +86,10 @@ typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &, const std::size_t, // xp_size const std::vector &); +interpolate_fn_ptr_t interpolate_dispatch_vector[td_ns::num_types]; + template -sycl::event interpolate_call(sycl::queue &exec_q, +sycl::event interpolate_impl(sycl::queue &q, const void *vx, const void *vidx, const void *vxp, @@ -101,6 +101,8 @@ sycl::event interpolate_call(sycl::queue &exec_q, const std::size_t xp_size, const std::vector &depends) { + dpctl::tensor::type_utils::validate_type_for_device(q); + using type_utils::is_complex_v; using TCoord = std::conditional_t, value_type_of_t, T>; @@ -112,23 +114,62 @@ sycl::event interpolate_call(sycl::queue &exec_q, const T *right = static_cast(vright); T *out = static_cast(vout); - using dpnp::kernels::interpolate::interpolate_impl; - sycl::event interpolate_ev = interpolate_impl( - exec_q, x, idx, xp, fp, left, right, out, n, xp_size, depends); + sycl::event interpolate_ev = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + using InterpolateFunc = + dpnp::kernels::interpolate::InterpolateFunctor; + + cgh.parallel_for( + sycl::range<1>(n), + InterpolateFunc(x, idx, xp, fp, left, right, out, xp_size)); + }); return interpolate_ev; } -interpolate_fn_ptr_t interpolate_dispatch_vector[td_ns::num_types]; +/** + * @brief A factory to define pairs of supported types for which + * interpolate function is available. + * + * @tparam T Type of input vector `a` and of result vector `y`. + */ +template +struct InterpolateOutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; -void common_interpolate_checks( - const dpctl::tensor::usm_ndarray &x, - const dpctl::tensor::usm_ndarray &idx, - const dpctl::tensor::usm_ndarray &xp, - const dpctl::tensor::usm_ndarray &fp, - const dpctl::tensor::usm_ndarray &out, - const std::optional &left, - const std::optional &right) +template +struct InterpolateFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename InterpolateOutputType::value_type, void>) + { + return nullptr; + } + else { + return interpolate_impl; + } + } +}; + +namespace detail +{ +void validate(const dpctl::tensor::usm_ndarray &x, + const dpctl::tensor::usm_ndarray &idx, + const dpctl::tensor::usm_ndarray &xp, + const dpctl::tensor::usm_ndarray &fp, + const dpctl::tensor::usm_ndarray &out, + const std::optional &left, + const std::optional &right) { array_names names = {{&x, "x"}, {&xp, "xp"}, {&fp, "fp"}, {&out, "out"}}; @@ -158,6 +199,7 @@ void common_interpolate_checks( throw py::value_error("array of sample points is empty"); } } +} // namespace detail std::pair py_interpolate(const dpctl::tensor::usm_ndarray &x, @@ -170,7 +212,7 @@ std::pair sycl::queue &exec_q, const std::vector &depends) { - common_interpolate_checks(x, idx, xp, fp, out, left, right); + detail::validate(x, idx, xp, fp, out, left, right); int out_typenum = out.get_typenum(); @@ -214,56 +256,20 @@ std::pair return std::make_pair(args_ev, ev); } -/** - * @brief A factory to define pairs of supported types for which - * interpolate function is available. - * - * @tparam T Type of input vector `a` and of result vector `y`. - */ -template -struct InterpolateOutputType -{ - using value_type = typename std::disjunction< - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry>, - td_ns::TypeMapResultEntry>, - td_ns::DefaultResultEntry>::result_type; -}; - -template -struct InterpolateFactory -{ - fnT get() - { - if constexpr (std::is_same_v< - typename InterpolateOutputType::value_type, - void>) { - return nullptr; - } - else { - return interpolate_call; - } - } -}; - static void init_interpolate_dispatch_vectors() { - init_dispatch_vector( + init_dispatch_vector( interpolate_dispatch_vector); } - } // namespace impl void init_interpolate(py::module_ m) { impl::init_interpolate_dispatch_vectors(); - using impl::py_interpolate; - m.def("_interpolate", &py_interpolate, "", py::arg("x"), py::arg("idx"), - py::arg("xp"), py::arg("fp"), py::arg("left"), py::arg("right"), - py::arg("out"), py::arg("sycl_queue"), + m.def("_interpolate", &impl::py_interpolate, "", py::arg("x"), + py::arg("idx"), py::arg("xp"), py::arg("fp"), py::arg("left"), + py::arg("right"), py::arg("out"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } - } // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp index ef38157b00e9..c85dafea24b0 100644 --- a/dpnp/backend/kernels/elementwise_functions/interpolate.hpp +++ b/dpnp/backend/kernels/elementwise_functions/interpolate.hpp @@ -28,67 +28,79 @@ #pragma once +#include +#include + #include -#include #include "ext/common.hpp" -using ext::common::IsNan; - namespace dpnp::kernels::interpolate { +using ext::common::IsNan; + template -sycl::event interpolate_impl(sycl::queue &q, - const TCoord *x, - const TIdx *idx, - const TCoord *xp, - const TValue *fp, - const TValue *left, - const TValue *right, - TValue *out, - const std::size_t n, - const std::size_t xp_size, - const std::vector &depends) +class InterpolateFunctor { +private: + const TCoord *x = nullptr; + const TIdx *idx = nullptr; + const TCoord *xp = nullptr; + const TValue *fp = nullptr; + const TValue *left = nullptr; + const TValue *right = nullptr; + TValue *out = nullptr; + const std::size_t xp_size; + +public: + InterpolateFunctor(const TCoord *x_, + const TIdx *idx_, + const TCoord *xp_, + const TValue *fp_, + const TValue *left_, + const TValue *right_, + TValue *out_, + const std::size_t xp_size_) + : x(x_), idx(idx_), xp(xp_), fp(fp_), left(left_), right(right_), + out(out_), xp_size(xp_size_) + { + } + // Selected over the work-group version // due to simpler execution and slightly better performance. - return q.submit([&](sycl::handler &h) { - h.depends_on(depends); - h.parallel_for(sycl::range<1>(n), [=](sycl::id<1> i) { - TValue left_val = left ? *left : fp[0]; - TValue right_val = right ? *right : fp[xp_size - 1]; + void operator()(sycl::id<1> id) const + { + TValue left_val = left ? *left : fp[0]; + TValue right_val = right ? *right : fp[xp_size - 1]; - TCoord x_val = x[i]; - TIdx x_idx = idx[i] - 1; + TCoord x_val = x[id]; + TIdx x_idx = idx[id] - 1; - if (IsNan::isnan(x_val)) { - out[i] = x_val; - } - else if (x_idx < 0) { - out[i] = left_val; - } - else if (x_val == xp[xp_size - 1]) { - out[i] = fp[xp_size - 1]; - } - else if (x_idx >= static_cast(xp_size - 1)) { - out[i] = right_val; - } - else { - TValue slope = - (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); - TValue res = slope * (x_val - xp[x_idx]) + fp[x_idx]; + if (IsNan::isnan(x_val)) { + out[id] = x_val; + } + else if (x_idx < 0) { + out[id] = left_val; + } + else if (x_val == xp[xp_size - 1]) { + out[id] = fp[xp_size - 1]; + } + else if (x_idx >= static_cast(xp_size - 1)) { + out[id] = right_val; + } + else { + TValue slope = + (fp[x_idx + 1] - fp[x_idx]) / (xp[x_idx + 1] - xp[x_idx]); + TValue res = slope * (x_val - xp[x_idx]) + fp[x_idx]; - if (IsNan::isnan(res)) { - res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1]; - if (IsNan::isnan(res) && - (fp[x_idx] == fp[x_idx + 1])) { - res = fp[x_idx]; - } + if (IsNan::isnan(res)) { + res = slope * (x_val - xp[x_idx + 1]) + fp[x_idx + 1]; + if (IsNan::isnan(res) && (fp[x_idx] == fp[x_idx + 1])) { + res = fp[x_idx]; } - out[i] = res; } - }); - }); -} - + out[id] = res; + } + } +}; } // namespace dpnp::kernels::interpolate From 4bd3019a20257601b6739bd584d0dbd0772899b1 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 16 Mar 2026 13:03:42 +0100 Subject: [PATCH 05/17] Add missing includes --- dpnp/backend/extensions/indexing/choose.cpp | 13 +++++++++---- dpnp/backend/kernels/indexing/choose.hpp | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp index 91dcf6b00213..b2c2a9e1bcff 100644 --- a/dpnp/backend/extensions/indexing/choose.cpp +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include #include #include @@ -46,13 +48,16 @@ // dpctl tensor headers #include "utils/indexing_utils.hpp" #include "utils/memory_overlap.hpp" -#include "utils/offset_utils.hpp" // +#include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/sycl_alloc_utils.hpp" #include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" namespace dpnp::extensions::indexing { +namespace impl +{ namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; @@ -504,16 +509,16 @@ void init_choose_dispatch_tables(void) init_dispatch_table( choose_wrap_dispatch_table); } +} // namespace impl void init_choose(py::module_ m) { - dpnp::extensions::indexing::init_choose_dispatch_tables(); + impl::init_choose_dispatch_tables(); - m.def("_choose", &py_choose, "", py::arg("src"), py::arg("chcs"), + m.def("_choose", &impl::py_choose, "", py::arg("src"), py::arg("chcs"), py::arg("dst"), py::arg("mode"), py::arg("sycl_queue"), py::arg("depends") = py::list()); return; } - } // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/kernels/indexing/choose.hpp b/dpnp/backend/kernels/indexing/choose.hpp index 7129ca7bce1e..2fb8762eb273 100644 --- a/dpnp/backend/kernels/indexing/choose.hpp +++ b/dpnp/backend/kernels/indexing/choose.hpp @@ -28,6 +28,8 @@ #pragma once +#include + #include #include "kernels/dpctl_tensor_types.hpp" From 5d68f5a7317415d5c32f7be2d0e2544b22e76a6b Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 16 Mar 2026 14:50:21 +0100 Subject: [PATCH 06/17] Add dedicated windows functors --- dpnp/backend/extensions/indexing/choose.cpp | 3 +- .../elementwise_functions/interpolate.cpp | 31 +++++----- dpnp/backend/extensions/window/common.hpp | 24 +++++-- dpnp/backend/extensions/window/kaiser.cpp | 44 +++---------- dpnp/backend/extensions/window/kaiser.hpp | 7 ++- dpnp/backend/extensions/window/window_py.cpp | 37 +++++++---- dpnp/backend/kernels/indexing/choose.hpp | 2 +- .../window/bartlett.hpp | 25 ++------ .../window/blackman.hpp | 23 +------ .../window/hamming.hpp | 23 +------ .../window/hanning.hpp | 26 ++------ dpnp/backend/kernels/window/kaiser.hpp | 62 +++++++++++++++++++ 12 files changed, 153 insertions(+), 154 deletions(-) rename dpnp/backend/{extensions => kernels}/window/bartlett.hpp (80%) rename dpnp/backend/{extensions => kernels}/window/blackman.hpp (83%) rename dpnp/backend/{extensions => kernels}/window/hamming.hpp (83%) rename dpnp/backend/{extensions => kernels}/window/hanning.hpp (80%) create mode 100644 dpnp/backend/kernels/window/kaiser.hpp diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp index b2c2a9e1bcff..fa128954aeb6 100644 --- a/dpnp/backend/extensions/indexing/choose.cpp +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -56,9 +56,10 @@ namespace dpnp::extensions::indexing { +namespace py = pybind11; + namespace impl { -namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::ssize_t; diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 021e1941dcaf..1d07e548a47a 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -51,28 +51,19 @@ #include "ext/common.hpp" #include "ext/validation_utils.hpp" -namespace py = pybind11; -namespace td_ns = dpctl::tensor::type_dispatch; -namespace type_utils = dpctl::tensor::type_utils; - -using ext::common::value_type_of; -using ext::validation::array_names; - -using ext::common::dtype_from_typenum; -using ext::validation::check_has_dtype; -using ext::validation::check_num_dims; -using ext::validation::check_same_dtype; -using ext::validation::check_same_size; -using ext::validation::common_checks; - namespace dpnp::extensions::ufunc { +namespace py = pybind11; + namespace impl { -using ext::common::init_dispatch_vector; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace type_utils = dpctl::tensor::type_utils; template -using value_type_of_t = typename value_type_of::type; +using value_type_of_t = typename ext::common::value_type_of::type; + +using ext::common::dtype_from_typenum; typedef sycl::event (*interpolate_fn_ptr_t)(sycl::queue &, const void *, // x @@ -163,6 +154,13 @@ struct InterpolateFactory namespace detail { +using ext::validation::array_names; +using ext::validation::check_has_dtype; +using ext::validation::check_num_dims; +using ext::validation::check_same_dtype; +using ext::validation::check_same_size; +using ext::validation::common_checks; + void validate(const dpctl::tensor::usm_ndarray &x, const dpctl::tensor::usm_ndarray &idx, const dpctl::tensor::usm_ndarray &xp, @@ -258,6 +256,7 @@ std::pair static void init_interpolate_dispatch_vectors() { + using ext::common::init_dispatch_vector; init_dispatch_vector( interpolate_dispatch_vector); } diff --git a/dpnp/backend/extensions/window/common.hpp b/dpnp/backend/extensions/window/common.hpp index cb084e972d78..d8f43b07bfc0 100644 --- a/dpnp/backend/extensions/window/common.hpp +++ b/dpnp/backend/extensions/window/common.hpp @@ -28,11 +28,11 @@ #pragma once -#include -#include #include #include "dpctl4pybind11.hpp" +#include +#include // dpctl tensor headers #include "utils/output_validation.hpp" @@ -41,10 +41,8 @@ namespace dpnp::extensions::window { - -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; typedef sycl::event (*window_fn_ptr_t)(sycl::queue &, char *, @@ -72,6 +70,20 @@ sycl::event window_impl(sycl::queue &exec_q, return window_ev; } +template typename FunctorT> +struct Factory +{ + fnT get() + { + if constexpr (std::is_floating_point_v) { + return window_impl; + } + else { + return nullptr; + } + } +}; + template std::tuple window_fn(sycl::queue &exec_q, @@ -101,7 +113,7 @@ std::tuple } const int result_typenum = result.get_typenum(); - auto array_types = dpctl_td_ns::usm_ndarray_types(); + auto array_types = td_ns::usm_ndarray_types(); const int result_type_id = array_types.typenum_to_lookup_id(result_typenum); funcPtrT fn = window_dispatch_vector[result_type_id]; diff --git a/dpnp/backend/extensions/window/kaiser.cpp b/dpnp/backend/extensions/window/kaiser.cpp index b83f88f69a9b..ffbff5b43e15 100644 --- a/dpnp/backend/extensions/window/kaiser.cpp +++ b/dpnp/backend/extensions/window/kaiser.cpp @@ -29,6 +29,8 @@ #include "kaiser.hpp" #include "common.hpp" +#include "kernels/window/kaiser.hpp" + // utils extension header #include "ext/common.hpp" @@ -39,13 +41,10 @@ #include -#include "kernels/elementwise_functions/i0.hpp" - namespace dpnp::extensions::window { -namespace dpctl_td_ns = dpctl::tensor::type_dispatch; - -using ext::common::init_dispatch_vector; +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; typedef sycl::event (*kaiser_fn_ptr_t)(sycl::queue &, char *, @@ -53,34 +52,10 @@ typedef sycl::event (*kaiser_fn_ptr_t)(sycl::queue &, const py::object &, const std::vector &); -static kaiser_fn_ptr_t kaiser_dispatch_vector[dpctl_td_ns::num_types]; +static kaiser_fn_ptr_t kaiser_dispatch_vector[td_ns::num_types]; -template -class KaiserFunctor +namespace impl { -private: - T *res = nullptr; - const std::size_t N; - const T beta; - -public: - KaiserFunctor(T *res, const std::size_t N, const T beta) - : res(res), N(N), beta(beta) - { - } - - void operator()(sycl::id<1> id) const - { - using dpnp::kernels::i0::cyl_bessel_i0; - - const auto i = id.get(0); - const T alpha = (N - 1) / T(2); - const T tmp = (i - alpha) / alpha; - res[i] = cyl_bessel_i0(beta * sycl::sqrt(1 - tmp * tmp)) / - cyl_bessel_i0(beta); - } -}; - template sycl::event kaiser_impl(sycl::queue &exec_q, char *result, @@ -96,7 +71,7 @@ sycl::event kaiser_impl(sycl::queue &exec_q, sycl::event kaiser_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - using KaiserKernel = KaiserFunctor; + using KaiserKernel = dpnp::kernels::kaiser::KaiserFunctor; cgh.parallel_for(sycl::range<1>(nelems), KaiserKernel(res, nelems, beta)); }); @@ -138,11 +113,12 @@ std::pair return std::make_pair(args_ev, kaiser_ev); } +} // namespace impl void init_kaiser_dispatch_vectors() { - init_dispatch_vector( + using ext::common::init_dispatch_vector; + init_dispatch_vector( kaiser_dispatch_vector); } - } // namespace dpnp::extensions::window diff --git a/dpnp/backend/extensions/window/kaiser.hpp b/dpnp/backend/extensions/window/kaiser.hpp index 0a4712cc594e..4ba506620db2 100644 --- a/dpnp/backend/extensions/window/kaiser.hpp +++ b/dpnp/backend/extensions/window/kaiser.hpp @@ -28,11 +28,15 @@ #pragma once -#include #include +#include +#include + namespace dpnp::extensions::window { +namespace py = pybind11; + extern std::pair py_kaiser(sycl::queue &exec_q, const py::object &beta, @@ -40,5 +44,4 @@ extern std::pair const std::vector &depends); extern void init_kaiser_dispatch_vectors(void); - } // namespace dpnp::extensions::window diff --git a/dpnp/backend/extensions/window/window_py.cpp b/dpnp/backend/extensions/window/window_py.cpp index 2b8090c40cca..5ae80f4027b5 100644 --- a/dpnp/backend/extensions/window/window_py.cpp +++ b/dpnp/backend/extensions/window/window_py.cpp @@ -33,11 +33,12 @@ #include #include -#include "bartlett.hpp" -#include "blackman.hpp" +#include "kernels/window/bartlett.hpp" +#include "kernels/window/blackman.hpp" +#include "kernels/window/hamming.hpp" +#include "kernels/window/hanning.hpp" + #include "common.hpp" -#include "hamming.hpp" -#include "hanning.hpp" #include "kaiser.hpp" // utils extension header @@ -51,6 +52,22 @@ using window_ns::window_fn_ptr_t; namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +template +using BartlettFactory = + window_ns::Factory; + +template +using BlackmanFactory = + window_ns::Factory; + +template +using HammingFactory = + window_ns::Factory; + +template +using HanningFactory = + window_ns::Factory; + static window_fn_ptr_t bartlett_dispatch_vector[dpctl_td_ns::num_types]; static window_fn_ptr_t blackman_dispatch_vector[dpctl_td_ns::num_types]; static window_fn_ptr_t hamming_dispatch_vector[dpctl_td_ns::num_types]; @@ -62,8 +79,7 @@ PYBIND11_MODULE(_window_impl, m) using event_vecT = std::vector; { - init_dispatch_vector( + init_dispatch_vector( bartlett_dispatch_vector); auto bartlett_pyapi = [&](sycl::queue &exec_q, const arrayT &result, @@ -78,8 +94,7 @@ PYBIND11_MODULE(_window_impl, m) } { - init_dispatch_vector( + init_dispatch_vector( blackman_dispatch_vector); auto blackman_pyapi = [&](sycl::queue &exec_q, const arrayT &result, @@ -94,8 +109,7 @@ PYBIND11_MODULE(_window_impl, m) } { - init_dispatch_vector( + init_dispatch_vector( hamming_dispatch_vector); auto hamming_pyapi = [&](sycl::queue &exec_q, const arrayT &result, @@ -110,8 +124,7 @@ PYBIND11_MODULE(_window_impl, m) } { - init_dispatch_vector( + init_dispatch_vector( hanning_dispatch_vector); auto hanning_pyapi = [&](sycl::queue &exec_q, const arrayT &result, diff --git a/dpnp/backend/kernels/indexing/choose.hpp b/dpnp/backend/kernels/indexing/choose.hpp index 2fb8762eb273..49b71d05c96b 100644 --- a/dpnp/backend/kernels/indexing/choose.hpp +++ b/dpnp/backend/kernels/indexing/choose.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2024, Intel Corporation +// Copyright (c) 2026, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/window/bartlett.hpp b/dpnp/backend/kernels/window/bartlett.hpp similarity index 80% rename from dpnp/backend/extensions/window/bartlett.hpp rename to dpnp/backend/kernels/window/bartlett.hpp index 69d3be627c84..461d26bc3ae6 100644 --- a/dpnp/backend/extensions/window/bartlett.hpp +++ b/dpnp/backend/kernels/window/bartlett.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2025, Intel Corporation +// Copyright (c) 2026, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -19,7 +19,7 @@ // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, RES, OR PROFITS; OR BUSINESS +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF @@ -28,12 +28,10 @@ #pragma once -#include "common.hpp" #include -namespace dpnp::extensions::window::kernels +namespace dpnp::kernels::bartlett { - template class BartlettFunctor { @@ -52,19 +50,4 @@ class BartlettFunctor res[i] = T(1) - sycl::fabs(i - alpha) / alpha; } }; - -template -struct BartlettFactory -{ - fnT get() - { - if constexpr (std::is_floating_point_v) { - return window_impl; - } - else { - return nullptr; - } - } -}; - -} // namespace dpnp::extensions::window::kernels +} // namespace dpnp::kernels::bartlett diff --git a/dpnp/backend/extensions/window/blackman.hpp b/dpnp/backend/kernels/window/blackman.hpp similarity index 83% rename from dpnp/backend/extensions/window/blackman.hpp rename to dpnp/backend/kernels/window/blackman.hpp index 7a75d226792f..cc468ca5e915 100644 --- a/dpnp/backend/extensions/window/blackman.hpp +++ b/dpnp/backend/kernels/window/blackman.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2025, Intel Corporation +// Copyright (c) 2026, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -28,12 +28,10 @@ #pragma once -#include "common.hpp" #include -namespace dpnp::extensions::window::kernels +namespace dpnp::kernels::blackman { - template class BlackmanFunctor { @@ -53,19 +51,4 @@ class BlackmanFunctor T(0.08) * sycl::cospi(T(2) * alpha); } }; - -template -struct BlackmanFactory -{ - fnT get() - { - if constexpr (std::is_floating_point_v) { - return window_impl; - } - else { - return nullptr; - } - } -}; - -} // namespace dpnp::extensions::window::kernels +} // namespace dpnp::kernels::blackman diff --git a/dpnp/backend/extensions/window/hamming.hpp b/dpnp/backend/kernels/window/hamming.hpp similarity index 83% rename from dpnp/backend/extensions/window/hamming.hpp rename to dpnp/backend/kernels/window/hamming.hpp index 521ebc10c281..d2b7cbac378f 100644 --- a/dpnp/backend/extensions/window/hamming.hpp +++ b/dpnp/backend/kernels/window/hamming.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2025, Intel Corporation +// Copyright (c) 2026, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -28,12 +28,10 @@ #pragma once -#include "common.hpp" #include -namespace dpnp::extensions::window::kernels +namespace dpnp::kernels::hamming { - template class HammingFunctor { @@ -51,19 +49,4 @@ class HammingFunctor res[i] = T(0.54) - T(0.46) * sycl::cospi(T(2) * i / (N - 1)); } }; - -template -struct HammingFactory -{ - fnT get() - { - if constexpr (std::is_floating_point_v) { - return window_impl; - } - else { - return nullptr; - } - } -}; - -} // namespace dpnp::extensions::window::kernels +} // namespace dpnp::kernels::hamming diff --git a/dpnp/backend/extensions/window/hanning.hpp b/dpnp/backend/kernels/window/hanning.hpp similarity index 80% rename from dpnp/backend/extensions/window/hanning.hpp rename to dpnp/backend/kernels/window/hanning.hpp index 612036d6b05a..36fd2c196060 100644 --- a/dpnp/backend/extensions/window/hanning.hpp +++ b/dpnp/backend/kernels/window/hanning.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2025, Intel Corporation +// Copyright (c) 2026, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -28,12 +28,10 @@ #pragma once -#include "common.hpp" #include -namespace dpnp::extensions::window::kernels +namespace dpnp::kernels::hanning { - template class HanningFunctor { @@ -48,22 +46,8 @@ class HanningFunctor { const auto i = id.get(0); - res[i] = T(0.5) - T(0.5) * sycl::cospi(T(2) * i / (N - 1)); + const T alpha = (N - 1) / T(2); + res[i] = T(1) - sycl::fabs(i - alpha) / alpha; } }; - -template -struct HanningFactory -{ - fnT get() - { - if constexpr (std::is_floating_point_v) { - return window_impl; - } - else { - return nullptr; - } - } -}; - -} // namespace dpnp::extensions::window::kernels +} // namespace dpnp::kernels::hanning diff --git a/dpnp/backend/kernels/window/kaiser.hpp b/dpnp/backend/kernels/window/kaiser.hpp new file mode 100644 index 000000000000..ab1a2146c90b --- /dev/null +++ b/dpnp/backend/kernels/window/kaiser.hpp @@ -0,0 +1,62 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "kernels/elementwise_functions/i0.hpp" + +namespace dpnp::kernels::kaiser +{ +template +class KaiserFunctor +{ +private: + T *res = nullptr; + const std::size_t N; + const T beta; + +public: + KaiserFunctor(T *res, const std::size_t N, const T beta) + : res(res), N(N), beta(beta) + { + } + + void operator()(sycl::id<1> id) const + { + using dpnp::kernels::i0::cyl_bessel_i0; + + const auto i = id.get(0); + const T alpha = (N - 1) / T(2); + const T tmp = (i - alpha) / alpha; + res[i] = cyl_bessel_i0(beta * sycl::sqrt(1 - tmp * tmp)) / + cyl_bessel_i0(beta); + } +}; +} // namespace dpnp::kernels::kaiser From 51c30d1938b07bfcbd5fbe3c7c599f8a56c731b1 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 16 Mar 2026 15:05:46 +0100 Subject: [PATCH 07/17] Add missing includes and fix a typo in HanningFunctor --- dpnp/backend/kernels/window/bartlett.hpp | 2 ++ dpnp/backend/kernels/window/blackman.hpp | 2 ++ dpnp/backend/kernels/window/hamming.hpp | 2 ++ dpnp/backend/kernels/window/hanning.hpp | 5 +++-- dpnp/backend/kernels/window/kaiser.hpp | 2 ++ 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/kernels/window/bartlett.hpp b/dpnp/backend/kernels/window/bartlett.hpp index 461d26bc3ae6..20d410150dcb 100644 --- a/dpnp/backend/kernels/window/bartlett.hpp +++ b/dpnp/backend/kernels/window/bartlett.hpp @@ -28,6 +28,8 @@ #pragma once +#include + #include namespace dpnp::kernels::bartlett diff --git a/dpnp/backend/kernels/window/blackman.hpp b/dpnp/backend/kernels/window/blackman.hpp index cc468ca5e915..9df7cb8728e2 100644 --- a/dpnp/backend/kernels/window/blackman.hpp +++ b/dpnp/backend/kernels/window/blackman.hpp @@ -28,6 +28,8 @@ #pragma once +#include + #include namespace dpnp::kernels::blackman diff --git a/dpnp/backend/kernels/window/hamming.hpp b/dpnp/backend/kernels/window/hamming.hpp index d2b7cbac378f..895ecb0e588c 100644 --- a/dpnp/backend/kernels/window/hamming.hpp +++ b/dpnp/backend/kernels/window/hamming.hpp @@ -28,6 +28,8 @@ #pragma once +#include + #include namespace dpnp::kernels::hamming diff --git a/dpnp/backend/kernels/window/hanning.hpp b/dpnp/backend/kernels/window/hanning.hpp index 36fd2c196060..35b441f921f8 100644 --- a/dpnp/backend/kernels/window/hanning.hpp +++ b/dpnp/backend/kernels/window/hanning.hpp @@ -28,6 +28,8 @@ #pragma once +#include + #include namespace dpnp::kernels::hanning @@ -46,8 +48,7 @@ class HanningFunctor { const auto i = id.get(0); - const T alpha = (N - 1) / T(2); - res[i] = T(1) - sycl::fabs(i - alpha) / alpha; + res[i] = T(0.5) - T(0.5) * sycl::cospi(T(2) * i / (N - 1)); } }; } // namespace dpnp::kernels::hanning diff --git a/dpnp/backend/kernels/window/kaiser.hpp b/dpnp/backend/kernels/window/kaiser.hpp index ab1a2146c90b..ce8c8e52fd18 100644 --- a/dpnp/backend/kernels/window/kaiser.hpp +++ b/dpnp/backend/kernels/window/kaiser.hpp @@ -28,6 +28,8 @@ #pragma once +#include + #include #include "kernels/elementwise_functions/i0.hpp" From c1ef2815e85aa3039133141456cedda7cdf66a15 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 16 Mar 2026 15:21:22 +0100 Subject: [PATCH 08/17] Add missing includes to common header --- dpnp/backend/extensions/window/common.hpp | 7 +++++++ dpnp/backend/extensions/window/kaiser.cpp | 5 ++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dpnp/backend/extensions/window/common.hpp b/dpnp/backend/extensions/window/common.hpp index d8f43b07bfc0..9e7b1192e3a2 100644 --- a/dpnp/backend/extensions/window/common.hpp +++ b/dpnp/backend/extensions/window/common.hpp @@ -28,6 +28,13 @@ #pragma once +#include +#include +#include +#include +#include +#include + #include #include "dpctl4pybind11.hpp" diff --git a/dpnp/backend/extensions/window/kaiser.cpp b/dpnp/backend/extensions/window/kaiser.cpp index ffbff5b43e15..22c80ffdcc53 100644 --- a/dpnp/backend/extensions/window/kaiser.cpp +++ b/dpnp/backend/extensions/window/kaiser.cpp @@ -26,6 +26,8 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** +#include + #include "kaiser.hpp" #include "common.hpp" @@ -35,12 +37,9 @@ #include "ext/common.hpp" // dpctl tensor headers -#include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" -#include - namespace dpnp::extensions::window { namespace py = pybind11; From a70963b3dbea9c9019578d8dc61a62e5bd2debee Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 17 Mar 2026 12:42:18 +0100 Subject: [PATCH 09/17] Move py_kaiser away from impl namespace, because exposed as API --- dpnp/backend/extensions/window/kaiser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/extensions/window/kaiser.cpp b/dpnp/backend/extensions/window/kaiser.cpp index 22c80ffdcc53..e5c1aa837a64 100644 --- a/dpnp/backend/extensions/window/kaiser.cpp +++ b/dpnp/backend/extensions/window/kaiser.cpp @@ -28,8 +28,8 @@ #include -#include "kaiser.hpp" #include "common.hpp" +#include "kaiser.hpp" #include "kernels/window/kaiser.hpp" @@ -91,6 +91,7 @@ struct KaiserFactory } } }; +} // namespace impl std::pair py_kaiser(sycl::queue &exec_q, @@ -112,7 +113,6 @@ std::pair return std::make_pair(args_ev, kaiser_ev); } -} // namespace impl void init_kaiser_dispatch_vectors() { From 9c97f38fdbd277e8f4c92b90f059f7621fb5eadb Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 17 Mar 2026 12:43:22 +0100 Subject: [PATCH 10/17] Disable window kernels from coverage report --- scripts/gen_coverage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py index d245c5d31060..eaf7db1fe8bc 100644 --- a/scripts/gen_coverage.py +++ b/scripts/gen_coverage.py @@ -260,6 +260,7 @@ def find_objects(): "-ignore-filename-regex=/tmp/icpx*", r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$", r"-ignore-filename-regex=.*/backend/kernels/indexing/.*\.hpp$", + r"-ignore-filename-regex=.*/backend/kernels/window/.*\.hpp$", "-instr-profile=" + instr_profile_fn, ] + objects From 4787cea96b393039f97ba8d8aa5d77f301613b16 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 17 Mar 2026 13:40:02 +0100 Subject: [PATCH 11/17] Add dedicated HistogramFunctor kernel --- .../extensions/statistics/CMakeLists.txt | 2 +- .../statistics/histogram_common.hpp | 48 +++------ dpnp/backend/kernels/statistics/histogram.hpp | 99 +++++++++++++++++++ 3 files changed, 111 insertions(+), 38 deletions(-) create mode 100644 dpnp/backend/kernels/statistics/histogram.hpp diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 7ccb05238ae4..36786c8cbaf3 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -67,7 +67,7 @@ set_target_properties( target_include_directories( ${python_module_name} - PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../common + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../ ${CMAKE_CURRENT_SOURCE_DIR}/../common ) # treat below headers as system to suppress the warnings there during the build diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp index 02fc66f26610..e19158ef80df 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.hpp +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -28,9 +28,15 @@ #pragma once +#include +#include +#include +#include + #include #include "ext/common.hpp" +#include "kernels/statistics/histogram.hpp" namespace dpctl::tensor { @@ -277,9 +283,6 @@ bool check_in_bounds(const dT &val, const dT &min, const dT &max) return !_less(val, min) && !_less(max, val) && !IsNan
::isnan(val); } -template -class histogram_kernel; - template void submit_histogram(const T *in, const size_t size, @@ -291,41 +294,12 @@ void submit_histogram(const T *in, sycl::nd_range<1> nd_range, sycl::handler &cgh) { - cgh.parallel_for>( - nd_range, [=](sycl::nd_item<1> item) { - auto id = item.get_group_linear_id(); - auto lid = item.get_local_linear_id(); - auto group = item.get_group(); - auto local_size = item.get_local_range(0); - - hist.init(item); - edges.init(item); - - if constexpr (HistImpl::sync_after_init || Edges::sync_after_init) { - sycl::group_barrier(group, sycl::memory_scope::work_group); - } - - auto bounds = edges.get_bounds(); - - for (uint32_t i = 0; i < WorkPI; ++i) { - auto data_idx = id * WorkPI * local_size + i * local_size + lid; - if (data_idx < size) { - auto *d = &in[data_idx * dims]; - - if (edges.in_bounds(d, bounds)) { - auto bin = edges.get_bin(item, d, bounds); - auto weight = weights.get(data_idx); - hist.add(item, bin, weight); - } - } - } - - if constexpr (HistImpl::sync_before_finalize) { - sycl::group_barrier(group, sycl::memory_scope::work_group); - } + using HistogramKernel = + dpnp::kernels::histogram::HistogramFunctor; - hist.finalize(item); - }); + cgh.parallel_for( + nd_range, + HistogramKernel(in, size, dims, WorkPI, hist, edges, weights)); } void validate(const usm_ndarray &sample, diff --git a/dpnp/backend/kernels/statistics/histogram.hpp b/dpnp/backend/kernels/statistics/histogram.hpp new file mode 100644 index 000000000000..d55252157e92 --- /dev/null +++ b/dpnp/backend/kernels/statistics/histogram.hpp @@ -0,0 +1,99 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp::kernels::histogram +{ +template +class HistogramFunctor +{ +private: + const T *in = nullptr; + const std::size_t size; + const std::size_t dims; + const std::uint32_t WorkPI; + const HistImpl hist; + const Edges edges; + const Weights weights; + +public: + HistogramFunctor(const T *in_, + const std::size_t size_, + const std::size_t dims_, + const std::uint32_t WorkPI_, + const HistImpl &hist_, + const Edges &edges_, + const Weights &weights_) + : in(in_), size(size_), dims(dims_), WorkPI(WorkPI_), hist(hist_), + edges(edges_), weights(weights_) + { + } + + void operator()(sycl::nd_item<1> item) const + { + auto id = item.get_group_linear_id(); + auto lid = item.get_local_linear_id(); + auto group = item.get_group(); + auto local_size = item.get_local_range(0); + + hist.init(item); + edges.init(item); + + if constexpr (HistImpl::sync_after_init || Edges::sync_after_init) { + sycl::group_barrier(group, sycl::memory_scope::work_group); + } + + auto bounds = edges.get_bounds(); + + for (uint32_t i = 0; i < WorkPI; ++i) { + auto data_idx = id * WorkPI * local_size + i * local_size + lid; + if (data_idx < size) { + auto *d = &in[data_idx * dims]; + + if (edges.in_bounds(d, bounds)) { + auto bin = edges.get_bin(item, d, bounds); + auto weight = weights.get(data_idx); + hist.add(item, bin, weight); + } + } + } + + if constexpr (HistImpl::sync_before_finalize) { + sycl::group_barrier(group, sycl::memory_scope::work_group); + } + + hist.finalize(item); + } +}; +} // namespace dpnp::kernels::histogram From 9e4d7be739dbdde32d7679fa034b52e7ef496435 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 17 Mar 2026 15:06:32 +0100 Subject: [PATCH 12/17] Add dedicated SlidingWindow1dFunctor and SlidingWindow1dSmallFunctor kernels --- .../statistics/histogram_common.hpp | 10 +- .../statistics/sliding_window1d.hpp | 198 ++----------- .../kernels/statistics/sliding_window1d.hpp | 276 ++++++++++++++++++ 3 files changed, 297 insertions(+), 187 deletions(-) create mode 100644 dpnp/backend/kernels/statistics/sliding_window1d.hpp diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp index e19158ef80df..b96ed7b84d71 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.hpp +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -35,23 +35,19 @@ #include +#include "dpctl4pybind11.hpp" + #include "ext/common.hpp" #include "kernels/statistics/histogram.hpp" -namespace dpctl::tensor +namespace statistics::histogram { -class usm_ndarray; -} - using dpctl::tensor::usm_ndarray; using ext::common::AtomicOp; using ext::common::IsNan; using ext::common::Less; -namespace statistics::histogram -{ - template struct CachedData { diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp index f33a23609666..4820d0a53ad8 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.hpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -28,23 +28,19 @@ #pragma once -#include - -#include "utils/math_utils.hpp" -#include +#include +#include #include -#include - -#include "ext/common.hpp" +#include -using dpctl::tensor::usm_ndarray; +#include "dpctl4pybind11.hpp" -using ext::common::Align; -using ext::common::CeilDiv; +#include "kernels/statistics/sliding_window1d.hpp" namespace statistics::sliding_window1d { +using dpctl::tensor::usm_ndarray; template class _RegistryDataStorage @@ -424,60 +420,6 @@ PaddedSpan return PaddedSpan(data, size, offset); } -template -void process_block(Results &results, - uint32_t r_size, - AData &a_data, - VData &v_data, - uint32_t block_size, - Op op, - Red red) -{ - for (uint32_t i = 0; i < block_size; ++i) { - auto v_val = v_data.broadcast(i); - for (uint32_t r = 0; r < r_size; ++r) { - results[r] = red(results[r], op(a_data[r], v_val)); - } - a_data.advance_left(); - } -} - -template -SizeT get_global_linear_id(const uint32_t wpi, const sycl::nd_item<1> &item) -{ - auto sbgroup = item.get_sub_group(); - const auto sg_loc_id = sbgroup.get_local_linear_id(); - - const SizeT sg_base_id = wpi * (item.get_global_linear_id() - sg_loc_id); - const SizeT id = sg_base_id + sg_loc_id; - - return id; -} - -template -uint32_t get_results_num(const uint32_t wpi, - const SizeT size, - const SizeT global_id, - const sycl::nd_item<1> &item) -{ - auto sbgroup = item.get_sub_group(); - - const auto sbg_size = sbgroup.get_max_local_range()[0]; - const auto size_ = sycl::sub_sat(size, global_id); - return std::min(SizeT(wpi), CeilDiv(size_, sbg_size)); -} - -template -class sliding_window1d_kernel; - template &a, sycl::nd_range<1> nd_range, sycl::handler &cgh) { - cgh.parallel_for>( - nd_range, [=](sycl::nd_item<1> item) { - auto glid = get_global_linear_id(WorkPI, item); - - auto results = RegistryData(item); - results.fill(0); - - auto results_num = get_results_num(WorkPI, out.size(), glid, item); - - const auto *a_begin = a.begin(); - const auto *a_end = a.end(); + using SlidingWindow1dKernel = + dpnp::kernels::sliding_window1d::SlidingWindow1dFunctor< + WorkPI, PaddedSpan, Span, Op, Red, + Span, RegistryData, RegistryWindow>; - auto sbgroup = item.get_sub_group(); - - const auto chunks_count = - CeilDiv(v.size(), sbgroup.get_max_local_range()[0]); - - const auto *a_ptr = &a.padded_begin()[glid]; - - auto _a_load_cond = [a_begin, a_end](auto &&ptr) { - return ptr >= a_begin && ptr < a_end; - }; - - auto a_data = RegistryWindow(item); - a_ptr = a_data.load(a_ptr, _a_load_cond, 0); - - const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; - auto v_size = v.size(); - - for (uint32_t b = 0; b < chunks_count; ++b) { - auto v_data = RegistryData(item); - v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); - - uint32_t chunk_size_ = - std::min(v_size, SizeT(v_data.total_size())); - process_block(results, results_num, a_data, v_data, chunk_size_, - op, red); - - if (b != chunks_count - 1) { - a_ptr = a_data.load_lane(a_data.size_y() - 1, a_ptr, - _a_load_cond, 0); - v_size -= v_data.total_size(); - } - } - - auto *const out_ptr = out.begin(); - // auto *const out_end = out.end(); - - auto y_start = glid; - auto y_stop = - std::min(y_start + WorkPI * results.size_x(), out.size()); - uint32_t i = 0; - for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { - out_ptr[y] = results[i++]; - } - // while the code itself seems to be valid, inside correlate - // kernel it results in memory corruption. Further investigation - // is needed. SAT-7693 - // corruption results.store(&out_ptr[glid], - // [out_end](auto &&ptr) { return ptr < out_end; }); - }); + cgh.parallel_for( + nd_range, SlidingWindow1dKernel(a, v, op, red, out)); } -template -class sliding_window1d_small_kernel; - template &a, sycl::nd_range<1> nd_range, sycl::handler &cgh) { - cgh.parallel_for>( - nd_range, [=](sycl::nd_item<1> item) { - auto glid = get_global_linear_id(WorkPI, item); - - auto results = RegistryData(item); - results.fill(0); - - auto sbgroup = item.get_sub_group(); - auto sg_size = sbgroup.get_max_local_range()[0]; - - const uint32_t to_read = WorkPI * sg_size + v.size(); - const auto *a_begin = a.begin(); - - const auto *a_ptr = &a.padded_begin()[glid]; - const auto *a_end = std::min(a_ptr + to_read, a.end()); - - auto _a_load_cond = [a_begin, a_end](auto &&ptr) { - return ptr >= a_begin && ptr < a_end; - }; + using SlidingWindow1dSmallKernel = + dpnp::kernels::sliding_window1d::SlidingWindow1dSmallFunctor< + WorkPI, PaddedSpan, Span, Op, Red, + Span, RegistryData, RegistryWindow>; - auto a_data = RegistryWindow(item); - a_data.load(a_ptr, _a_load_cond, 0); - - const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; - auto v_size = v.size(); - - auto v_data = RegistryData(item); - v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); - - auto results_num = get_results_num(WorkPI, out.size(), glid, item); - - process_block(results, results_num, a_data, v_data, v_size, op, - red); - - auto *const out_ptr = out.begin(); - // auto *const out_end = out.end(); - - auto y_start = glid; - auto y_stop = - std::min(y_start + WorkPI * results.size_x(), out.size()); - uint32_t i = 0; - for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { - out_ptr[y] = results[i++]; - } - // while the code itself seems to be valid, inside correlate - // kernel it results in memory corruption. Further investigation - // is needed. SAT-7693 - // corruption results.store(&out_ptr[glid], - // [out_end](auto &&ptr) { return ptr < out_end; }); - }); + cgh.parallel_for( + nd_range, SlidingWindow1dSmallKernel(a, v, op, red, out)); } void validate(const usm_ndarray &a, diff --git a/dpnp/backend/kernels/statistics/sliding_window1d.hpp b/dpnp/backend/kernels/statistics/sliding_window1d.hpp new file mode 100644 index 000000000000..81fd5dca850c --- /dev/null +++ b/dpnp/backend/kernels/statistics/sliding_window1d.hpp @@ -0,0 +1,276 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + +#include "ext/common.hpp" + +namespace dpnp::kernels::sliding_window1d +{ +using ext::common::CeilDiv; + +namespace detail +{ +template +SizeT get_global_linear_id(const uint32_t wpi, const sycl::nd_item<1> &item) +{ + auto sbgroup = item.get_sub_group(); + const auto sg_loc_id = sbgroup.get_local_linear_id(); + + const SizeT sg_base_id = wpi * (item.get_global_linear_id() - sg_loc_id); + const SizeT id = sg_base_id + sg_loc_id; + + return id; +} + +template +uint32_t get_results_num(const uint32_t wpi, + const SizeT size, + const SizeT global_id, + const sycl::nd_item<1> &item) +{ + auto sbgroup = item.get_sub_group(); + + const auto sbg_size = sbgroup.get_max_local_range()[0]; + const auto size_ = sycl::sub_sat(size, global_id); + return std::min(SizeT(wpi), CeilDiv(size_, sbg_size)); +} + +template +void process_block(Results &results, + uint32_t r_size, + AData &a_data, + VData &v_data, + uint32_t block_size, + Op op, + Red red) +{ + for (uint32_t i = 0; i < block_size; ++i) { + auto v_val = v_data.broadcast(i); + for (uint32_t r = 0; r < r_size; ++r) { + results[r] = red(results[r], op(a_data[r], v_val)); + } + a_data.advance_left(); + } +} +} // namespace detail + +template + class RegistryDataT, + template + class RegistryWindowT> +class SlidingWindow1dFunctor +{ +private: + const SpanT a; + const KernelT v; + const OpT op; + const RedT red; + ResultT out; + + static constexpr std::uint32_t default_reg_data_size = 1; + using SizeT = typename SpanT::size_type; + +public: + SlidingWindow1dFunctor(const SpanT &a_, + const KernelT &v_, + const OpT &op_, + const RedT &red_, + ResultT &out_) + : a(a_), v(v_), op(op_), red(red_), out(out_) + { + } + + void operator()(sycl::nd_item<1> item) const + { + auto glid = detail::get_global_linear_id(WorkPI, item); + + auto results = + RegistryDataT(item); + results.fill(0); + + auto results_num = + detail::get_results_num(WorkPI, out.size(), glid, item); + + const auto *a_begin = a.begin(); + const auto *a_end = a.end(); + + auto sbgroup = item.get_sub_group(); + + const auto chunks_count = + CeilDiv(v.size(), sbgroup.get_max_local_range()[0]); + + const auto *a_ptr = &a.padded_begin()[glid]; + + auto _a_load_cond = [a_begin, a_end](auto &&ptr) { + return ptr >= a_begin && ptr < a_end; + }; + + auto a_data = + RegistryWindowT(item); + a_ptr = a_data.load(a_ptr, _a_load_cond, 0); + + const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; + auto v_size = v.size(); + + for (uint32_t b = 0; b < chunks_count; ++b) { + auto v_data = RegistryDataT(item); + v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); + + uint32_t chunk_size_ = std::min(v_size, SizeT(v_data.total_size())); + detail::process_block(results, results_num, a_data, v_data, + chunk_size_, op, red); + + if (b != chunks_count - 1) { + a_ptr = a_data.load_lane(a_data.size_y() - 1, a_ptr, + _a_load_cond, 0); + v_size -= v_data.total_size(); + } + } + + auto *const out_ptr = out.begin(); + // auto *const out_end = out.end(); + + auto y_start = glid; + auto y_stop = std::min(y_start + WorkPI * results.size_x(), out.size()); + uint32_t i = 0; + for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { + out_ptr[y] = results[i++]; + } + // while the code itself seems to be valid, inside correlate + // kernel it results in memory corruption. Further investigation + // is needed. SAT-7693 + // corruption results.store(&out_ptr[glid], + // [out_end](auto &&ptr) { return ptr < out_end; }); + } +}; + +template + class RegistryDataT, + template + class RegistryWindowT> +class SlidingWindow1dSmallFunctor +{ +private: + const SpanT a; + const KernelT v; + const OpT op; + const RedT red; + ResultT out; + + static constexpr std::uint32_t default_reg_data_size = 1; + using SizeT = typename SpanT::size_type; + +public: + SlidingWindow1dSmallFunctor(const SpanT &a_, + const KernelT &v_, + const OpT &op_, + const RedT &red_, + ResultT &out_) + : a(a_), v(v_), op(op_), red(red_), out(out_) + { + } + + void operator()(sycl::nd_item<1> item) const + { + auto glid = detail::get_global_linear_id(WorkPI, item); + + auto results = + RegistryDataT(item); + results.fill(0); + + auto sbgroup = item.get_sub_group(); + auto sg_size = sbgroup.get_max_local_range()[0]; + + const uint32_t to_read = WorkPI * sg_size + v.size(); + const auto *a_begin = a.begin(); + + const auto *a_ptr = &a.padded_begin()[glid]; + const auto *a_end = std::min(a_ptr + to_read, a.end()); + + auto _a_load_cond = [a_begin, a_end](auto &&ptr) { + return ptr >= a_begin && ptr < a_end; + }; + + auto a_data = + RegistryWindowT(item); + a_data.load(a_ptr, _a_load_cond, 0); + + const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; + auto v_size = v.size(); + + auto v_data = + RegistryDataT( + item); + v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); + + auto results_num = + detail::get_results_num(WorkPI, out.size(), glid, item); + + detail::process_block(results, results_num, a_data, v_data, v_size, op, + red); + + auto *const out_ptr = out.begin(); + // auto *const out_end = out.end(); + + auto y_start = glid; + auto y_stop = std::min(y_start + WorkPI * results.size_x(), out.size()); + uint32_t i = 0; + for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { + out_ptr[y] = results[i++]; + } + // while the code itself seems to be valid, inside correlate + // kernel it results in memory corruption. Further investigation + // is needed. SAT-7693 + // corruption results.store(&out_ptr[glid], + // [out_end](auto &&ptr) { return ptr < out_end; }); + } +}; +} // namespace dpnp::kernels::sliding_window1d From 8dd42752dbd1721cf9e01bec0799eb4be3138006 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 17 Mar 2026 15:07:13 +0100 Subject: [PATCH 13/17] Disable statistics kernels from coverage report --- scripts/gen_coverage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py index eaf7db1fe8bc..545fd888c1ba 100644 --- a/scripts/gen_coverage.py +++ b/scripts/gen_coverage.py @@ -260,6 +260,7 @@ def find_objects(): "-ignore-filename-regex=/tmp/icpx*", r"-ignore-filename-regex=.*/backend/kernels/elementwise_functions/.*\.hpp$", r"-ignore-filename-regex=.*/backend/kernels/indexing/.*\.hpp$", + r"-ignore-filename-regex=.*/backend/kernels/statistics/.*\.hpp$", r"-ignore-filename-regex=.*/backend/kernels/window/.*\.hpp$", "-instr-profile=" + instr_profile_fn, ] From 3147fe047bcba5973fc70c3ba6c7c4d0eabbea85 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 17 Mar 2026 15:53:30 +0100 Subject: [PATCH 14/17] Use std namespace explicitly --- .../statistics/histogram_common.hpp | 95 +++++++++++-------- .../statistics/sliding_window1d.hpp | 24 ++--- dpnp/backend/kernels/statistics/histogram.hpp | 2 +- .../kernels/statistics/sliding_window1d.hpp | 46 ++++----- 4 files changed, 93 insertions(+), 74 deletions(-) diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp index b96ed7b84d71..642b948ffc1c 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.hpp +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -71,23 +71,26 @@ struct CachedData template void init(const sycl::nd_item<_Dims> &item) const { - uint32_t llid = item.get_local_linear_id(); + std::uint32_t llid = item.get_local_linear_id(); auto local_ptr = &local_data[0]; - uint32_t size = local_data.size(); + std::uint32_t size = local_data.size(); auto group = item.get_group(); - uint32_t local_size = group.get_local_linear_range(); + std::uint32_t local_size = group.get_local_linear_range(); - for (uint32_t i = llid; i < size; i += local_size) { + for (std::uint32_t i = llid; i < size; i += local_size) { local_ptr[i] = global_data[i]; } } - size_t size() const { return local_data.size(); } + std::size_t size() const + { + return local_data.size(); + } T &operator[](const sycl::id &id) const { return local_data[id]; } template > - T &operator[](const size_t id) const + T &operator[](const std::size_t id) const { return local_data[id]; } @@ -119,12 +122,15 @@ struct UncachedData { } - size_t size() const { return _shape.size(); } + std::size_t size() const + { + return _shape.size(); + } T &operator[](const sycl::id &id) const { return global_data[id]; } template > - T &operator[](const size_t id) const + T &operator[](const std::size_t id) const { return global_data[id]; } @@ -141,15 +147,15 @@ struct HistLocalType }; template <> -struct HistLocalType +struct HistLocalType { - using type = uint32_t; + using type = std::uint32_t; }; template <> -struct HistLocalType +struct HistLocalType { - using type = int32_t; + using type = std::int32_t; }; template ::type> @@ -161,8 +167,8 @@ struct HistWithLocalCopies using LocalHist = sycl::local_accessor; HistWithLocalCopies(T *global_data, - size_t bins_count, - int32_t copies_count, + std::size_t bins_count, + std::int32_t copies_count, sycl::handler &cgh) { local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh); @@ -172,23 +178,25 @@ struct HistWithLocalCopies template void init(const sycl::nd_item<_Dims> &item, localT val = 0) const { - uint32_t llid = item.get_local_linear_id(); + std::uint32_t llid = item.get_local_linear_id(); auto *local_ptr = &local_hist[0][0]; - uint32_t size = local_hist.size(); + std::uint32_t size = local_hist.size(); auto group = item.get_group(); - uint32_t local_size = group.get_local_linear_range(); + std::uint32_t local_size = group.get_local_linear_range(); - for (uint32_t i = llid; i < size; i += local_size) { + for (std::uint32_t i = llid; i < size; i += local_size) { local_ptr[i] = val; } } template - void add(const sycl::nd_item<_Dims> &item, int32_t bin, localT value) const + void add(const sycl::nd_item<_Dims> &item, + std::int32_t bin, + localT value) const { - int32_t llid = item.get_local_linear_id(); - int32_t local_hist_count = local_hist.get_range().get(0); - int32_t local_copy_id = + std::int32_t llid = item.get_local_linear_id(); + std::int32_t local_hist_count = local_hist.get_range().get(0); + std::int32_t local_copy_id = local_hist_count == 1 ? 0 : llid % local_hist_count; AtomicOp void finalize(const sycl::nd_item<_Dims> &item) const { - uint32_t llid = item.get_local_linear_id(); - uint32_t bins_count = local_hist.get_range().get(1); - uint32_t local_hist_count = local_hist.get_range().get(0); + std::uint32_t llid = item.get_local_linear_id(); + std::uint32_t bins_count = local_hist.get_range().get(1); + std::uint32_t local_hist_count = local_hist.get_range().get(0); auto group = item.get_group(); - uint32_t local_size = group.get_local_linear_range(); + std::uint32_t local_size = group.get_local_linear_range(); - for (uint32_t i = llid; i < bins_count; i += local_size) { + for (std::uint32_t i = llid; i < bins_count; i += local_size) { auto value = local_hist[0][i]; - for (uint32_t lhc = 1; lhc < local_hist_count; ++lhc) { + for (std::uint32_t lhc = 1; lhc < local_hist_count; ++lhc) { value += local_hist[lhc][i]; } if (value != T(0)) { @@ -219,7 +227,10 @@ struct HistWithLocalCopies } } - uint32_t size() const { return local_hist.size(); } + std::uint32_t size() const + { + return local_hist.size(); + } private: LocalHist local_hist; @@ -240,7 +251,7 @@ struct HistGlobalMemory } template - void add(const sycl::nd_item<_Dims> &, int32_t bin, T value) const + void add(const sycl::nd_item<_Dims> &, std::int32_t bin, T value) const { AtomicOp::add(global_hist[bin], value); @@ -255,10 +266,13 @@ struct HistGlobalMemory T *global_hist = nullptr; }; -template +template struct NoWeights { - constexpr T get(size_t) const { return 1; } + constexpr T get(std::size_t) const + { + return 1; + } }; template @@ -266,7 +280,10 @@ struct Weights { Weights(T *weights) { data = weights; } - T get(size_t id) const { return data[id]; } + T get(std::size_t id) const + { + return data[id]; + } private: T *data = nullptr; @@ -281,9 +298,9 @@ bool check_in_bounds(const dT &val, const dT &min, const dT &max) template void submit_histogram(const T *in, - const size_t size, - const size_t dims, - const uint32_t WorkPI, + const std::size_t size, + const std::size_t dims, + const std::uint32_t WorkPI, const HistImpl &hist, const Edges &edges, const Weights &weights, @@ -303,8 +320,8 @@ void validate(const usm_ndarray &sample, const std::optional &weights, const usm_ndarray &histogram); -uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items, - uint32_t local_size, - uint32_t hist_size_in_items); +std::uint32_t get_local_hist_copies_count(std::uint32_t loc_mem_size_in_items, + std::uint32_t local_size, + std::uint32_t hist_size_in_items); } // namespace statistics::histogram diff --git a/dpnp/backend/extensions/statistics/sliding_window1d.hpp b/dpnp/backend/extensions/statistics/sliding_window1d.hpp index 4820d0a53ad8..329c96dfc1c6 100644 --- a/dpnp/backend/extensions/statistics/sliding_window1d.hpp +++ b/dpnp/backend/extensions/statistics/sliding_window1d.hpp @@ -42,7 +42,7 @@ namespace statistics::sliding_window1d { using dpctl::tensor::usm_ndarray; -template +template class _RegistryDataStorage { public: @@ -140,7 +140,7 @@ class _RegistryDataStorage ncT data[Size]; }; -template +template struct RegistryData : public _RegistryDataStorage { using SizeT = typename _RegistryDataStorage::SizeT; @@ -332,7 +332,7 @@ struct RegistryData : public _RegistryDataStorage T *store(T *const data) { return store(data, true); } }; -template +template struct RegistryWindow : public RegistryData { using SizeT = typename RegistryData::SizeT; @@ -345,7 +345,7 @@ struct RegistryWindow : public RegistryData static_assert(std::is_integral_v, "shift must be of an integral type"); - uint32_t shift_r = this->size_x() - shift; + std::uint32_t shift_r = this->size_x() - shift; for (SizeT i = 0; i < Size; ++i) { this->data[i] = this->shift_left(i, shift); auto border = @@ -365,7 +365,7 @@ struct RegistryWindow : public RegistryData } }; -template +template class Span { public: @@ -387,13 +387,13 @@ class Span const SizeT size_; }; -template +template Span make_span(T *const data, const SizeT size) { return Span(data, size); } -template +template class PaddedSpan : public Span { public: @@ -413,14 +413,14 @@ class PaddedSpan : public Span const SizeT pad_; }; -template +template PaddedSpan make_padded_span(T *const data, const SizeT size, const SizeT offset) { return PaddedSpan(data, size, offset); } -template &a, nd_range, SlidingWindow1dKernel(a, v, op, red, out)); } -template &a, void validate(const usm_ndarray &a, const usm_ndarray &v, const usm_ndarray &out, - const size_t l_pad, - const size_t r_pad); + const std::size_t l_pad, + const std::size_t r_pad); } // namespace statistics::sliding_window1d diff --git a/dpnp/backend/kernels/statistics/histogram.hpp b/dpnp/backend/kernels/statistics/histogram.hpp index d55252157e92..6d0fedbe0bc3 100644 --- a/dpnp/backend/kernels/statistics/histogram.hpp +++ b/dpnp/backend/kernels/statistics/histogram.hpp @@ -76,7 +76,7 @@ class HistogramFunctor auto bounds = edges.get_bounds(); - for (uint32_t i = 0; i < WorkPI; ++i) { + for (std::uint32_t i = 0; i < WorkPI; ++i) { auto data_idx = id * WorkPI * local_size + i * local_size + lid; if (data_idx < size) { auto *d = &in[data_idx * dims]; diff --git a/dpnp/backend/kernels/statistics/sliding_window1d.hpp b/dpnp/backend/kernels/statistics/sliding_window1d.hpp index 81fd5dca850c..d668feffce1f 100644 --- a/dpnp/backend/kernels/statistics/sliding_window1d.hpp +++ b/dpnp/backend/kernels/statistics/sliding_window1d.hpp @@ -42,7 +42,8 @@ using ext::common::CeilDiv; namespace detail { template -SizeT get_global_linear_id(const uint32_t wpi, const sycl::nd_item<1> &item) +SizeT get_global_linear_id(const std::uint32_t wpi, + const sycl::nd_item<1> &item) { auto sbgroup = item.get_sub_group(); const auto sg_loc_id = sbgroup.get_local_linear_id(); @@ -54,10 +55,10 @@ SizeT get_global_linear_id(const uint32_t wpi, const sycl::nd_item<1> &item) } template -uint32_t get_results_num(const uint32_t wpi, - const SizeT size, - const SizeT global_id, - const sycl::nd_item<1> &item) +std::uint32_t get_results_num(const std::uint32_t wpi, + const SizeT size, + const SizeT global_id, + const sycl::nd_item<1> &item) { auto sbgroup = item.get_sub_group(); @@ -72,16 +73,16 @@ template void process_block(Results &results, - uint32_t r_size, + std::uint32_t r_size, AData &a_data, VData &v_data, - uint32_t block_size, + std::uint32_t block_size, Op op, Red red) { - for (uint32_t i = 0; i < block_size; ++i) { + for (std::uint32_t i = 0; i < block_size; ++i) { auto v_val = v_data.broadcast(i); - for (uint32_t r = 0; r < r_size; ++r) { + for (std::uint32_t r = 0; r < r_size; ++r) { results[r] = red(results[r], op(a_data[r], v_val)); } a_data.advance_left(); @@ -89,15 +90,15 @@ void process_block(Results &results, } } // namespace detail -template + template class RegistryDataT, - template + template class RegistryWindowT> class SlidingWindow1dFunctor { @@ -153,12 +154,13 @@ class SlidingWindow1dFunctor const auto *v_ptr = &v.begin()[sbgroup.get_local_linear_id()]; auto v_size = v.size(); - for (uint32_t b = 0; b < chunks_count; ++b) { + for (std::uint32_t b = 0; b < chunks_count; ++b) { auto v_data = RegistryDataT(item); v_ptr = v_data.load(v_ptr, v_data.x() < v_size, 0); - uint32_t chunk_size_ = std::min(v_size, SizeT(v_data.total_size())); + std::uint32_t chunk_size_ = + std::min(v_size, SizeT(v_data.total_size())); detail::process_block(results, results_num, a_data, v_data, chunk_size_, op, red); @@ -174,8 +176,8 @@ class SlidingWindow1dFunctor auto y_start = glid; auto y_stop = std::min(y_start + WorkPI * results.size_x(), out.size()); - uint32_t i = 0; - for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { + std::uint32_t i = 0; + for (std::uint32_t y = y_start; y < y_stop; y += results.size_x()) { out_ptr[y] = results[i++]; } // while the code itself seems to be valid, inside correlate @@ -186,15 +188,15 @@ class SlidingWindow1dFunctor } }; -template + template class RegistryDataT, - template + template class RegistryWindowT> class SlidingWindow1dSmallFunctor { @@ -229,7 +231,7 @@ class SlidingWindow1dSmallFunctor auto sbgroup = item.get_sub_group(); auto sg_size = sbgroup.get_max_local_range()[0]; - const uint32_t to_read = WorkPI * sg_size + v.size(); + const std::uint32_t to_read = WorkPI * sg_size + v.size(); const auto *a_begin = a.begin(); const auto *a_ptr = &a.padded_begin()[glid]; @@ -262,8 +264,8 @@ class SlidingWindow1dSmallFunctor auto y_start = glid; auto y_stop = std::min(y_start + WorkPI * results.size_x(), out.size()); - uint32_t i = 0; - for (uint32_t y = y_start; y < y_stop; y += results.size_x()) { + std::uint32_t i = 0; + for (std::uint32_t y = y_start; y < y_stop; y += results.size_x()) { out_ptr[y] = results[i++]; } // while the code itself seems to be valid, inside correlate From 0237afbc1b28bf9412415db0d48479275bd0595b Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Tue, 17 Mar 2026 16:26:58 +0100 Subject: [PATCH 15/17] Add PR to the chnagelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a77cd9840e99..096eabef6720 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum * `dpnp` uses pybind11 3.0.2 [#27734](https://github.com/IntelPython/dpnp/pull/2773) * Modified CMake files for the extension to explicitly mark DPC++ compiler and dpctl headers as system ones and so to suppress the build warning generated inside them [#2770](https://github.com/IntelPython/dpnp/pull/2770) * Updated QR tests to avoid element-wise comparisons for `raw` and `r` modes [#2785](https://github.com/IntelPython/dpnp/pull/2785) +* Moved all SYCL kernel functors from `backend/extensions/` to a unified `backend/kernels/` directory hierarchy [#2816](https://github.com/IntelPython/dpnp/pull/2816) ### Deprecated From edea5eaacbbea244aae748e5e53d8a6f94708756 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 19 Mar 2026 11:09:15 +0100 Subject: [PATCH 16/17] Apply pre-commit checks --- .../statistics/histogram_common.hpp | 25 ++++--------------- .../elementwise_functions/interpolate.cpp | 4 +-- .../kernels/statistics/sliding_window1d.hpp | 12 +++------ 3 files changed, 11 insertions(+), 30 deletions(-) diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp index 642b948ffc1c..8091e8874d17 100644 --- a/dpnp/backend/extensions/statistics/histogram_common.hpp +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -82,10 +82,7 @@ struct CachedData } } - std::size_t size() const - { - return local_data.size(); - } + std::size_t size() const { return local_data.size(); } T &operator[](const sycl::id &id) const { return local_data[id]; } @@ -122,10 +119,7 @@ struct UncachedData { } - std::size_t size() const - { - return _shape.size(); - } + std::size_t size() const { return _shape.size(); } T &operator[](const sycl::id &id) const { return global_data[id]; } @@ -227,10 +221,7 @@ struct HistWithLocalCopies } } - std::uint32_t size() const - { - return local_hist.size(); - } + std::uint32_t size() const { return local_hist.size(); } private: LocalHist local_hist; @@ -269,10 +260,7 @@ struct HistGlobalMemory template struct NoWeights { - constexpr T get(std::size_t) const - { - return 1; - } + constexpr T get(std::size_t) const { return 1; } }; template @@ -280,10 +268,7 @@ struct Weights { Weights(T *weights) { data = weights; } - T get(std::size_t id) const - { - return data[id]; - } + T get(std::size_t id) const { return data[id]; } private: T *data = nullptr; diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp index 1d07e548a47a..8830569ce9cf 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/interpolate.cpp @@ -142,8 +142,8 @@ struct InterpolateFactory fnT get() { if constexpr (std::is_same_v< - typename InterpolateOutputType::value_type, void>) - { + typename InterpolateOutputType::value_type, + void>) { return nullptr; } else { diff --git a/dpnp/backend/kernels/statistics/sliding_window1d.hpp b/dpnp/backend/kernels/statistics/sliding_window1d.hpp index d668feffce1f..5b3c5535afd4 100644 --- a/dpnp/backend/kernels/statistics/sliding_window1d.hpp +++ b/dpnp/backend/kernels/statistics/sliding_window1d.hpp @@ -96,10 +96,8 @@ template - class RegistryDataT, - template - class RegistryWindowT> + template class RegistryDataT, + template class RegistryWindowT> class SlidingWindow1dFunctor { private: @@ -194,10 +192,8 @@ template - class RegistryDataT, - template - class RegistryWindowT> + template class RegistryDataT, + template class RegistryWindowT> class SlidingWindow1dSmallFunctor { private: From 1de418693a68e207c4b7203286dab3b77d81e37d Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 19 Mar 2026 15:02:20 +0100 Subject: [PATCH 17/17] Add missing include per review comment --- dpnp/backend/extensions/indexing/choose.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp index fa128954aeb6..3b2df73f46ef 100644 --- a/dpnp/backend/extensions/indexing/choose.cpp +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include