From b3472b0971a712e160385485aa475fbbe3bb531a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 4 Mar 2026 21:11:19 +0100 Subject: [PATCH 01/10] Remove unused includes in kernel headers of elementwise functions --- .../libtensor/include/kernels/elementwise_functions/common.hpp | 3 --- .../include/kernels/elementwise_functions/logaddexp.hpp | 1 - .../include/kernels/elementwise_functions/maximum.hpp | 1 - .../include/kernels/elementwise_functions/minimum.hpp | 1 - 4 files changed, 6 deletions(-) diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/common.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/common.hpp index d19930b722a9..e83426df8aa9 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/common.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/common.hpp @@ -33,11 +33,8 @@ #pragma once #include -#include -#include #include #include -#include #include #include diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index 8565df2cf528..7337b6e43eab 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -46,7 +46,6 @@ #include "vec_size_util.hpp" #include "utils/math_utils.hpp" -#include "utils/offset_utils.hpp" #include "utils/type_dispatch_building.hpp" #include "utils/type_utils.hpp" diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp index 067ccd84f059..f204b6640042 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp @@ -45,7 +45,6 @@ #include "vec_size_util.hpp" #include 
"utils/math_utils.hpp" -#include "utils/offset_utils.hpp" #include "utils/type_dispatch_building.hpp" #include "utils/type_utils.hpp" diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp index a38945f89a25..d18577a5cf4e 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp @@ -44,7 +44,6 @@ #include "vec_size_util.hpp" #include "utils/math_utils.hpp" -#include "utils/offset_utils.hpp" #include "utils/type_dispatch_building.hpp" #include "utils/type_utils.hpp" From 706be59f56afe363f064fd0231cc3926c25f2693 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 4 Mar 2026 21:19:18 +0100 Subject: [PATCH 02/10] Remove unused includes in other kernels --- dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp | 2 +- .../libtensor/include/kernels/integer_advanced_indexing.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp b/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp index 26ae46707a6b..f48dfa4d4077 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp @@ -33,8 +33,8 @@ //===----------------------------------------------------------------------===// #pragma once + #include -#include #include #include diff --git a/dpctl_ext/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp b/dpctl_ext/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp index 7be2b3ea8591..f6d2f0175ce8 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp @@ -33,7 +33,7 @@ //===----------------------------------------------------------------------===// #pragma 
once -#include + #include #include #include From 2508c8520c9ba872881eb5f176d53ad9f13becda Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 4 Mar 2026 21:33:35 +0100 Subject: [PATCH 03/10] Remove unused includes in libtensor headers --- dpctl_ext/tensor/libtensor/source/accumulators.hpp | 1 - dpctl_ext/tensor/libtensor/source/zeros_ctor.hpp | 1 - 2 files changed, 2 deletions(-) diff --git a/dpctl_ext/tensor/libtensor/source/accumulators.hpp b/dpctl_ext/tensor/libtensor/source/accumulators.hpp index 42503093789b..e400aad2dceb 100644 --- a/dpctl_ext/tensor/libtensor/source/accumulators.hpp +++ b/dpctl_ext/tensor/libtensor/source/accumulators.hpp @@ -39,7 +39,6 @@ #include #include "dpnp4pybind11.hpp" -#include namespace dpctl::tensor::py_internal { diff --git a/dpctl_ext/tensor/libtensor/source/zeros_ctor.hpp b/dpctl_ext/tensor/libtensor/source/zeros_ctor.hpp index 51a1903a0f36..d104e37f5533 100644 --- a/dpctl_ext/tensor/libtensor/source/zeros_ctor.hpp +++ b/dpctl_ext/tensor/libtensor/source/zeros_ctor.hpp @@ -39,7 +39,6 @@ #include #include "dpnp4pybind11.hpp" -#include namespace dpctl::tensor::py_internal { From 499d4b4521b2fc844d3b52a4d9493e08dda7d294 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 4 Mar 2026 21:51:06 +0100 Subject: [PATCH 04/10] Remove unused includes in libtensor sources --- dpctl_ext/tensor/libtensor/source/accumulators.cpp | 3 --- dpctl_ext/tensor/libtensor/source/device_support_queries.cpp | 2 +- dpctl_ext/tensor/libtensor/source/full_ctor.cpp | 2 -- .../tensor/libtensor/source/integer_advanced_indexing.cpp | 3 --- dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp | 1 - dpctl_ext/tensor/libtensor/source/zeros_ctor.cpp | 2 -- 6 files changed, 1 insertion(+), 12 deletions(-) diff --git a/dpctl_ext/tensor/libtensor/source/accumulators.cpp b/dpctl_ext/tensor/libtensor/source/accumulators.cpp index 82913010755a..3e06c142d7c1 100644 --- a/dpctl_ext/tensor/libtensor/source/accumulators.cpp +++ 
b/dpctl_ext/tensor/libtensor/source/accumulators.cpp @@ -32,7 +32,6 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===----------------------------------------------------------------------===// -#include #include #include #include @@ -42,10 +41,8 @@ #include "dpnp4pybind11.hpp" #include -#include #include "kernels/accumulators.hpp" -#include "simplify_iteration_space.hpp" #include "utils/memory_overlap.hpp" #include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" diff --git a/dpctl_ext/tensor/libtensor/source/device_support_queries.cpp b/dpctl_ext/tensor/libtensor/source/device_support_queries.cpp index 97a8ba83831e..3cc0952c2080 100644 --- a/dpctl_ext/tensor/libtensor/source/device_support_queries.cpp +++ b/dpctl_ext/tensor/libtensor/source/device_support_queries.cpp @@ -36,7 +36,7 @@ #include "dpnp4pybind11.hpp" #include -#include + #include namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/full_ctor.cpp b/dpctl_ext/tensor/libtensor/source/full_ctor.cpp index aef57836666e..ca4a17f28f77 100644 --- a/dpctl_ext/tensor/libtensor/source/full_ctor.cpp +++ b/dpctl_ext/tensor/libtensor/source/full_ctor.cpp @@ -32,7 +32,6 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===--------------------------------------------------------------------===// -#include #include #include #include @@ -42,7 +41,6 @@ #include #include "dpnp4pybind11.hpp" -#include #include #include "kernels/constructors.hpp" diff --git a/dpctl_ext/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl_ext/tensor/libtensor/source/integer_advanced_indexing.cpp index 925cc2e895ed..c6021bdfd2d1 100644 --- a/dpctl_ext/tensor/libtensor/source/integer_advanced_indexing.cpp +++ b/dpctl_ext/tensor/libtensor/source/integer_advanced_indexing.cpp @@ -34,7 +34,6 @@ //===----------------------------------------------------------------------===// #include -#include #include #include #include @@ -47,9 +46,7 
@@ #include #include "dpnp4pybind11.hpp" -#include #include -#include #include "kernels/integer_advanced_indexing.hpp" #include "utils/memory_overlap.hpp" diff --git a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp index 98ab488e5879..76f6d8bed4a5 100644 --- a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp +++ b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp @@ -58,7 +58,6 @@ #include "kernels/dpctl_tensor_types.hpp" // #include "linear_sequences.hpp" // #include "repeat.hpp" -#include "simplify_iteration_space.hpp" #include "triul_ctor.hpp" #include "utils/memory_overlap.hpp" #include "utils/strided_iters.hpp" diff --git a/dpctl_ext/tensor/libtensor/source/zeros_ctor.cpp b/dpctl_ext/tensor/libtensor/source/zeros_ctor.cpp index 2eb05e49f382..b9a2e01bea4a 100644 --- a/dpctl_ext/tensor/libtensor/source/zeros_ctor.cpp +++ b/dpctl_ext/tensor/libtensor/source/zeros_ctor.cpp @@ -32,7 +32,6 @@ /// This file defines functions of dpctl.tensor._tensor_impl extensions //===--------------------------------------------------------------------===// -#include #include #include #include @@ -41,7 +40,6 @@ #include #include "dpnp4pybind11.hpp" -#include #include #include "utils/output_validation.hpp" From 16fd6c9b463b82069c05ccbb101be4bbc6ab51c8 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 4 Mar 2026 21:52:22 +0100 Subject: [PATCH 05/10] Add required but missing includes in libtensor sources --- dpctl_ext/tensor/libtensor/source/accumulators.cpp | 3 +++ dpctl_ext/tensor/libtensor/source/full_ctor.cpp | 2 ++ 2 files changed, 5 insertions(+) diff --git a/dpctl_ext/tensor/libtensor/source/accumulators.cpp b/dpctl_ext/tensor/libtensor/source/accumulators.cpp index 3e06c142d7c1..6d57b034bfd0 100644 --- a/dpctl_ext/tensor/libtensor/source/accumulators.cpp +++ b/dpctl_ext/tensor/libtensor/source/accumulators.cpp @@ -35,6 +35,9 @@ #include #include #include +#include +#include +#include #include #include diff --git 
a/dpctl_ext/tensor/libtensor/source/full_ctor.cpp b/dpctl_ext/tensor/libtensor/source/full_ctor.cpp index ca4a17f28f77..cbc662b76c8d 100644 --- a/dpctl_ext/tensor/libtensor/source/full_ctor.cpp +++ b/dpctl_ext/tensor/libtensor/source/full_ctor.cpp @@ -44,7 +44,9 @@ #include #include "kernels/constructors.hpp" +#include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" +#include "utils/sycl_alloc_utils.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" From f50240811f2994add485430b76e77e927b44704a Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 4 Mar 2026 22:00:14 +0100 Subject: [PATCH 06/10] Remove unused declarations in libtensor sources --- dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp index 76f6d8bed4a5..b5c89e5ec753 100644 --- a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp +++ b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp @@ -71,9 +71,6 @@ static_assert(std::is_same_v); namespace { -using dpctl::tensor::c_contiguous_strides; -using dpctl::tensor::f_contiguous_strides; - using dpctl::tensor::overlap::MemoryOverlap; using dpctl::tensor::overlap::SameLogicalTensors; From 1d78afd6b94cd5f6e688ebc768c2f3160f92f8b6 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 4 Mar 2026 22:12:58 +0100 Subject: [PATCH 07/10] Remove redundant namespace qualifications --- dpctl_ext/tensor/libtensor/source/accumulators.cpp | 8 ++++---- .../libtensor/source/boolean_advanced_indexing.cpp | 12 ++++++------ .../libtensor/source/copy_and_cast_usm_to_usm.cpp | 9 ++++----- dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp | 8 ++++---- dpctl_ext/tensor/libtensor/source/copy_for_roll.cpp | 9 ++++----- 5 files changed, 22 insertions(+), 24 deletions(-) diff --git a/dpctl_ext/tensor/libtensor/source/accumulators.cpp b/dpctl_ext/tensor/libtensor/source/accumulators.cpp index 
6d57b034bfd0..88daa63e7dbf 100644 --- a/dpctl_ext/tensor/libtensor/source/accumulators.cpp +++ b/dpctl_ext/tensor/libtensor/source/accumulators.cpp @@ -196,8 +196,8 @@ std::size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask, int mask_nd = mask.get_ndim(); int nd = mask_nd; - dpctl::tensor::py_internal::compact_iteration_space( - nd, shape, strides_vector, compact_shape, compact_strides); + compact_iteration_space(nd, shape, strides_vector, compact_shape, + compact_strides); // Strided implementation auto strided_fn = @@ -351,8 +351,8 @@ std::size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src, int src_nd = src.get_ndim(); int nd = src_nd; - dpctl::tensor::py_internal::compact_iteration_space( - nd, shape, strides_vector, compact_shape, compact_strides); + compact_iteration_space(nd, shape, strides_vector, compact_shape, + compact_strides); // Strided implementation auto strided_fn = cumsum_1d_strided_dispatch_vector[src_typeid]; diff --git a/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp b/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp index a78cb1750b81..82044034db89 100644 --- a/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp +++ b/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp @@ -336,7 +336,7 @@ std::pair shT masked_src_shape; shT ortho_src_strides; shT masked_src_strides; - dpctl::tensor::py_internal::split_iteration_space( + split_iteration_space( src_shape_vec, src_strides_vec, axis_start, axis_end, ortho_src_shape, masked_src_shape, // 4 vectors modified @@ -346,7 +346,7 @@ std::pair shT masked_dst_shape; shT ortho_dst_strides; shT masked_dst_strides; - dpctl::tensor::py_internal::split_iteration_space( + split_iteration_space( dst_shape_vec, dst_strides_vec, axis_start, axis_start + 1, ortho_dst_shape, masked_dst_shape, // 4 vectors modified @@ -366,7 +366,7 @@ std::pair py::ssize_t ortho_src_offset(0); py::ssize_t ortho_dst_offset(0); - 
dpctl::tensor::py_internal::simplify_iteration_space( + simplify_iteration_space( ortho_nd, _shape, ortho_src_strides, ortho_dst_strides, // output simplified_ortho_shape, simplified_ortho_src_strides, @@ -646,7 +646,7 @@ std::pair shT masked_dst_shape; shT ortho_dst_strides; shT masked_dst_strides; - dpctl::tensor::py_internal::split_iteration_space( + split_iteration_space( dst_shape_vec, dst_strides_vec, axis_start, axis_end, ortho_dst_shape, masked_dst_shape, // 4 vectors modified @@ -656,7 +656,7 @@ std::pair shT masked_rhs_shape; shT ortho_rhs_strides; shT masked_rhs_strides; - dpctl::tensor::py_internal::split_iteration_space( + split_iteration_space( rhs_shape_vec, rhs_strides_vec, axis_start, axis_start + 1, ortho_rhs_shape, masked_rhs_shape, // 4 vectors modified @@ -676,7 +676,7 @@ std::pair py::ssize_t ortho_dst_offset(0); py::ssize_t ortho_rhs_offset(0); - dpctl::tensor::py_internal::simplify_iteration_space( + simplify_iteration_space( ortho_nd, _shape, ortho_dst_strides, ortho_rhs_strides, simplified_ortho_shape, simplified_ortho_dst_strides, simplified_ortho_rhs_strides, ortho_dst_offset, ortho_rhs_offset); diff --git a/dpctl_ext/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp b/dpctl_ext/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp index 3d20be02f885..9ea49ae1d88b 100644 --- a/dpctl_ext/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp +++ b/dpctl_ext/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp @@ -188,11 +188,10 @@ std::pair copy_usm_ndarray_into_usm_ndarray( const py::ssize_t *shape = src_shape; // nd, simplified_* and *_offset are modified by reference - dpctl::tensor::py_internal::simplify_iteration_space( - nd, shape, src_strides, dst_strides, - // output - simplified_shape, simplified_src_strides, simplified_dst_strides, - src_offset, dst_offset); + simplify_iteration_space(nd, shape, src_strides, dst_strides, + // output + simplified_shape, simplified_src_strides, + simplified_dst_strides, src_offset, dst_offset); if 
(nd < 2) { if (nd == 1) { diff --git a/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp b/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp index bbee24c95d4d..3f3d657f0055 100644 --- a/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp +++ b/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp @@ -225,7 +225,7 @@ std::pair int nd = src_nd; // nd, simplified_* and *_offset are modified by reference - dpctl::tensor::py_internal::simplify_iteration_space( + simplify_iteration_space( nd, src_shape_vec.data(), src_strides_vec, dst.get_strides_vector(), // output simplified_shape, simplified_src_strides, simplified_dst_strides, @@ -359,7 +359,7 @@ std::pair int nd = src_nd; // nd, simplified_* and *_offset are modified by reference - dpctl::tensor::py_internal::simplify_iteration_space( + simplify_iteration_space( nd, src_shape_vec.data(), src_strides_vec, dst.get_strides_vector(), // output simplified_shape, simplified_src_strides, simplified_dst_strides, @@ -521,7 +521,7 @@ std::pair int nd = static_cast(batch_shape_vec.size()); // nd, simplified_* and *_offset are modified by reference - dpctl::tensor::py_internal::simplify_iteration_space( + simplify_iteration_space( nd, batch_shape_vec.data(), src_batch_strides_vec, dst_batch_strides_vec, // output @@ -714,7 +714,7 @@ std::pair int nd = static_cast(batch_shape_vec.size()); // nd, simplified_* and *_offset are modified by reference - dpctl::tensor::py_internal::simplify_iteration_space( + simplify_iteration_space( nd, batch_shape_vec.data(), src_batch_strides_vec, dst_batch_strides_vec, // output diff --git a/dpctl_ext/tensor/libtensor/source/copy_for_roll.cpp b/dpctl_ext/tensor/libtensor/source/copy_for_roll.cpp index a187b2247677..7742c1c96a4e 100644 --- a/dpctl_ext/tensor/libtensor/source/copy_for_roll.cpp +++ b/dpctl_ext/tensor/libtensor/source/copy_for_roll.cpp @@ -197,11 +197,10 @@ std::pair const py::ssize_t *shape = src_shape_ptr; // nd, simplified_* and *_offset are modified by reference - 
dpctl::tensor::py_internal::simplify_iteration_space( - nd, shape, src_strides, dst_strides, - // output - simplified_shape, simplified_src_strides, simplified_dst_strides, - src_offset, dst_offset); + simplify_iteration_space(nd, shape, src_strides, dst_strides, + // output + simplified_shape, simplified_src_strides, + simplified_dst_strides, src_offset, dst_offset); if (nd == 1 && simplified_src_strides[0] == 1 && simplified_dst_strides[0] == 1) { From 6a0d36d5a8698c0a04acc8f83c47fa774a7f6bb7 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 4 Mar 2026 13:21:08 -0800 Subject: [PATCH 08/10] Apply pre-commit formatting rules --- .../source/boolean_advanced_indexing.cpp | 36 +++++++--------- .../libtensor/source/copy_as_contig.cpp | 42 +++++++++---------- 2 files changed, 36 insertions(+), 42 deletions(-) diff --git a/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp b/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp index 82044034db89..4c46e1e2fec8 100644 --- a/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp +++ b/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp @@ -336,21 +336,19 @@ std::pair shT masked_src_shape; shT ortho_src_strides; shT masked_src_strides; - split_iteration_space( - src_shape_vec, src_strides_vec, axis_start, axis_end, - ortho_src_shape, - masked_src_shape, // 4 vectors modified - ortho_src_strides, masked_src_strides); + split_iteration_space(src_shape_vec, src_strides_vec, axis_start, + axis_end, ortho_src_shape, + masked_src_shape, // 4 vectors modified + ortho_src_strides, masked_src_strides); shT ortho_dst_shape; shT masked_dst_shape; shT ortho_dst_strides; shT masked_dst_strides; - split_iteration_space( - dst_shape_vec, dst_strides_vec, axis_start, axis_start + 1, - ortho_dst_shape, - masked_dst_shape, // 4 vectors modified - ortho_dst_strides, masked_dst_strides); + split_iteration_space(dst_shape_vec, dst_strides_vec, axis_start, + axis_start + 1, ortho_dst_shape, + 
masked_dst_shape, // 4 vectors modified + ortho_dst_strides, masked_dst_strides); assert(ortho_src_shape.size() == static_cast(ortho_nd)); assert(ortho_dst_shape.size() == static_cast(ortho_nd)); @@ -646,21 +644,19 @@ std::pair shT masked_dst_shape; shT ortho_dst_strides; shT masked_dst_strides; - split_iteration_space( - dst_shape_vec, dst_strides_vec, axis_start, axis_end, - ortho_dst_shape, - masked_dst_shape, // 4 vectors modified - ortho_dst_strides, masked_dst_strides); + split_iteration_space(dst_shape_vec, dst_strides_vec, axis_start, + axis_end, ortho_dst_shape, + masked_dst_shape, // 4 vectors modified + ortho_dst_strides, masked_dst_strides); shT ortho_rhs_shape; shT masked_rhs_shape; shT ortho_rhs_strides; shT masked_rhs_strides; - split_iteration_space( - rhs_shape_vec, rhs_strides_vec, axis_start, axis_start + 1, - ortho_rhs_shape, - masked_rhs_shape, // 4 vectors modified - ortho_rhs_strides, masked_rhs_strides); + split_iteration_space(rhs_shape_vec, rhs_strides_vec, axis_start, + axis_start + 1, ortho_rhs_shape, + masked_rhs_shape, // 4 vectors modified + ortho_rhs_strides, masked_rhs_strides); assert(ortho_dst_shape.size() == static_cast(ortho_nd)); assert(ortho_rhs_shape.size() == static_cast(ortho_nd)); diff --git a/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp b/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp index 3f3d657f0055..5d78862651fc 100644 --- a/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp +++ b/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp @@ -225,11 +225,11 @@ std::pair int nd = src_nd; // nd, simplified_* and *_offset are modified by reference - simplify_iteration_space( - nd, src_shape_vec.data(), src_strides_vec, dst.get_strides_vector(), - // output - simplified_shape, simplified_src_strides, simplified_dst_strides, - src_offset, dst_offset); + simplify_iteration_space(nd, src_shape_vec.data(), src_strides_vec, + dst.get_strides_vector(), + // output + simplified_shape, simplified_src_strides, + 
simplified_dst_strides, src_offset, dst_offset); if (!((0 == src_offset) && (0 == dst_offset))) { throw std::runtime_error( @@ -359,11 +359,11 @@ std::pair int nd = src_nd; // nd, simplified_* and *_offset are modified by reference - simplify_iteration_space( - nd, src_shape_vec.data(), src_strides_vec, dst.get_strides_vector(), - // output - simplified_shape, simplified_src_strides, simplified_dst_strides, - src_offset, dst_offset); + simplify_iteration_space(nd, src_shape_vec.data(), src_strides_vec, + dst.get_strides_vector(), + // output + simplified_shape, simplified_src_strides, + simplified_dst_strides, src_offset, dst_offset); if (!((0 == src_offset) && (0 == dst_offset))) { throw std::runtime_error( @@ -521,12 +521,11 @@ std::pair int nd = static_cast(batch_shape_vec.size()); // nd, simplified_* and *_offset are modified by reference - simplify_iteration_space( - nd, batch_shape_vec.data(), src_batch_strides_vec, - dst_batch_strides_vec, - // output - simplified_shape, simplified_src_strides, simplified_dst_strides, - src_offset, dst_offset); + simplify_iteration_space(nd, batch_shape_vec.data(), src_batch_strides_vec, + dst_batch_strides_vec, + // output + simplified_shape, simplified_src_strides, + simplified_dst_strides, src_offset, dst_offset); if (!((0 == src_offset) && (0 == dst_offset))) { throw std::runtime_error( @@ -714,12 +713,11 @@ std::pair int nd = static_cast(batch_shape_vec.size()); // nd, simplified_* and *_offset are modified by reference - simplify_iteration_space( - nd, batch_shape_vec.data(), src_batch_strides_vec, - dst_batch_strides_vec, - // output - simplified_shape, simplified_src_strides, simplified_dst_strides, - src_offset, dst_offset); + simplify_iteration_space(nd, batch_shape_vec.data(), src_batch_strides_vec, + dst_batch_strides_vec, + // output + simplified_shape, simplified_src_strides, + simplified_dst_strides, src_offset, dst_offset); if (!((0 == src_offset) && (0 == dst_offset))) { throw std::runtime_error( From 
3ca9cf676c95cd90797794cf679edd246fb0d0bf Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 4 Mar 2026 22:58:13 +0100 Subject: [PATCH 09/10] Revert removal of the simplify_iteration_space.hpp include since it is required --- dpctl_ext/tensor/libtensor/source/accumulators.cpp | 1 + dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/dpctl_ext/tensor/libtensor/source/accumulators.cpp b/dpctl_ext/tensor/libtensor/source/accumulators.cpp index 88daa63e7dbf..c6ab96418d47 100644 --- a/dpctl_ext/tensor/libtensor/source/accumulators.cpp +++ b/dpctl_ext/tensor/libtensor/source/accumulators.cpp @@ -46,6 +46,7 @@ #include #include "kernels/accumulators.hpp" +#include "simplify_iteration_space.hpp" #include "utils/memory_overlap.hpp" #include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" diff --git a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp index b5c89e5ec753..7b151c773fe0 100644 --- a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp +++ b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp @@ -58,6 +58,7 @@ #include "kernels/dpctl_tensor_types.hpp" // #include "linear_sequences.hpp" // #include "repeat.hpp" +#include "simplify_iteration_space.hpp" #include "triul_ctor.hpp" #include "utils/memory_overlap.hpp" #include "utils/strided_iters.hpp" From b6d129fcf1cfdcb4d400a3cff8dff0f01f33d29b Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 5 Mar 2026 00:43:37 +0100 Subject: [PATCH 10/10] Revert include removal of <pybind11/complex.h> from full_ctor.cpp - FullContigFactory is instantiated for all types including complex types - full_contig_impl uses py::cast: dstTy fill_v = py::cast<dstTy>(py_value); - When dstTy is std::complex<float> or std::complex<double>, pybind11 needs to know how to convert a Python object to that C++ type - <pybind11/complex.h> registers the type caster for std::complex<T>, enabling py::cast<std::complex<T>>() to work --- dpctl_ext/tensor/libtensor/source/full_ctor.cpp | 1 + 1 file changed, 1 insertion(+) 
diff --git a/dpctl_ext/tensor/libtensor/source/full_ctor.cpp b/dpctl_ext/tensor/libtensor/source/full_ctor.cpp index cbc662b76c8d..dfe1d25b769c 100644 --- a/dpctl_ext/tensor/libtensor/source/full_ctor.cpp +++ b/dpctl_ext/tensor/libtensor/source/full_ctor.cpp @@ -41,6 +41,7 @@ #include #include "dpnp4pybind11.hpp" +#include <pybind11/complex.h> // py::cast<std::complex<T>> #include #include "kernels/constructors.hpp"