IntelPython
diff --git a/‎dpctl_ext/tensor/CMakeLists.txt‎
Lines changed: 4 additions & 4 deletions b/‎dpctl_ext/tensor/CMakeLists.txt‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎dpctl_ext/tensor/__init__.py‎
Lines changed: 8 additions & 0 deletions b/‎dpctl_ext/tensor/__init__.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎dpctl_ext/tensor/_elementwise_funcs.py‎
Lines changed: 111 additions & 0 deletions b/‎dpctl_ext/tensor/_elementwise_funcs.py‎
Lines changed: 111 additions & 0 deletions
diff --git a/‎dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp‎
Lines changed: 224 additions & 0 deletions b/‎dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp‎
Lines changed: 224 additions & 0 deletions
@@ -136,11 +136,11 @@ set(_elementwise_sources
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/signbit.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sin.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sinh.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/square.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/square.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/subtract.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tan.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tanh.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tan.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tanh.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/true_divide.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/trunc.cpp
 )
 
@@ -118,6 +118,10 @@
     signbit,
     sin,
     sinh,
+    sqrt,
+    square,
+    tan,
+    tanh,
 )
 from ._reduction import (
     argmax,
@@ -230,12 +234,16 @@
     "sin",
     "sinh",
     "sort",
+    "sqrt",
+    "square",
     "squeeze",
     "stack",
     "sum",
     "swapaxes",
     "take",
     "take_along_axis",
+    "tan",
+    "tanh",
     "tile",
     "top_k",
     "to_numpy",
 
@@ -931,6 +931,117 @@
 )
 del _sinh_docstring
 
+# U32: ==== SQUARE      (x)
+_square_docstring_ = r"""
+square(x, /, \*, out=None, order='K')
+
+Squares each element `x_i` of input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array. May have any data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise squares of `x`. The data type of
+        the returned array is determined by the Type Promotion Rules.
+"""
+
+square = UnaryElementwiseFunc(
+    "square", ti._square_result_type, ti._square, _square_docstring_
+)
+del _square_docstring_
+
+# U33: ==== SQRT        (x)
+_sqrt_docstring_ = r"""
+sqrt(x, /, \*, out=None, order='K')
+
+Computes the positive square-root for each element `x_i` of input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise positive square-roots of `x`. The
+        data type of the returned array is determined by the Type Promotion
+        Rules.
+"""
+
+sqrt = UnaryElementwiseFunc(
+    "sqrt", ti._sqrt_result_type, ti._sqrt, _sqrt_docstring_
+)
+del _sqrt_docstring_
+
+# U34: ==== TAN         (x)
+_tan_docstring = r"""
+tan(x, /, \*, out=None, order='K')
+
+Computes tangent for each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise tangent. The data type
+        of the returned array is determined by the Type Promotion Rules.
+"""
+
+tan = UnaryElementwiseFunc("tan", ti._tan_result_type, ti._tan, _tan_docstring)
+del _tan_docstring
+
+# U35: ==== TANH        (x)
+_tanh_docstring = r"""
+tanh(x, /, \*, out=None, order='K')
+
+Computes hyperbolic tangent for each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise hyperbolic tangent. The data type
+        of the returned array is determined by the Type Promotion Rules.
+"""
+
+tanh = UnaryElementwiseFunc(
+    "tanh", ti._tanh_result_type, ti._tanh, _tanh_docstring
+)
+del _tanh_docstring
+
 # U40: ==== PROJ        (x)
 _proj_docstring = r"""
 proj(x, /, \*, out=None, order='K')
 
@@ -0,0 +1,224 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of SQRT(x)
+/// function that compute a square root.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::sqrt
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct SqrtFunctor
+{
+
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argTy and resTy support sugroup store/load operation
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in) const
+    {
+        if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type;
+            return exprm_ns::sqrt(exprm_ns::complex<realT>(in));
+        }
+        else {
+            return sycl::sqrt(in);
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using SqrtContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           SqrtFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using SqrtStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, SqrtFunctor<argTy, resTy>>;
+
+template <typename T>
+struct SqrtOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float, float>,
+        td_ns::TypeMapResultEntry<T, double, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>, std::complex<float>>,
+        td_ns::
+            TypeMapResultEntry<T, std::complex<double>, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+template <typename argTy>
+struct SqrtContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class sqrt_contig_kernel;
+
+template <typename argTy>
+sycl::event sqrt_contig_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             const char *arg_p,
+                             char *res_p,
+                             const std::vector<sycl::event> &depends = {})
+{
+    using SqrtHS = hyperparam_detail::SqrtContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vec_sz = SqrtHS::vec_sz;
+    static constexpr std::uint8_t n_vecs = SqrtHS::n_vecs;
+
+    return elementwise_common::unary_contig_impl<
+        argTy, SqrtOutputType, SqrtContigFunctor, sqrt_contig_kernel, vec_sz,
+        n_vecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct SqrtContigFactory
+{
+    fnT get()
+    {
+        if constexpr (!SqrtOutputType<T>::is_defined) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = sqrt_contig_impl<T>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct SqrtTypeMapFactory
+{
+    /*! @brief get typeid for output type of std::sqrt(T x) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename SqrtOutputType<T>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class sqrt_strided_kernel;
+
+template <typename argTy>
+sycl::event
+    sqrt_strided_impl(sycl::queue &exec_q,
+                      std::size_t nelems,
+                      int nd,
+                      const ssize_t *shape_and_strides,
+                      const char *arg_p,
+                      ssize_t arg_offset,
+                      char *res_p,
+                      ssize_t res_offset,
+                      const std::vector<sycl::event> &depends,
+                      const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, SqrtOutputType, SqrtStridedFunctor, sqrt_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct SqrtStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (!SqrtOutputType<T>::is_defined) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = sqrt_strided_impl<T>;
+            return fn;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::sqrt