diff --git a/sycl/include/sycl/__spirv/spirv_ops.hpp b/sycl/include/sycl/__spirv/spirv_ops.hpp index 75f4360f415b9..039b9269a67b7 100644 --- a/sycl/include/sycl/__spirv/spirv_ops.hpp +++ b/sycl/include/sycl/__spirv/spirv_ops.hpp @@ -8,884 +8,9 @@ #pragma once -#include // for Scope, __ocl_event_t -#include // for __DPCPP_SYCL_EXTERNAL -#include // for __SYCL_EXPORT - -#include // for size_t -#include // for uint32_t -#include -#include // for pair - -// Convergent attribute -#ifdef __SYCL_DEVICE_ONLY__ -#define __SYCL_CONVERGENT__ __attribute__((convergent)) -#else -#define __SYCL_CONVERGENT__ -#endif - -#ifdef __SYCL_DEVICE_ONLY__ - -extern __DPCPP_SYCL_EXTERNAL float __spirv_RoundFToTF32INTEL(float a); - -template -extern __DPCPP_SYCL_EXTERNAL - __spv::__spirv_CooperativeMatrixKHR * - __spirv_CooperativeMatrixLoadKHR(T *Ptr, __spv::MatrixLayout Layout = L, - std::size_t Stride = 0, - int MemOperand = 0); - -template -extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreKHR( - T *Ptr, __spv::__spirv_CooperativeMatrixKHR *Object, - __spv::MatrixLayout Layout = L, std::size_t Stride = 0, int MemOperand = 0); - -template -extern __DPCPP_SYCL_EXTERNAL size_t __spirv_CooperativeMatrixLengthKHR( - __spv::__spirv_CooperativeMatrixKHR *); - -template -extern __DPCPP_SYCL_EXTERNAL - __spv::__spirv_CooperativeMatrixKHR * - __spirv_CooperativeMatrixConstructCheckedINTEL(const T Value, size_t Height, - size_t Stride, size_t Width, - size_t CoordX, - size_t CoordY); - -template -extern __DPCPP_SYCL_EXTERNAL - __spv::__spirv_CooperativeMatrixKHR * - __spirv_CooperativeMatrixLoadCheckedINTEL(T *Ptr, std::size_t Stride, - size_t Height, size_t Width, - size_t CoordX, size_t CoordY, - __spv::MatrixLayout Layout = L, - int MemOperand = 0); - -template -extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreCheckedINTEL( - T *Ptr, __spv::__spirv_CooperativeMatrixKHR *Object, - std::size_t Stride, size_t Height, size_t Width, size_t CoordX, - size_t CoordY, __spv::MatrixLayout Layout = L, int MemOperand = 0); - -template -extern __DPCPP_SYCL_EXTERNAL - __spv::__spirv_CooperativeMatrixKHR * - __spirv_CooperativeMatrixMulAddKHR( - __spv::__spirv_CooperativeMatrixKHR *A, - __spv::__spirv_CooperativeMatrixKHR *B, - __spv::__spirv_CooperativeMatrixKHR *C, - size_t Operands = 0); - -template -extern __DPCPP_SYCL_EXTERNAL - __spv::__spirv_CooperativeMatrixKHR * - __spirv_CompositeConstruct(const T v); - -// TODO: replace with __spirv_CooperativeMatrixGetElementCoordINTEL when ready -template -extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t -__spirv_JointMatrixGetElementCoordINTEL( - __spv::__spirv_CooperativeMatrixKHR *, size_t i); - -// AccessChain followed by load/store serves to extract/insert and element -// from/to the matrix -template -extern __DPCPP_SYCL_EXTERNAL Ts * -__spirv_AccessChain(__spv::__spirv_CooperativeMatrixKHR **, - size_t i); - -template -extern __DPCPP_SYCL_EXTERNAL - __spv::__spirv_CooperativeMatrixKHR * - __spirv_CooperativeMatrixConstructCheckedINTEL(int32_t CoordX, - int32_t CoordY, - uint32_t Height, - uint32_t Width, - const T Value); - -template -extern __DPCPP_SYCL_EXTERNAL - __spv::__spirv_CooperativeMatrixKHR * - __spirv_CooperativeMatrixLoadCheckedINTEL( - T *Ptr, int32_t CoordX, int32_t CoordY, __spv::MatrixLayout Layout = L, - uint32_t Height = 0, uint32_t Width = 0, std::size_t Stride = 0, - int MemOperand = 0); - -template -extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreCheckedINTEL( - T *Ptr, int32_t CoordX, int32_t CoordY, - __spv::__spirv_CooperativeMatrixKHR *Object, - __spv::MatrixLayout Layout = L, uint32_t Height = 0, uint32_t Width = 0, - std::size_t Stride = 0, int MemOperand = 0); - -template -extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixPrefetchINTEL( - T *Ptr, uint32_t NumRows, uint32_t NumCols, unsigned int CacheLevel, - __spv::MatrixLayout Layout, size_t Stride); - -#ifndef __SPIRV_BUILTIN_DECLARATIONS__ -#error \ - "SPIR-V built-ins are not available. Please set -fdeclare-spirv-builtins flag." -#endif - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryFormat(ImageT); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryOrder(ImageT); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQuerySize(ImageT); - -template -extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageFetch(ImageT, TempArgT); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageFetch(ImageT, TempArgT); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageArrayFetch(ImageT, TempArgT, - int); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageArrayFetch(ImageT, - TempArgT, int); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageGather(ImageT, TempArgT, - unsigned); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageArrayRead(ImageT, TempArgT, int); - -template -extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageArrayWrite(ImageT, CoordT, int, - ValT); - -template -extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT, - __ocl_sampler_t); - -template -extern __DPCPP_SYCL_EXTERNAL TempRetT -__spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, float); - -template -extern __DPCPP_SYCL_EXTERNAL TempRetT -__spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, TempArgT, TempArgT); - -template -extern __DPCPP_SYCL_EXTERNAL TempRetT __spirv_ImageSampleCubemap(SampledType, - TempArgT); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ConvertHandleToImageINTEL(HandleT); - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ConvertHandleToSamplerINTEL(HandleT); - -template -extern __DPCPP_SYCL_EXTERNAL - RetT __spirv_ConvertHandleToSampledImageINTEL(HandleT); - -// Atomic SPIR-V builtins -// TODO: drop these forward-declarations. -// As of now, compiler does not forward-declare long long overloads for -// these and as such we can't drop anything from here. But ideally, we should -// rely on the compiler to generate those - that would allow to drop -// spirv_ops.hpp include from more files. -#define __SPIRV_ATOMIC_LOAD(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad(AS Type *P, int S, \ - int O) noexcept; -#define __SPIRV_ATOMIC_STORE(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \ - AS Type *P, int S, int E, int U, Type V, Type C) noexcept; -#define __SPIRV_ATOMIC_IADD(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_ISUB(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_FADD(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_SMIN(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_UMIN(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_FMIN(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_SMAX(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_UMAX(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_FMAX(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \ - AS Type *P, int S, int O, Type V) noexcept; -#define __SPIRV_ATOMIC_AND(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd(AS Type *P, int S, \ - int O, Type V) noexcept; -#define __SPIRV_ATOMIC_OR(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr(AS Type *P, int S, int O, \ - Type V) noexcept; -#define __SPIRV_ATOMIC_XOR(AS, Type) \ - extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor(AS Type *P, int S, \ - int O, Type V) noexcept; - -#define __SPIRV_ATOMIC_FLOAT(AS, Type) \ - __SPIRV_ATOMIC_FADD(AS, Type) \ - __SPIRV_ATOMIC_FMIN(AS, Type) \ - __SPIRV_ATOMIC_FMAX(AS, Type) \ - __SPIRV_ATOMIC_LOAD(AS, Type) \ - __SPIRV_ATOMIC_STORE(AS, Type) \ - __SPIRV_ATOMIC_EXCHANGE(AS, Type) - -#define __SPIRV_ATOMIC_BASE(AS, Type) \ - __SPIRV_ATOMIC_FLOAT(AS, Type) \ - __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ - __SPIRV_ATOMIC_IADD(AS, Type) \ - __SPIRV_ATOMIC_ISUB(AS, Type) \ - __SPIRV_ATOMIC_AND(AS, Type) \ - __SPIRV_ATOMIC_OR(AS, Type) \ - __SPIRV_ATOMIC_XOR(AS, Type) - -#define __SPIRV_ATOMIC_SIGNED(AS, Type) \ - __SPIRV_ATOMIC_BASE(AS, Type) \ - __SPIRV_ATOMIC_SMIN(AS, Type) \ - __SPIRV_ATOMIC_SMAX(AS, Type) - -#define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \ - __SPIRV_ATOMIC_BASE(AS, Type) \ - __SPIRV_ATOMIC_UMIN(AS, Type) \ - __SPIRV_ATOMIC_UMAX(AS, Type) - -// Helper atomic operations which select correct signed/unsigned version -// of atomic min/max based on the type -#define __SPIRV_ATOMIC_MINMAX(AS, Op) \ - template \ - typename std::enable_if_t< \ - std::is_integral::value && std::is_signed::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, \ - T Value) noexcept { \ - return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \ - } \ - template \ - typename std::enable_if_t< \ - std::is_integral::value && !std::is_signed::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, \ - T Value) noexcept { \ - return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \ - } \ - template \ - typename std::enable_if_t::value, T> \ - __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, \ - T Value) noexcept { \ - return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \ - } - -#define __SPIRV_ATOMICS(macro, Arg) \ - macro(__attribute__((opencl_global)), Arg) \ - macro(__attribute__((opencl_local)), Arg) macro(, Arg) - -__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, _Float16) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned int) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min) -__SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max) - -#undef __SPIRV_ATOMICS -#undef __SPIRV_ATOMIC_AND -#undef __SPIRV_ATOMIC_BASE -#undef __SPIRV_ATOMIC_CMP_EXCHANGE -#undef __SPIRV_ATOMIC_EXCHANGE -#undef __SPIRV_ATOMIC_FADD -#undef __SPIRV_ATOMIC_FLOAT -#undef __SPIRV_ATOMIC_FMAX -#undef __SPIRV_ATOMIC_FMIN -#undef __SPIRV_ATOMIC_IADD -#undef __SPIRV_ATOMIC_ISUB -#undef __SPIRV_ATOMIC_LOAD -#undef __SPIRV_ATOMIC_MINMAX -#undef __SPIRV_ATOMIC_OR -#undef __SPIRV_ATOMIC_SIGNED -#undef __SPIRV_ATOMIC_SMAX -#undef __SPIRV_ATOMIC_SMIN -#undef __SPIRV_ATOMIC_STORE -#undef __SPIRV_ATOMIC_UMAX -#undef __SPIRV_ATOMIC_UMIN -#undef __SPIRV_ATOMIC_UNSIGNED -#undef __SPIRV_ATOMIC_XOR - -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) - uint8_t *Ptr) noexcept; - -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) - uint16_t *Ptr) noexcept; - -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) - uint32_t *Ptr) noexcept; - -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) - uint64_t *Ptr) noexcept; - -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) - uint8_t *Ptr) noexcept; - -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) - uint16_t *Ptr) noexcept; - -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) - uint32_t *Ptr) noexcept; - -template -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT -__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) - uint64_t *Ptr) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_FixedSqrtINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, - int32_t Quantization = 0, int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_FixedRecipINTEL(sycl::detail::ap_int a, bool S, int32_t I, - int32_t rI, int32_t Quantization = 0, - int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_FixedRsqrtINTEL(sycl::detail::ap_int a, bool S, int32_t I, - int32_t rI, int32_t Quantization = 0, - int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_FixedSinINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, - int32_t Quantization = 0, int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_FixedCosINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, - int32_t Quantization = 0, int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW> -__spirv_FixedSinCosINTEL(sycl::detail::ap_int a, bool S, int32_t I, - int32_t rI, int32_t Quantization = 0, - int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_FixedSinPiINTEL(sycl::detail::ap_int a, bool S, int32_t I, - int32_t rI, int32_t Quantization = 0, - int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_FixedCosPiINTEL(sycl::detail::ap_int a, bool S, int32_t I, - int32_t rI, int32_t Quantization = 0, - int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW> -__spirv_FixedSinCosPiINTEL(sycl::detail::ap_int a, bool S, int32_t I, - int32_t rI, int32_t Quantization = 0, - int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_FixedLogINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, - int32_t Quantization = 0, int32_t Overflow = 0) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_FixedExpINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, - int32_t Quantization = 0, int32_t Overflow = 0) noexcept; - -// In the following built-ins width of arbitrary precision integer type for -// a floating point variable should be equal to sum of corresponding -// exponent width E, mantissa width M and 1 for sign bit. I.e. WA = EA + MA + 1. -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatCastINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatCastFromIntINTEL(sycl::detail::ap_int A, int32_t Mout, - bool FromSign = false, - int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatCastToIntINTEL(sycl::detail::ap_int A, int32_t MA, - bool ToSign = false, - int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatAddINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatSubINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatMulINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatDivINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -// Comparison built-ins don't use Subnormal Support, Rounding Mode and -// Rounding Accuracy. -template -extern __DPCPP_SYCL_EXTERNAL bool -__spirv_ArbitraryFloatGTINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL bool -__spirv_ArbitraryFloatGEINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL bool -__spirv_ArbitraryFloatLTINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL bool -__spirv_ArbitraryFloatLEINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL bool -__spirv_ArbitraryFloatEQINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatRecipINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatRSqrtINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatCbrtINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatHypotINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatSqrtINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatLogINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatLog2INTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatLog10INTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatLog1pINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatExpINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatExp2INTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatExp10INTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatExpm1INTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatSinINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatCosINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -// Result value contains both values of sine and cosine and so has the size of -// 2 * Wout where Wout is equal to (1 + Eout + Mout). -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout> -__spirv_ArbitraryFloatSinCosINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatSinPiINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatCosPiINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -// Result value contains both values of sine(A*pi) and cosine(A*pi) and so has -// the size of 2 * Wout where Wout is equal to (1 + Eout + Mout). -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout> -__spirv_ArbitraryFloatSinCosPiINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatASinINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatASinPiINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatACosINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatACosPiINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatATanINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatATanPiINTEL(sycl::detail::ap_int A, int32_t MA, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatATan2INTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatPowINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatPowRINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, int32_t MB, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -// PowN built-in calculates `A^B` where `A` is arbitrary precision floating -// point number and `B` is signed or unsigned arbitrary precision integer, -// i.e. its width doesn't depend on sum of exponent and mantissa. -template -extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int -__spirv_ArbitraryFloatPowNINTEL(sycl::detail::ap_int A, int32_t MA, - sycl::detail::ap_int B, bool SignOfB, - int32_t Mout, int32_t EnableSubnormals = 0, - int32_t RoundingMode = 0, - int32_t RoundingAccuracy = 0) noexcept; - -extern __DPCPP_SYCL_EXTERNAL float -__spirv_ConvertBF16ToFINTEL(uint16_t) noexcept; -extern __DPCPP_SYCL_EXTERNAL uint16_t -__spirv_ConvertFToBF16INTEL(float) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t - __spirv_ConvertBF16ToFINTEL(__ocl_vec_t) noexcept; -template -extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t - __spirv_ConvertFToBF16INTEL(__ocl_vec_t) noexcept; - -extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void -__clc_BarrierInitialize(int64_t *state, int32_t expected_count) noexcept; - -extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void -__clc_BarrierInvalidate(int64_t *state) noexcept; - -extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t -__clc_BarrierArrive(int64_t *state) noexcept; - -extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t -__clc_BarrierArriveAndDrop(int64_t *state) noexcept; - -extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t -__clc_BarrierArriveNoComplete(int64_t *state, int32_t count) noexcept; - -extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t -__clc_BarrierArriveAndDropNoComplete(int64_t *state, int32_t count) noexcept; - -extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void -__clc_BarrierCopyAsyncArrive(int64_t *state) noexcept; - -extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void -__clc_BarrierCopyAsyncArriveNoInc(int64_t *state) noexcept; - -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void -__clc_BarrierWait(int64_t *state, int64_t arrival) noexcept; - -extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool -__clc_BarrierTestWait(int64_t *state, int64_t arrival) noexcept; - -__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void -__clc_BarrierArriveAndWait(int64_t *state) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL int -__spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, - Args... args); -template -extern __DPCPP_SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, - Args... args); - -// FIXME: __clc symbols are intended to be internal symbols to libclc/libspirv -// and should not be relied upon externally; consider them deprecated. We can't, -// however, explicitly declare __spirv_ocl versions of these builtins as that -// interferes with the implicit declarations provided by clang. This results in -// legitimate calls being seen as ambiguous and causing errors. Since these -// symbols are intended to expose native versions of bfloat16 builtins for -// NVPTX, we should probably just be exposing builtins with actual bfloat16 -// types, not unsigned integer types. -#define __CLC_BF16(...) \ - extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fabs( \ - __VA_ARGS__) noexcept; \ - extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmin( \ - __VA_ARGS__, __VA_ARGS__) noexcept; \ - extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmax( \ - __VA_ARGS__, __VA_ARGS__) noexcept; \ - extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fma( \ - __VA_ARGS__, __VA_ARGS__, __VA_ARGS__) noexcept; - -#define __CLC_BF16_SCAL_VEC(TYPE) \ - __CLC_BF16(TYPE) \ - __CLC_BF16(__ocl_vec_t) \ - __CLC_BF16(__ocl_vec_t) \ - __CLC_BF16(__ocl_vec_t) \ - __CLC_BF16(__ocl_vec_t) \ - __CLC_BF16(__ocl_vec_t) - -__CLC_BF16_SCAL_VEC(uint16_t) -__CLC_BF16_SCAL_VEC(uint32_t) - -#undef __CLC_BF16_SCAL_VEC -#undef __CLC_BF16 - -extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInGlobalHWThreadIDINTEL(); -extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInSubDeviceIDINTEL(); -extern __DPCPP_SYCL_EXTERNAL uint64_t __spirv_ReadClockKHR(int); - -template -extern __DPCPP_SYCL_EXTERNAL - std::enable_if_t && std::is_unsigned_v, to> - __spirv_ConvertPtrToU(from val) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL std::pair<__ocl_vec_t, __ocl_vec_t> -__spirv_IAddCarry(__ocl_vec_t src0, __ocl_vec_t src1); - -template -extern __DPCPP_SYCL_EXTERNAL std::pair<__ocl_vec_t, __ocl_vec_t> -__spirv_ISubBorrow(__ocl_vec_t src0, __ocl_vec_t src1); -template -extern __DPCPP_SYCL_EXTERNAL __spv::__spirv_TaskSequenceINTEL * -__spirv_TaskSequenceCreateINTEL(RetT (*f)(ArgsT...), int Pipelined = -1, - int ClusterMode = -1, - unsigned int ResponseCapacity = 0, - unsigned int InvocationCapacity = 0) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL void -__spirv_TaskSequenceAsyncINTEL(__spv::__spirv_TaskSequenceINTEL *TaskSequence, - ArgsT... Args) noexcept; - -template -extern __DPCPP_SYCL_EXTERNAL RetT __spirv_TaskSequenceGetINTEL( - __spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept; - -extern __DPCPP_SYCL_EXTERNAL void __spirv_TaskSequenceReleaseINTEL( - __spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept; - -#endif // !__SYCL_DEVICE_ONLY__ +#include +#include +#include +#include +#include +#include diff --git a/sycl/include/sycl/__spirv/spirv_ops_atomic.hpp b/sycl/include/sycl/__spirv/spirv_ops_atomic.hpp new file mode 100644 index 0000000000000..ddafb59d20d2e --- /dev/null +++ b/sycl/include/sycl/__spirv/spirv_ops_atomic.hpp @@ -0,0 +1,159 @@ +//==------- spirv_ops_atomic.hpp --- SPIRV atomic operations --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#include + +#ifdef __SYCL_DEVICE_ONLY__ + +// Atomic SPIR-V builtins +// TODO: drop these forward-declarations. +// As of now, compiler does not forward-declare long long overloads for +// these and as such we can't drop anything from here. But ideally, we should +// rely on the compiler to generate those - that would allow to drop +// spirv_ops.hpp include from more files. +#define __SPIRV_ATOMIC_LOAD(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicLoad(AS Type *P, int S, \ + int O) noexcept; +#define __SPIRV_ATOMIC_STORE(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL void __spirv_AtomicStore( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_EXCHANGE(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicExchange( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicCompareExchange( \ + AS Type *P, int S, int E, int U, Type V, Type C) noexcept; +#define __SPIRV_ATOMIC_IADD(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicIAdd( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_ISUB(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicISub( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_FADD(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFAddEXT( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_SMIN(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMin( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_UMIN(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMin( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_FMIN(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMinEXT( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_SMAX(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicSMax( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_UMAX(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicUMax( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_FMAX(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicFMaxEXT( \ + AS Type *P, int S, int O, Type V) noexcept; +#define __SPIRV_ATOMIC_AND(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicAnd(AS Type *P, int S, \ + int O, Type V) noexcept; +#define __SPIRV_ATOMIC_OR(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicOr(AS Type *P, int S, int O, \ + Type V) noexcept; +#define __SPIRV_ATOMIC_XOR(AS, Type) \ + extern __DPCPP_SYCL_EXTERNAL Type __spirv_AtomicXor(AS Type *P, int S, \ + int O, Type V) noexcept; + +#define __SPIRV_ATOMIC_FLOAT(AS, Type) \ + __SPIRV_ATOMIC_FADD(AS, Type) \ + __SPIRV_ATOMIC_FMIN(AS, Type) \ + __SPIRV_ATOMIC_FMAX(AS, Type) \ + __SPIRV_ATOMIC_LOAD(AS, Type) \ + __SPIRV_ATOMIC_STORE(AS, Type) \ + __SPIRV_ATOMIC_EXCHANGE(AS, Type) + +#define __SPIRV_ATOMIC_BASE(AS, Type) \ + __SPIRV_ATOMIC_FLOAT(AS, Type) \ + __SPIRV_ATOMIC_CMP_EXCHANGE(AS, Type) \ + __SPIRV_ATOMIC_IADD(AS, Type) \ + __SPIRV_ATOMIC_ISUB(AS, Type) \ + __SPIRV_ATOMIC_AND(AS, Type) \ + __SPIRV_ATOMIC_OR(AS, Type) \ + __SPIRV_ATOMIC_XOR(AS, Type) + +#define __SPIRV_ATOMIC_SIGNED(AS, Type) \ + __SPIRV_ATOMIC_BASE(AS, Type) \ + __SPIRV_ATOMIC_SMIN(AS, Type) \ + __SPIRV_ATOMIC_SMAX(AS, Type) + +#define __SPIRV_ATOMIC_UNSIGNED(AS, Type) \ + __SPIRV_ATOMIC_BASE(AS, Type) \ + __SPIRV_ATOMIC_UMIN(AS, Type) \ + __SPIRV_ATOMIC_UMAX(AS, Type) + +// Helper atomic operations which select correct signed/unsigned version +// of atomic min/max based on the type +#define __SPIRV_ATOMIC_MINMAX(AS, Op) \ + template \ + typename std::enable_if_t< \ + std::is_integral::value && std::is_signed::value, T> \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, T Value) noexcept { \ + return __spirv_AtomicS##Op(Ptr, Memory, Semantics, Value); \ + } \ + template \ + typename std::enable_if_t< \ + std::is_integral::value && !std::is_signed::value, T> \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, T Value) noexcept { \ + return __spirv_AtomicU##Op(Ptr, Memory, Semantics, Value); \ + } \ + template \ + typename std::enable_if_t::value, T> \ + __spirv_Atomic##Op(AS T *Ptr, int Memory, int Semantics, T Value) noexcept { \ + return __spirv_AtomicF##Op##EXT(Ptr, Memory, Semantics, Value); \ + } + +#define __SPIRV_ATOMICS(macro, Arg) \ + macro(__attribute__((opencl_global)), Arg) \ + macro(__attribute__((opencl_local)), Arg) macro(, Arg) + +__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, _Float16) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned int) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min) +__SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max) + +#undef __SPIRV_ATOMICS +#undef __SPIRV_ATOMIC_AND +#undef __SPIRV_ATOMIC_BASE +#undef __SPIRV_ATOMIC_CMP_EXCHANGE +#undef __SPIRV_ATOMIC_EXCHANGE +#undef __SPIRV_ATOMIC_FADD +#undef __SPIRV_ATOMIC_FLOAT +#undef __SPIRV_ATOMIC_FMAX +#undef __SPIRV_ATOMIC_FMIN +#undef __SPIRV_ATOMIC_IADD +#undef __SPIRV_ATOMIC_ISUB +#undef __SPIRV_ATOMIC_LOAD +#undef __SPIRV_ATOMIC_MINMAX +#undef __SPIRV_ATOMIC_OR +#undef __SPIRV_ATOMIC_SIGNED +#undef __SPIRV_ATOMIC_SMAX +#undef __SPIRV_ATOMIC_SMIN +#undef __SPIRV_ATOMIC_STORE +#undef __SPIRV_ATOMIC_UMAX +#undef __SPIRV_ATOMIC_UMIN +#undef __SPIRV_ATOMIC_UNSIGNED +#undef __SPIRV_ATOMIC_XOR + +#endif diff --git a/sycl/include/sycl/__spirv/spirv_ops_base.hpp b/sycl/include/sycl/__spirv/spirv_ops_base.hpp new file mode 100644 index 0000000000000..fbbaaba4a766e --- /dev/null +++ b/sycl/include/sycl/__spirv/spirv_ops_base.hpp @@ -0,0 +1,20 @@ +//==-------- spirv_ops_base.hpp --- SPIRV operation common support --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include // for Scope, __ocl_event_t +#include // for __DPCPP_SYCL_EXTERNAL +#include // for __SYCL_EXPORT + +// Convergent attribute +#ifdef __SYCL_DEVICE_ONLY__ +#define __SYCL_CONVERGENT__ __attribute__((convergent)) +#else +#define __SYCL_CONVERGENT__ +#endif diff --git a/sycl/include/sycl/__spirv/spirv_ops_builtin_decls.hpp b/sycl/include/sycl/__spirv/spirv_ops_builtin_decls.hpp new file mode 100644 index 0000000000000..4d403c4cc3fc9 --- /dev/null +++ b/sycl/include/sycl/__spirv/spirv_ops_builtin_decls.hpp @@ -0,0 +1,18 @@ +//==--- spirv_ops_builtin_decls.hpp --- SPIRV built-in declaration guard --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#ifdef __SYCL_DEVICE_ONLY__ +#ifndef __SPIRV_BUILTIN_DECLARATIONS__ +#error \ + "SPIR-V built-ins are not available. Please set -fdeclare-spirv-builtins flag." +#endif +#endif diff --git a/sycl/include/sycl/__spirv/spirv_ops_image.hpp b/sycl/include/sycl/__spirv/spirv_ops_image.hpp new file mode 100644 index 0000000000000..eb325d883c0c0 --- /dev/null +++ b/sycl/include/sycl/__spirv/spirv_ops_image.hpp @@ -0,0 +1,81 @@ +//==-------- spirv_ops_image.hpp --- SPIRV image operations ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#ifdef __SYCL_DEVICE_ONLY__ + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryFormat(ImageT); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQueryOrder(ImageT); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageQuerySize(ImageT); + +template +extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageWrite(ImageT, CoordT, ValT); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageRead(ImageT, TempArgT); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageFetch(ImageT, TempArgT); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageFetch(ImageT, TempArgT); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageArrayFetch(ImageT, TempArgT, + int); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageArrayFetch(ImageT, + TempArgT, int); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_SampledImageGather(ImageT, TempArgT, + unsigned); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ImageArrayRead(ImageT, TempArgT, int); + +template +extern __DPCPP_SYCL_EXTERNAL void __spirv_ImageArrayWrite(ImageT, CoordT, int, + ValT); + +template +extern __DPCPP_SYCL_EXTERNAL SampledType __spirv_SampledImage(ImageT, + __ocl_sampler_t); + +template +extern __DPCPP_SYCL_EXTERNAL TempRetT +__spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, float); + +template +extern __DPCPP_SYCL_EXTERNAL TempRetT +__spirv_ImageSampleExplicitLod(SampledType, TempArgT, int, TempArgT, TempArgT); + +template +extern __DPCPP_SYCL_EXTERNAL TempRetT __spirv_ImageSampleCubemap(SampledType, + TempArgT); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ConvertHandleToImageINTEL(HandleT); + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_ConvertHandleToSamplerINTEL(HandleT); + +template +extern __DPCPP_SYCL_EXTERNAL + RetT __spirv_ConvertHandleToSampledImageINTEL(HandleT); + +#endif diff --git a/sycl/include/sycl/__spirv/spirv_ops_intel_math.hpp b/sycl/include/sycl/__spirv/spirv_ops_intel_math.hpp new file mode 100644 index 0000000000000..26166a18e709c --- /dev/null +++ b/sycl/include/sycl/__spirv/spirv_ops_intel_math.hpp @@ -0,0 +1,379 @@ +//==---- spirv_ops_intel_math.hpp --- SPIRV INTEL numeric operations ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#include + +#ifdef __SYCL_DEVICE_ONLY__ + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_FixedSqrtINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, + int32_t Quantization = 0, int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_FixedRecipINTEL(sycl::detail::ap_int a, bool S, int32_t I, + int32_t rI, int32_t Quantization = 0, + int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_FixedRsqrtINTEL(sycl::detail::ap_int a, bool S, int32_t I, + int32_t rI, int32_t Quantization = 0, + int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_FixedSinINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, + int32_t Quantization = 0, int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_FixedCosINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, + int32_t Quantization = 0, int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW> +__spirv_FixedSinCosINTEL(sycl::detail::ap_int a, bool S, int32_t I, + int32_t rI, int32_t Quantization = 0, + int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_FixedSinPiINTEL(sycl::detail::ap_int a, bool S, int32_t I, + int32_t rI, int32_t Quantization = 0, + int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_FixedCosPiINTEL(sycl::detail::ap_int a, bool S, int32_t I, + int32_t rI, int32_t Quantization = 0, + int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * rW> +__spirv_FixedSinCosPiINTEL(sycl::detail::ap_int a, bool S, int32_t I, + int32_t rI, int32_t Quantization = 0, + int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_FixedLogINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, + int32_t Quantization = 0, int32_t Overflow = 0) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_FixedExpINTEL(sycl::detail::ap_int a, bool S, int32_t I, int32_t rI, + int32_t Quantization = 0, int32_t Overflow = 0) noexcept; + +// In the following built-ins width of arbitrary precision integer type for +// a floating point variable should be equal to sum of corresponding +// exponent width E, mantissa width M and 1 for sign bit. I.e. WA = EA + MA + 1. +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatCastINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatCastFromIntINTEL(sycl::detail::ap_int A, int32_t Mout, + bool FromSign = false, + int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatCastToIntINTEL(sycl::detail::ap_int A, int32_t MA, + bool ToSign = false, + int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatAddINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatSubINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatMulINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatDivINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +// Comparison built-ins don't use Subnormal Support, Rounding Mode and +// Rounding Accuracy. +template +extern __DPCPP_SYCL_EXTERNAL bool +__spirv_ArbitraryFloatGTINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL bool +__spirv_ArbitraryFloatGEINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL bool +__spirv_ArbitraryFloatLTINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL bool +__spirv_ArbitraryFloatLEINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL bool +__spirv_ArbitraryFloatEQINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatRecipINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatRSqrtINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatCbrtINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatHypotINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatSqrtINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatLogINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatLog2INTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatLog10INTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatLog1pINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatExpINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatExp2INTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatExp10INTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatExpm1INTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatSinINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatCosINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +// Result value contains both values of sine and cosine and so has the size of +// 2 * Wout where Wout is equal to (1 + Eout + Mout). +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout> +__spirv_ArbitraryFloatSinCosINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatSinPiINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatCosPiINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +// Result value contains both values of sine(A*pi) and cosine(A*pi) and so has +// the size of 2 * Wout where Wout is equal to (1 + Eout + Mout). +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int<2 * Wout> +__spirv_ArbitraryFloatSinCosPiINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatASinINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatASinPiINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatACosINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatACosPiINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatATanINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatATanPiINTEL(sycl::detail::ap_int A, int32_t MA, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatATan2INTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatPowINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatPowRINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, int32_t MB, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +// PowN built-in calculates `A^B` where `A` is arbitrary precision floating +// point number and `B` is signed or unsigned arbitrary precision integer, +// i.e. its width doesn't depend on sum of exponent and mantissa. +template +extern __DPCPP_SYCL_EXTERNAL sycl::detail::ap_int +__spirv_ArbitraryFloatPowNINTEL(sycl::detail::ap_int A, int32_t MA, + sycl::detail::ap_int B, bool SignOfB, + int32_t Mout, int32_t EnableSubnormals = 0, + int32_t RoundingMode = 0, + int32_t RoundingAccuracy = 0) noexcept; + +extern __DPCPP_SYCL_EXTERNAL float +__spirv_ConvertBF16ToFINTEL(uint16_t) noexcept; +extern __DPCPP_SYCL_EXTERNAL uint16_t +__spirv_ConvertFToBF16INTEL(float) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t + __spirv_ConvertBF16ToFINTEL(__ocl_vec_t) noexcept; +template +extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t + __spirv_ConvertFToBF16INTEL(__ocl_vec_t) noexcept; + +#endif diff --git a/sycl/include/sycl/__spirv/spirv_ops_matrix.hpp b/sycl/include/sycl/__spirv/spirv_ops_matrix.hpp new file mode 100644 index 0000000000000..0bd42ffca3081 --- /dev/null +++ b/sycl/include/sycl/__spirv/spirv_ops_matrix.hpp @@ -0,0 +1,154 @@ +//==------- spirv_ops_matrix.hpp --- SPIRV matrix operations --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#include +#include + +#ifdef __SYCL_DEVICE_ONLY__ + +extern __DPCPP_SYCL_EXTERNAL float __spirv_RoundFToTF32INTEL(float a); + +template +extern __DPCPP_SYCL_EXTERNAL + __spv::__spirv_CooperativeMatrixKHR * + __spirv_CooperativeMatrixLoadKHR(T *Ptr, __spv::MatrixLayout Layout = L, + std::size_t Stride = 0, + int MemOperand = 0); + +template +extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreKHR( + T *Ptr, __spv::__spirv_CooperativeMatrixKHR *Object, + __spv::MatrixLayout Layout = L, std::size_t Stride = 0, int MemOperand = 0); + +template +extern __DPCPP_SYCL_EXTERNAL size_t __spirv_CooperativeMatrixLengthKHR( + __spv::__spirv_CooperativeMatrixKHR *); + +template +extern __DPCPP_SYCL_EXTERNAL + __spv::__spirv_CooperativeMatrixKHR * + __spirv_CooperativeMatrixConstructCheckedINTEL(const T Value, size_t Height, + size_t Stride, size_t Width, + size_t CoordX, + size_t CoordY); + +template +extern __DPCPP_SYCL_EXTERNAL + __spv::__spirv_CooperativeMatrixKHR * + __spirv_CooperativeMatrixLoadCheckedINTEL(T *Ptr, std::size_t Stride, + size_t Height, size_t Width, + size_t CoordX, size_t CoordY, + __spv::MatrixLayout Layout = L, + int MemOperand = 0); + +template +extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreCheckedINTEL( + T *Ptr, __spv::__spirv_CooperativeMatrixKHR *Object, + std::size_t Stride, size_t Height, size_t Width, size_t CoordX, + size_t CoordY, __spv::MatrixLayout Layout = L, int MemOperand = 0); + +template +extern __DPCPP_SYCL_EXTERNAL + __spv::__spirv_CooperativeMatrixKHR * + __spirv_CooperativeMatrixMulAddKHR( + __spv::__spirv_CooperativeMatrixKHR *A, + __spv::__spirv_CooperativeMatrixKHR *B, + __spv::__spirv_CooperativeMatrixKHR *C, + size_t Operands = 0); + +template +extern __DPCPP_SYCL_EXTERNAL + __spv::__spirv_CooperativeMatrixKHR * + __spirv_CompositeConstruct(const T v); + +// TODO: replace with __spirv_CooperativeMatrixGetElementCoordINTEL when ready +template +extern __DPCPP_SYCL_EXTERNAL __ocl_vec_t +__spirv_JointMatrixGetElementCoordINTEL( + __spv::__spirv_CooperativeMatrixKHR *, size_t i); + +// AccessChain followed by load/store serves to extract/insert and element +// from/to the matrix +template +extern __DPCPP_SYCL_EXTERNAL Ts * +__spirv_AccessChain(__spv::__spirv_CooperativeMatrixKHR **, + size_t i); + +template +extern __DPCPP_SYCL_EXTERNAL + __spv::__spirv_CooperativeMatrixKHR * + __spirv_CooperativeMatrixConstructCheckedINTEL(int32_t CoordX, + int32_t CoordY, + uint32_t Height, + uint32_t Width, + const T Value); + +template +extern __DPCPP_SYCL_EXTERNAL + __spv::__spirv_CooperativeMatrixKHR * + __spirv_CooperativeMatrixLoadCheckedINTEL( + T *Ptr, int32_t CoordX, int32_t CoordY, __spv::MatrixLayout Layout = L, + uint32_t Height = 0, uint32_t Width = 0, std::size_t Stride = 0, + int MemOperand = 0); + +template +extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixStoreCheckedINTEL( + T *Ptr, int32_t CoordX, int32_t CoordY, + __spv::__spirv_CooperativeMatrixKHR *Object, + __spv::MatrixLayout Layout = L, uint32_t Height = 0, uint32_t Width = 0, + std::size_t Stride = 0, int MemOperand = 0); + +template +extern __DPCPP_SYCL_EXTERNAL void __spirv_CooperativeMatrixPrefetchINTEL( + T *Ptr, uint32_t NumRows, uint32_t NumCols, unsigned int CacheLevel, + __spv::MatrixLayout Layout, size_t Stride); + +#endif diff --git a/sycl/include/sycl/__spirv/spirv_ops_runtime.hpp b/sycl/include/sycl/__spirv/spirv_ops_runtime.hpp new file mode 100644 index 0000000000000..774d79d89010f --- /dev/null +++ b/sycl/include/sycl/__spirv/spirv_ops_runtime.hpp @@ -0,0 +1,127 @@ +//==------ spirv_ops_runtime.hpp --- SPIRV runtime and misc operations ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#include +#include + +#ifdef __SYCL_DEVICE_ONLY__ + +extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void +__clc_BarrierInitialize(int64_t *state, int32_t expected_count) noexcept; + +extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void +__clc_BarrierInvalidate(int64_t *state) noexcept; + +extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t +__clc_BarrierArrive(int64_t *state) noexcept; + +extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t +__clc_BarrierArriveAndDrop(int64_t *state) noexcept; + +extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t +__clc_BarrierArriveNoComplete(int64_t *state, int32_t count) noexcept; + +extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT int64_t +__clc_BarrierArriveAndDropNoComplete(int64_t *state, int32_t count) noexcept; + +extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void +__clc_BarrierCopyAsyncArrive(int64_t *state) noexcept; + +extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void +__clc_BarrierCopyAsyncArriveNoInc(int64_t *state) noexcept; + +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void +__clc_BarrierWait(int64_t *state, int64_t arrival) noexcept; + +extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT bool +__clc_BarrierTestWait(int64_t *state, int64_t arrival) noexcept; + +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT void +__clc_BarrierArriveAndWait(int64_t *state) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL int +__spirv_ocl_printf(const __attribute__((opencl_constant)) char *Format, + Args... args); +template +extern __DPCPP_SYCL_EXTERNAL int __spirv_ocl_printf(const char *Format, + Args... args); + +// FIXME: __clc symbols are intended to be internal symbols to libclc/libspirv +// and should not be relied upon externally; consider them deprecated. We can't, +// however, explicitly declare __spirv_ocl versions of these builtins as that +// interferes with the implicit declarations provided by clang. This results in +// legitimate calls being seen as ambiguous and causing errors. Since these +// symbols are intended to expose native versions of bfloat16 builtins for +// NVPTX, we should probably just be exposing builtins with actual bfloat16 +// types, not unsigned integer types. +#define __CLC_BF16(...) \ + extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fabs( \ + __VA_ARGS__) noexcept; \ + extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmin( \ + __VA_ARGS__, __VA_ARGS__) noexcept; \ + extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fmax( \ + __VA_ARGS__, __VA_ARGS__) noexcept; \ + extern __DPCPP_SYCL_EXTERNAL __SYCL_EXPORT __VA_ARGS__ __clc_fma( \ + __VA_ARGS__, __VA_ARGS__, __VA_ARGS__) noexcept; + +#define __CLC_BF16_SCAL_VEC(TYPE) \ + __CLC_BF16(TYPE) \ + __CLC_BF16(__ocl_vec_t) \ + __CLC_BF16(__ocl_vec_t) \ + __CLC_BF16(__ocl_vec_t) \ + __CLC_BF16(__ocl_vec_t) \ + __CLC_BF16(__ocl_vec_t) + +__CLC_BF16_SCAL_VEC(uint16_t) +__CLC_BF16_SCAL_VEC(uint32_t) + +#undef __CLC_BF16_SCAL_VEC +#undef __CLC_BF16 + +extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInGlobalHWThreadIDINTEL(); +extern __DPCPP_SYCL_EXTERNAL int32_t __spirv_BuiltInSubDeviceIDINTEL(); +extern __DPCPP_SYCL_EXTERNAL uint64_t __spirv_ReadClockKHR(int); + +template +extern __DPCPP_SYCL_EXTERNAL + std::enable_if_t && std::is_unsigned_v, to> + __spirv_ConvertPtrToU(from val) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL std::pair<__ocl_vec_t, __ocl_vec_t> +__spirv_IAddCarry(__ocl_vec_t src0, __ocl_vec_t src1); + +template +extern __DPCPP_SYCL_EXTERNAL std::pair<__ocl_vec_t, __ocl_vec_t> +__spirv_ISubBorrow(__ocl_vec_t src0, __ocl_vec_t src1); + +template +extern __DPCPP_SYCL_EXTERNAL __spv::__spirv_TaskSequenceINTEL * +__spirv_TaskSequenceCreateINTEL(RetT (*f)(ArgsT...), int Pipelined = -1, + int ClusterMode = -1, + unsigned int ResponseCapacity = 0, + unsigned int InvocationCapacity = 0) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL void +__spirv_TaskSequenceAsyncINTEL(__spv::__spirv_TaskSequenceINTEL *TaskSequence, + ArgsT... Args) noexcept; + +template +extern __DPCPP_SYCL_EXTERNAL RetT __spirv_TaskSequenceGetINTEL( + __spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept; + +extern __DPCPP_SYCL_EXTERNAL void __spirv_TaskSequenceReleaseINTEL( + __spv::__spirv_TaskSequenceINTEL *TaskSequence) noexcept; + +#endif diff --git a/sycl/include/sycl/__spirv/spirv_ops_subgroup.hpp b/sycl/include/sycl/__spirv/spirv_ops_subgroup.hpp new file mode 100644 index 0000000000000..9211d43884763 --- /dev/null +++ b/sycl/include/sycl/__spirv/spirv_ops_subgroup.hpp @@ -0,0 +1,55 @@ +//==------ spirv_ops_subgroup.hpp --- SPIRV subgroup operations -----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +#ifdef __SYCL_DEVICE_ONLY__ + +template +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT +__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) + uint8_t *Ptr) noexcept; + +template +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT +__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) + uint16_t *Ptr) noexcept; + +template +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT +__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) + uint32_t *Ptr) noexcept; + +template +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT +__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_global)) + uint64_t *Ptr) noexcept; + +template +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT +__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) + uint8_t *Ptr) noexcept; + +template +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT +__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) + uint16_t *Ptr) noexcept; + +template +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT +__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) + uint32_t *Ptr) noexcept; + +template +__SYCL_CONVERGENT__ extern __DPCPP_SYCL_EXTERNAL dataT +__spirv_SubgroupBlockReadINTEL(const __attribute__((opencl_local)) + uint64_t *Ptr) noexcept; + +#endif diff --git a/sycl/include/sycl/atomic.hpp b/sycl/include/sycl/atomic.hpp index 5b6e3c5c6c85e..5e9e3b13dac52 100644 --- a/sycl/include/sycl/atomic.hpp +++ b/sycl/include/sycl/atomic.hpp @@ -8,7 +8,7 @@ #pragma once -#include +#include #include // for Scope, MemorySemanticsMask #include #include diff --git a/sycl/include/sycl/detail/image_ocl_types.hpp b/sycl/include/sycl/detail/image_ocl_types.hpp index b81f74aa656f2..1880c7e520c54 100644 --- a/sycl/include/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/sycl/detail/image_ocl_types.hpp @@ -32,7 +32,7 @@ #include #include -#include +#include namespace sycl { inline namespace _V1 { diff --git a/sycl/include/sycl/detail/spirv.hpp b/sycl/include/sycl/detail/spirv.hpp index 3dd5ed5bd161e..8f97fa8566f77 100644 --- a/sycl/include/sycl/detail/spirv.hpp +++ b/sycl/include/sycl/detail/spirv.hpp @@ -14,7 +14,7 @@ // __spirv_AtomicStore(unsigned long long*, ...) // Therefore, we need the following include to get forward-declarations of those // versions. -#include +#include #include #include diff --git a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp index d39b0b9660f41..e3623cec2f984 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp @@ -20,7 +20,7 @@ #include #ifdef __SYCL_DEVICE_ONLY__ -#include +#include #endif #include diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/tfloat32.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/tfloat32.hpp index c418f33646ea6..2a9750abead67 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/tfloat32.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/tfloat32.hpp @@ -10,9 +10,10 @@ #pragma once -#include #include +#include + namespace sycl { inline namespace _V1 { namespace ext { diff --git a/sycl/include/sycl/ext/oneapi/bf16_storage_builtins.hpp b/sycl/include/sycl/ext/oneapi/bf16_storage_builtins.hpp index c15418273e7aa..caf1e1d2d731a 100644 --- a/sycl/include/sycl/ext/oneapi/bf16_storage_builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/bf16_storage_builtins.hpp @@ -8,7 +8,7 @@ #pragma once -#include +#include #include #include #include diff --git a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp index 3fbc29fd8c4e0..25d5c4faaa217 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/bfloat16_math.hpp @@ -8,7 +8,7 @@ #pragma once -#include +#include #include // for sycl::bit_cast #include // for ceil, cos, exp, exp10, exp2 #include // sycl::detail::memcpy diff --git a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp index 03ba6f818eee2..86e32968d1287 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/builtins.hpp @@ -8,7 +8,7 @@ #pragma once -#include +#include #include // for half #include // for to_vec2 #include // for __SYCL_ALWAYS_INLINE diff --git a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp index f0cf05b2b3bd9..849bb5c6fb5e6 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/clock.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/clock.hpp @@ -8,7 +8,7 @@ #pragma once -#include +#include #include #include diff --git a/sycl/include/sycl/ext/oneapi/experimental/cuda/barrier.hpp b/sycl/include/sycl/ext/oneapi/experimental/cuda/barrier.hpp index 4c473fb88a8f1..a1cfbb5060262 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/cuda/barrier.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/cuda/barrier.hpp @@ -8,7 +8,7 @@ #pragma once -#include // for __clc_BarrierInitialize +#include // for __clc_BarrierInitialize #include #include // for int32_t, int64_t, uint32_t, uint64_t diff --git a/sycl/include/sycl/stl_wrappers/complex b/sycl/include/sycl/stl_wrappers/complex index c133ced813bbb..861784f33f109 100644 --- a/sycl/include/sycl/stl_wrappers/complex +++ b/sycl/include/sycl/stl_wrappers/complex @@ -38,7 +38,7 @@ #include -#include // for __SYCL_CONVERGENT__ +#include // for __SYCL_CONVERGENT__ #include // for half // We provide std::complex specializations here for the following: diff --git a/sycl/include/sycl/sub_group.hpp b/sycl/include/sycl/sub_group.hpp index 466b8ada9e78c..2c9cc11f28fc0 100644 --- a/sycl/include/sycl/sub_group.hpp +++ b/sycl/include/sycl/sub_group.hpp @@ -8,7 +8,8 @@ #pragma once -#include +#include +#include #include // for __SYCL_DEPRECATED #include // for id #include // for memory_scope diff --git a/sycl/test/include_deps/sycl_khr_includes_atomic.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_atomic.hpp.cpp index 577ea802c3104..3d062b3cd5365 100644 --- a/sycl/test/include_deps/sycl_khr_includes_atomic.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_atomic.hpp.cpp @@ -11,7 +11,9 @@ // CHECK-NEXT: atomic_fence.hpp // CHECK-NEXT: memory_enums.hpp // CHECK-NEXT: detail/spirv.hpp -// CHECK-NEXT: __spirv/spirv_ops.hpp +// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp +// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp +// CHECK-NEXT: __spirv/spirv_ops_base.hpp // CHECK-NEXT: __spirv/spirv_types.hpp // CHECK-NEXT: detail/defines.hpp // CHECK-NEXT: detail/export.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_group_algorithms.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_group_algorithms.hpp.cpp index 7936929596a17..0166b1f265e6b 100644 --- a/sycl/test/include_deps/sycl_khr_includes_group_algorithms.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_group_algorithms.hpp.cpp @@ -58,7 +58,9 @@ // CHECK-NEXT: nd_range.hpp // CHECK-NEXT: ext/oneapi/functional.hpp // CHECK-NEXT: detail/spirv.hpp -// CHECK-NEXT: __spirv/spirv_ops.hpp +// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp +// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp +// CHECK-NEXT: __spirv/spirv_ops_base.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: detail/address_space_cast.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_groups.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_groups.hpp.cpp index 931fed70bc7f1..9dd46646844b2 100644 --- a/sycl/test/include_deps/sycl_khr_includes_groups.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_groups.hpp.cpp @@ -58,9 +58,12 @@ // CHECK-NEXT: nd_range.hpp // CHECK-NEXT: ext/oneapi/functional.hpp // CHECK-NEXT: detail/spirv.hpp -// CHECK-NEXT: __spirv/spirv_ops.hpp +// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp +// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp +// CHECK-NEXT: __spirv/spirv_ops_base.hpp // CHECK-NEXT: multi_ptr.hpp // CHECK-NEXT: detail/address_space_cast.hpp // CHECK-NEXT: group_barrier.hpp // CHECK-NEXT: sub_group.hpp +// CHECK-NEXT: __spirv/spirv_ops_subgroup.hpp // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_images.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_images.hpp.cpp index 07dfaf2272174..9a6431e6ffd48 100644 --- a/sycl/test/include_deps/sycl_khr_includes_images.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_images.hpp.cpp @@ -110,7 +110,9 @@ // CHECK-NEXT: detail/named_swizzles_mixin.hpp // CHECK-NEXT: detail/memcpy.hpp // CHECK-NEXT: detail/image_ocl_types.hpp -// CHECK-NEXT: __spirv/spirv_ops.hpp +// CHECK-NEXT: __spirv/spirv_ops_image.hpp +// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp +// CHECK-NEXT: __spirv/spirv_ops_base.hpp // CHECK-NEXT: properties/image_properties.hpp // CHECK-NEXT: properties/image_properties.def // CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp index 6775a70dbd859..10f734bcf521e 100644 --- a/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_reduction.hpp.cpp @@ -96,7 +96,9 @@ // CHECK-NEXT: usm/usm_enums.hpp // CHECK-NEXT: properties/buffer_properties.def // CHECK-NEXT: atomic.hpp -// CHECK-NEXT: __spirv/spirv_ops.hpp +// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp +// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp +// CHECK-NEXT: __spirv/spirv_ops_base.hpp // CHECK-NEXT: atomic_ref.hpp // CHECK-NEXT: ext/oneapi/experimental/address_cast.hpp // CHECK-NEXT: detail/spirv.hpp diff --git a/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp b/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp index 8b69fc29fe202..2ed0ef4c90712 100644 --- a/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp +++ b/sycl/test/include_deps/sycl_khr_includes_stream.hpp.cpp @@ -94,7 +94,9 @@ // CHECK-NEXT: usm/usm_enums.hpp // CHECK-NEXT: properties/buffer_properties.def // CHECK-NEXT: atomic.hpp -// CHECK-NEXT: __spirv/spirv_ops.hpp +// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp +// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp +// CHECK-NEXT: __spirv/spirv_ops_base.hpp // CHECK-NEXT: builtins.hpp // CHECK-NEXT: detail/builtins/builtins.hpp // CHECK-NEXT: detail/vector_core.hpp