From 1ece36c505d8614dc5ab6bb5effecc8784cd7aa3 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Thu, 24 Apr 2025 13:33:57 +0800 Subject: [PATCH 1/4] [SYCLomatic] Fix migration issue for free-function-queries Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 31 +++++++++++++++++++++++----- clang/test/dpct/with_this_nd_item.cu | 13 ++++++++++++ 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 67fe389b928e..5a5856a40202 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -7512,11 +7512,32 @@ void FreeQueriesInfo::printImmediateText(llvm::raw_ostream &OS, const Node *S, return Info->printImmediateText(OS, S->getBeginLoc(), K); } -#ifdef DPCT_DEBUG_BUILD - llvm::errs() << "Can not get FreeQueriesInfo for this FunctionDecl\n"; - assert(0); -#endif // DPCT_DEBUG_BUILD - + auto DFI = DeviceFunctionDecl::LinkRedecls(FD); + if (!DFI) + return; + auto Index = DpctGlobalInfo::getCudaKernelDimDFIIndexThenInc(); + DpctGlobalInfo::insertCudaKernelDimDFIMap(Index, DFI); + switch (K) { + case FreeQueriesKind::NdItem: { + OS << MapNames::getClNamespace() + << "ext::oneapi::this_work_item::get_nd_item<{{NEEDREPLACEG" + + std::to_string(Index) + "}}>()"; + break; + } + case FreeQueriesKind::Group: { + OS << MapNames::getClNamespace() + << "ext::oneapi::this_work_item::get_work_group<{{NEEDREPLACEG" + + std::to_string(Index) + "}}>()"; + break; + } + case FreeQueriesKind::SubGroup: { + OS << MapNames::getClNamespace() + << "ext::oneapi::this_work_item::get_sub_group()"; + break; + } + default: + llvm_unreachable("Unexpected FreeQueriesKind"); + } } else { if (auto DFI = DeviceFunctionDecl::LinkRedecls(FD)) DFI->setItem(); diff --git a/clang/test/dpct/with_this_nd_item.cu b/clang/test/dpct/with_this_nd_item.cu index 92792245ae1c..fef60784e82a 100644 --- a/clang/test/dpct/with_this_nd_item.cu +++ b/clang/test/dpct/with_this_nd_item.cu @@ -87,3 +87,16 @@ int main() { test3<<<32,32>>>(); } + +struct TEST4 { + // CHECK: TEST4(int a) : thread_idx(sycl::ext::oneapi::this_work_item::get_nd_item<1>().get_local_id(0)) {} + __device__ TEST4(int a) : thread_idx(threadIdx.x) {} + int thread_idx; +}; + +// CHECK: int test5(const int ct, int numLane = 0) { +// CHECK-NEXT: if (!numLane) numLane = sycl::ext::oneapi::this_work_item::get_sub_group().get_local_range().get(0); +__device__ int test5(const int ct, const int numLane = warpSize) { + int r = ct * numLane; + return r; +} From aec62ae8ca0d05c21dff772a44a8fb97499956ab Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Mon, 28 Apr 2025 11:01:16 +0800 Subject: [PATCH 2/4] Refine Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 50 +++++++++++++-------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 5a5856a40202..ee733c4c2a41 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -326,16 +326,22 @@ class FreeQueriesInfo { static const FreeQueriesNames &getNames(FreeQueriesKind); static std::shared_ptr getInfo(const FunctionDecl *); - static void printFreeQueriesFunctionName(llvm::raw_ostream &OS, - FreeQueriesKind K, - unsigned Dimension) { + template + static void printFreeQueriesFunctionName( + llvm::raw_ostream &OS, FreeQueriesKind K, T Dimension, + typename std::enable_if || + std::is_same_v>::type * = 0) { OS << getNames(K).FreeQueriesFuncName; if (K != FreeQueriesKind::SubGroup) { OS << '<'; - if (Dimension) { - OS << Dimension; + if constexpr (std::is_same_v) { + if (Dimension) { + OS << Dimension; + } else { + OS << "dpct_placeholder /* Fix the dimension manually */"; + } } else { - OS << "dpct_placeholder /* Fix the dimension manually */"; + OS << Dimension; } OS << '>'; } @@ -7517,27 +7523,8 @@ void FreeQueriesInfo::printImmediateText(llvm::raw_ostream &OS, const Node *S, return; auto Index = DpctGlobalInfo::getCudaKernelDimDFIIndexThenInc(); DpctGlobalInfo::insertCudaKernelDimDFIMap(Index, DFI); - switch (K) { - case FreeQueriesKind::NdItem: { - OS << MapNames::getClNamespace() - << "ext::oneapi::this_work_item::get_nd_item<{{NEEDREPLACEG" + - std::to_string(Index) + "}}>()"; - break; - } - case FreeQueriesKind::Group: { - OS << MapNames::getClNamespace() - << "ext::oneapi::this_work_item::get_work_group<{{NEEDREPLACEG" + - std::to_string(Index) + "}}>()"; - break; - } - case FreeQueriesKind::SubGroup: { - OS << MapNames::getClNamespace() - << "ext::oneapi::this_work_item::get_sub_group()"; - break; - } - default: - llvm_unreachable("Unexpected FreeQueriesKind"); - } + printFreeQueriesFunctionName( + OS, K, "{{NEEDREPLACEG" + std::to_string(Index) + "}}"); } else { if (auto DFI = DeviceFunctionDecl::LinkRedecls(FD)) DFI->setItem(); @@ -7597,7 +7584,7 @@ void FreeQueriesInfo::emplaceExtraDecl() { auto &KindNames = getNames(static_cast(FreeQueriesKind::NdItem)); OS << "auto " << KindNames.ExtraVariableName << " = "; - printFreeQueriesFunctionName( + printFreeQueriesFunctionName( OS, static_cast(FreeQueriesKind::NdItem), Dimension); OS << ';' << NL << Indent; } @@ -7611,8 +7598,8 @@ std::string FreeQueriesInfo::getReplaceString(unsigned Num) { bool IsMacro = isMacro(Num); if (IsMacro) { if (Index < MacroInfos.size()) { - return buildStringFromPrinter(printFreeQueriesFunctionName, Kind, - MacroInfos[Index]->Dimension); + return buildStringFromPrinter(printFreeQueriesFunctionName, + Kind, MacroInfos[Index]->Dimension); } #ifdef DPCT_DEBUG_BUILD llvm::errs() << "FreeQueriesInfo index[" << Index @@ -7632,7 +7619,8 @@ std::string FreeQueriesInfo::getReplaceString(unsigned Num) { std::string FreeQueriesInfo::getReplaceString(FreeQueriesKind K) { if (K != FreeQueriesKind::NdItem || Counter[K] < 2) - return buildStringFromPrinter(printFreeQueriesFunctionName, K, Dimension); + return buildStringFromPrinter(printFreeQueriesFunctionName, K, + Dimension); else return getNames(K).ExtraVariableName; } From 22acd3b94d015d2902d6a8410391c3b726b2ead5 Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Mon, 28 Apr 2025 11:29:59 +0800 Subject: [PATCH 3/4] Fix Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index ee733c4c2a41..1876eb53bde1 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -327,10 +327,10 @@ class FreeQueriesInfo { static const FreeQueriesNames &getNames(FreeQueriesKind); static std::shared_ptr getInfo(const FunctionDecl *); template - static void printFreeQueriesFunctionName( - llvm::raw_ostream &OS, FreeQueriesKind K, T Dimension, - typename std::enable_if || - std::is_same_v>::type * = 0) { + static typename std::enable_if || + std::is_same_v>::type + printFreeQueriesFunctionName(llvm::raw_ostream &OS, FreeQueriesKind K, + T Dimension) { OS << getNames(K).FreeQueriesFuncName; if (K != FreeQueriesKind::SubGroup) { OS << '<'; From 085ca9206bf8e568d381d7274cc1ba049251cc2c Mon Sep 17 00:00:00 2001 From: "Jiang, Zhiwei" Date: Tue, 29 Apr 2025 10:10:23 +0800 Subject: [PATCH 4/4] Fix Signed-off-by: Jiang, Zhiwei --- clang/lib/DPCT/AnalysisInfo.cpp | 5 +- .../DPCT/RuleInfra/CallExprRewriterCommon.h | 2 - clang/test/dpct/builtin_warpSize.cu | 24 ++++---- clang/test/dpct/macro_test.cu | 42 +++++++------ clang/test/dpct/macro_test.h | 3 +- clang/test/dpct/template-kernel-call.cu | 59 ++++++++++--------- .../template_src/template_explicit/test.hpp | 4 +- .../template_src/template_explicit/test2.cu | 2 +- 8 files changed, 76 insertions(+), 65 deletions(-) diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 1876eb53bde1..a82c96edda44 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -7554,6 +7554,7 @@ void FreeQueriesInfo::printImmediateText(llvm::raw_ostream &OS, (*Iter)->Infos.push_back(Idx); Index = Iter - MacroInfos.begin(); } else { + IsMacro = false; auto SLocInfo = DpctGlobalInfo::getLocInfo(SL); if (SLocInfo.first != FilePath) return; @@ -7603,7 +7604,7 @@ std::string FreeQueriesInfo::getReplaceString(unsigned Num) { } #ifdef DPCT_DEBUG_BUILD llvm::errs() << "FreeQueriesInfo index[" << Index - << "]is larger than list size[" << InfoList.size() << "]\n"; + << "] is larger than list size[" << MacroInfos.size() << "]\n"; assert(0); #endif // DPCT_DEBUG_BUILD } @@ -7611,7 +7612,7 @@ std::string FreeQueriesInfo::getReplaceString(unsigned Num) { return InfoList[Index]->getReplaceString(getKind(Num)); #ifdef DPCT_DEBUG_BUILD llvm::errs() << "FreeQueriesInfo index[" << Index - << "]is larger than list size[" << InfoList.size() << "]\n"; + << "] is larger than list size[" << InfoList.size() << "]\n"; assert(0); #endif // DPCT_DEBUG_BUILD return ""; diff --git a/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h b/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h index fe0a8cfb1485..54798437ef5e 100644 --- a/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h +++ b/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h @@ -149,8 +149,6 @@ template class CastIfNotSameExprPrinter { clang::QualType ArgType = InputArg->getType().getCanonicalType(); ArgType.removeLocalFastQualifiers(clang::Qualifiers::CVRMask); bool NeedParen = false; - std::cout << "Arg type: " << ArgType.getAsString() << "\n"; - std::cout << "Given type " << TypeInfo << "\n"; if (ArgType.getAsString() != TypeInfo) { NeedParen = needExtraParens(SubExpr); Stream << "(" << TypeInfo << ")"; diff --git a/clang/test/dpct/builtin_warpSize.cu b/clang/test/dpct/builtin_warpSize.cu index bb4f63bf51f8..e5b773dfd3d4 100644 --- a/clang/test/dpct/builtin_warpSize.cu +++ b/clang/test/dpct/builtin_warpSize.cu @@ -1,4 +1,4 @@ -// RUN: dpct --no-dpcpp-extensions=free-function-queries --format-range=none -out-root %T/builtin_warpSize %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only +// RUN: dpct --format-range=none -out-root %T/builtin_warpSize %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only // RUN: FileCheck --input-file %T/builtin_warpSize/builtin_warpSize.dp.cpp --match-full-lines %s // RUN: %if build_lit %{icpx -c -fsycl %T/builtin_warpSize/builtin_warpSize.dp.cpp -o %T/builtin_warpSize/builtin_warpSize.dp.o %} @@ -8,7 +8,7 @@ __global__ void foo(){ - // CHECK: int a = item_ct1.get_sub_group().get_local_range().get(0); + // CHECK: int a = sycl::ext::oneapi::this_work_item::get_sub_group().get_local_range().get(0); // CHECK-NEXT: int warpSize = 1; // CHECK-NEXT: warpSize = 2; // CHECK-NEXT: int c= warpSize; @@ -18,8 +18,8 @@ __global__ void foo(){ int c= warpSize; } -// CHECK: void bar(const sycl::nd_item<3> &item_ct1){ -// CHECK-NEXT: int a = sycl::max((int)item_ct1.get_sub_group().get_local_range().get(0), 0); +// CHECK: void bar(){ +// CHECK-NEXT: int a = sycl::max((int)sycl::ext::oneapi::this_work_item::get_sub_group().get_local_range().get(0), 0); // CHECK-NEXT: int warpSize = 1; // CHECK-NEXT: int b = sycl::max(warpSize, 0); // CHECK-NEXT: } @@ -29,8 +29,8 @@ __global__ void bar(){ int b = max(warpSize, 0); } -// CHECK: int tensorPos(const int ct, const sycl::nd_item<3> &item_ct1, int numLane = 0) { -// CHECK-NEXT: if (!numLane) numLane = item_ct1.get_sub_group().get_local_range().get(0); +// CHECK: int tensorPos(const int ct, int numLane = 0) { +// CHECK-NEXT: if (!numLane) numLane = sycl::ext::oneapi::this_work_item::get_sub_group().get_local_range().get(0); // CHECK-NEXT: int r = ct * numLane; // CHECK-NEXT: return r; // CHECK-NEXT: } @@ -39,18 +39,18 @@ __device__ int tensorPos(const int ct, const int numLane = warpSize) { return r; } -// CHECK: int tensorPos(const int ct, const sycl::nd_item<3> &item_ct1, int numLane); +// CHECK: int tensorPos(const int ct, int numLane); __device__ int tensorPos(const int ct, const int numLane); -// CHECK: int tensorPos2(const int ct, const sycl::nd_item<3> &item_ct1, int numLane); +// CHECK: int tensorPos2(const int ct, int numLane); __device__ int tensorPos2(const int ct, const int numLane); -// CHECK: int tensorPos2(const int ct, const sycl::nd_item<3> &item_ct1, int numLane) { -// CHECK-NEXT: if (!numLane) numLane = item_ct1.get_sub_group().get_local_range().get(0); +// CHECK: int tensorPos2(const int ct, int numLane) { +// CHECK-NEXT: if (!numLane) numLane = sycl::ext::oneapi::this_work_item::get_sub_group().get_local_range().get(0); // CHECK-NEXT: int r = ct * numLane; // CHECK-NEXT: return r; // CHECK-NEXT: } @@ -59,9 +59,9 @@ __device__ int tensorPos2(const int ct, const int numLane) { return r; } -// CHECK: int tensorPos2(const int ct, const sycl::nd_item<3> &item_ct1, int numLane = 0); +// CHECK: int tensorPos2(const int ct, int numLane = 0); __device__ int tensorPos2(const int ct, const int numLane = warpSize); -// CHECK: int tensorPos3(const int ct, const sycl::nd_item<3> &item_ct1, int numLane = 0) {} +// CHECK: int tensorPos3(const int ct, int numLane = 0) {} __device__ int tensorPos3(const int ct, const int numLane = warpSize) {} \ No newline at end of file diff --git a/clang/test/dpct/macro_test.cu b/clang/test/dpct/macro_test.cu index 9c7120240084..abe0c9c6af2a 100644 --- a/clang/test/dpct/macro_test.cu +++ b/clang/test/dpct/macro_test.cu @@ -5,7 +5,7 @@ // RUN: cd %T // RUN: rm -rf %T/macro_test_output // RUN: mkdir %T/macro_test_output -// RUN: dpct --no-dpcpp-extensions=free-function-queries -out-root %T/macro_test_output macro_test.cu --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only +// RUN: dpct -out-root %T/macro_test_output macro_test.cu --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only // RUN: FileCheck --input-file %T/macro_test_output/macro_test.dp.cpp --match-full-lines macro_test.cu // RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/macro_test_output/macro_test.dp.cpp -o %T/macro_test_output/macro_test.dp.o %} // RUN: FileCheck --input-file %T/macro_test_output/macro_test.h --match-full-lines macro_test.h @@ -67,8 +67,8 @@ __global__ void foo_kernel() {} //CHECK-NEXT: #ifdef MACRO_CC //CHECK-NEXT: , int c //CHECK-NEXT: #endif -//CHECK-NEXT: , const sycl::nd_item<3> &item_ct1) { -//CHECK-NEXT: int x = item_ct1.get_group(2); +//CHECK-NEXT: ) { +//CHECK-NEXT: int x = sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(2); //CHECK-NEXT: } __global__ void foo_kernel2(int a, int b #ifdef MACRO_CC @@ -288,7 +288,7 @@ int b; //CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 2) * sycl::range<3>(1, 1, 2), //CHECK-NEXT: sycl::range<3>(1, 1, 2)), //CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - //CHECK-NEXT: foo_kernel2(3, 3, item_ct1); + //CHECK-NEXT: foo_kernel2(3, 3); //CHECK-NEXT: }); foo_kernel2<<<2, 2, 0>>>(3,3 #ifdef MACRO_CC @@ -435,13 +435,14 @@ FFF } -// CHECK: #define FFFFF(aaa,bbb) void foo4(const int * __restrict__ aaa, const float * __restrict__ bbb, int *c, BBB, const sycl::nd_item<3> &item_ct1, float *sp_lj, float *sp_coul, int *ljd, double la[8][1]) +// CHECK: #define FFFFF(aaa,bbb) void foo4(const int * __restrict__ aaa, const float * __restrict__ bbb, int *c, BBB, float *sp_lj, float *sp_coul, int *ljd, double la[8][1]) #define FFFFF(aaa,bbb) __device__ void foo4(const int * __restrict__ aaa, const float * __restrict__ bbb, int *c, BBB) // CHECK: FFFFF(pos, q) // CHECK-NEXT: { // CHECK-EMPTY: -// CHECK-NEXT: const int tid = item_ct1.get_local_id(2); +// CHECK-NEXT: const int tid = +// CHECK-NEXT: sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2); // CHECK-NEXT: } FFFFF(pos, q) { @@ -452,13 +453,14 @@ FFFFF(pos, q) const int tid = threadIdx.x; } -// CHECK: #define FFFFFF(aaa,bbb) void foo5(const int * __restrict__ aaa, const float * __restrict__ bbb, const sycl::nd_item<3> &item_ct1, float *sp_lj, float *sp_coul, int *ljd, double la[8][1]) +// CHECK: #define FFFFFF(aaa,bbb) void foo5(const int * __restrict__ aaa, const float * __restrict__ bbb, float *sp_lj, float *sp_coul, int *ljd, double la[8][1]) #define FFFFFF(aaa,bbb) __device__ void foo5(const int * __restrict__ aaa, const float * __restrict__ bbb) // CHECK: FFFFFF(pos, q) // CHECK-NEXT: { // CHECK-EMPTY: -// CHECK-NEXT: const int tid = item_ct1.get_local_id(2); +// CHECK-NEXT: const int tid = +// CHECK-NEXT: sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2); // CHECK-NEXT: } FFFFFF(pos, q) { @@ -483,9 +485,13 @@ __device__ void foo6(AAA, BBB) //CHECK: #define MM __umul24 //CHECK-NEXT: #define MUL(a, b) sycl::mul24((unsigned int)a, (unsigned int)b) -//CHECK-NEXT: void foo7(const sycl::nd_item<3> &item_ct1) { -//CHECK-NEXT: unsigned int tid = MUL(item_ct1.get_local_range(2), item_ct1.get_group(2)) + -//CHECK-NEXT: item_ct1.get_local_range(2); +//CHECK-NEXT: void foo7() { +//CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); +//CHECK-NEXT: unsigned int tid = +//CHECK-NEXT: MUL(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_range( +//CHECK-NEXT: 2), +//CHECK-NEXT: sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(2)) + +//CHECK-NEXT: item_ct1.get_local_range(2); //CHECK-NEXT: unsigned int tid2 = sycl::mul24((unsigned int)item_ct1.get_local_range(2), //CHECK-NEXT: (unsigned int)item_ct1.get_group_range(2)); //CHECK-NEXT: } @@ -573,7 +579,7 @@ void templatefoo2(){ CALL_KERNEL2(8, AAA) } -//CHECK: void foo11(const sycl::nd_item<3> &item_ct1){ +//CHECK: void foo11(){ //CHECK-NEXT: sycl::exp((double)(THREAD_IDX_X)); //CHECK-NEXT: } __global__ void foo11(){ @@ -915,13 +921,14 @@ void foo20() { } //CHECK: #define CALLSHFLSYNC(x) \ -//CHECK-NEXT: dpct::select_from_sub_group(item_ct1.get_sub_group(), x, 3 ^ 1); +//CHECK-NEXT: dpct::select_from_sub_group( \ +//CHECK-NEXT: sycl::ext::oneapi::this_work_item::get_sub_group(), x, 3 ^ 1); #define CALLSHFLSYNC(x) __shfl_sync(0xffffffff, x, 3 ^ 1); //CHECK: #define CALLANYSYNC(x) \ //CHECK-NEXT: sycl::any_of_group( \ -//CHECK-NEXT: item_ct1.get_sub_group(), \ -//CHECK-NEXT: (0xffffffff & \ -//CHECK-NEXT: (0x1 << item_ct1.get_sub_group().get_local_linear_id())) && \ +//CHECK-NEXT: sycl::ext::oneapi::this_work_item::get_sub_group(), \ +//CHECK-NEXT: (0xffffffff & (0x1 << sycl::ext::oneapi::this_work_item::get_sub_group() \ +//CHECK-NEXT: .get_local_linear_id())) && \ //CHECK-NEXT: x != 0.0f); #define CALLANYSYNC(x) __any_sync(0xffffffff, x != 0.0f); @@ -964,7 +971,8 @@ foo23(void) } //CHECK: #define SHFL(x, y, z) \ -//CHECK-NEXT: dpct::select_from_sub_group(item_ct1.get_sub_group(), (x), (y), (z)) +//CHECK-NEXT: dpct::select_from_sub_group( \ +//CHECK-NEXT: sycl::ext::oneapi::this_work_item::get_sub_group(), (x), (y), (z)) #define SHFL(x, y, z) __shfl((x), (y), (z)) __global__ void foo24(){ int i; diff --git a/clang/test/dpct/macro_test.h b/clang/test/dpct/macro_test.h index 320a10f95b87..6663c1b300f4 100644 --- a/clang/test/dpct/macro_test.h +++ b/clang/test/dpct/macro_test.h @@ -1,4 +1,5 @@ -//CHECK: #define THREAD_IDX_X item_ct1.get_local_id(2) +//CHECK: #define THREAD_IDX_X \ +//CHECK-NEXT: sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2) #define THREAD_IDX_X threadIdx.x #define STRINGIFY_(...) #__VA_ARGS__ diff --git a/clang/test/dpct/template-kernel-call.cu b/clang/test/dpct/template-kernel-call.cu index bd47db0399b1..60a2602d0c1c 100644 --- a/clang/test/dpct/template-kernel-call.cu +++ b/clang/test/dpct/template-kernel-call.cu @@ -1,6 +1,6 @@ // FIXME // UNSUPPORTED: system-windows -// RUN: dpct --no-dpcpp-extensions=free-function-queries --format-range=none --usm-level=none -out-root %T/template-kernel-call %s --cuda-include-path="%cuda-path/include" --sycl-named-lambda -- -x cuda --cuda-host-only -std=c++11 +// RUN: dpct --format-range=none --usm-level=none -out-root %T/template-kernel-call %s --cuda-include-path="%cuda-path/include" --sycl-named-lambda -- -x cuda --cuda-host-only -std=c++11 // RUN: FileCheck --input-file %T/template-kernel-call/template-kernel-call.dp.cpp --match-full-lines %s #include @@ -50,17 +50,18 @@ public: template void runTest(); template -// CHECK: void testKernelPtr(const TData *L, const TData *M, -// CHECK-NEXT: const sycl::nd_item<3> &[[ITEMNAME:item_ct1]]) { +// CHECK: void testKernelPtr(const TData *L, const TData *M) { __global__ void testKernelPtr(const TData *L, const TData *M) { - // CHECK: int gtid = [[ITEMNAME]].get_group(2) * [[ITEMNAME]].get_local_range(2) + [[ITEMNAME]].get_local_id(2); + // CHECK: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); + // CHECK-NEXT: int gtid = item_ct1.get_group(2) * item_ct1.get_local_range(2) + item_ct1.get_local_id(2); int gtid = blockIdx.x * blockDim.x + threadIdx.x; } template -// CHECK: void testKernel(TData L, TData M, int N, const sycl::nd_item<3> &[[ITEMNAME:item_ct1]]) { +// CHECK: void testKernel(TData L, TData M, int N) { __global__ void testKernel(TData L, TData M, int N) { - // CHECK: int gtid = [[ITEMNAME]].get_group(2) * [[ITEMNAME]].get_local_range(2) + [[ITEMNAME]].get_local_id(2); + // CHECK: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); + // CHECK-NEXT: int gtid = item_ct1.get_group(2) * item_ct1.get_local_range(2) + item_ct1.get_local_id(2); int gtid = blockIdx.x * blockDim.x + threadIdx.x; L = M; } @@ -104,7 +105,7 @@ void runTest() { // CHECK-NEXT: cgh.parallel_for, T>>( // CHECK-NEXT: sycl::nd_range<3>(griddim * threaddim, threaddim), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: testKernelPtr(karg1_acc_ct0.get_raw_pointer(), karg2_acc_ct1.get_raw_pointer(), item_ct1); + // CHECK-NEXT: testKernelPtr(karg1_acc_ct0.get_raw_pointer(), karg2_acc_ct1.get_raw_pointer()); // CHECK-NEXT: }); // CHECK-NEXT: }); testKernelPtr<<>>((const T *)karg1, karg2); @@ -120,7 +121,7 @@ void runTest() { // CHECK-NEXT: cgh.parallel_for, dpct_kernel_scalar, T>>( // CHECK-NEXT: sycl::nd_range<3>(griddim * threaddim, threaddim), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: testKernelPtr, ktarg, T>(karg1_acc_ct0.get_raw_pointer(), karg3_acc_ct1.get_raw_pointer(), item_ct1); + // CHECK-NEXT: testKernelPtr, ktarg, T>(karg1_acc_ct0.get_raw_pointer(), karg3_acc_ct1.get_raw_pointer()); // CHECK-NEXT: }); // CHECK-NEXT: }); testKernelPtr, ktarg, T><<>>((const T *)karg1, karg3); @@ -136,7 +137,7 @@ void runTest() { // CHECK-NEXT: cgh.parallel_for, TestTemplate>>( // CHECK-NEXT: sycl::nd_range<3>(griddim * threaddim, threaddim), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: testKernelPtr>(karg4_acc_ct0.get_raw_pointer(), karg5_acc_ct1.get_raw_pointer(), item_ct1); + // CHECK-NEXT: testKernelPtr>(karg4_acc_ct0.get_raw_pointer(), karg5_acc_ct1.get_raw_pointer()); // CHECK-NEXT: }); // CHECK-NEXT: }); testKernelPtr ><<>>(karg4, karg5); @@ -152,7 +153,7 @@ void runTest() { // CHECK-NEXT: cgh.parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(griddim * threaddim, threaddim), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: testKernel(karg1T, karg2T, ktarg_ct2, item_ct1); + // CHECK-NEXT: testKernel(karg1T, karg2T, ktarg_ct2); // CHECK-NEXT: }); // CHECK-NEXT: }); testKernel<<>>(karg1T, karg2T, ktarg); @@ -173,7 +174,7 @@ void runTest() { // CHECK-NEXT: cgh.parallel_for>>( // CHECK-NEXT: sycl::nd_range<3>(griddim * threaddim, threaddim), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: testKernel>(karg3TT, karg4TT, ktarg_ct2, item_ct1); + // CHECK-NEXT: testKernel>(karg3TT, karg4TT, ktarg_ct2); // CHECK-NEXT: }); // CHECK-NEXT: }); testKernel ><<>>(karg3TT, karg4TT, ktarg); @@ -191,7 +192,7 @@ void runTest() { // CHECK-NEXT: cgh.parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(griddim * threaddim, threaddim), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: testKernel(karg3TT, karg4TT, ktarg_ct2, item_ct1); + // CHECK-NEXT: testKernel(karg3TT, karg4TT, ktarg_ct2); // CHECK-NEXT: }); // CHECK-NEXT: }); testKernel<<>>(karg3TT, karg4TT, ktarg); @@ -216,7 +217,7 @@ int main() { // CHECK-NEXT: cgh.parallel_for, LA>>( // CHECK-NEXT: sycl::nd_range<3>(griddim * threaddim, threaddim), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: testKernelPtr(karg1_acc_ct0.get_raw_pointer(), karg2_acc_ct1.get_raw_pointer(), item_ct1); + // CHECK-NEXT: testKernelPtr(karg1_acc_ct0.get_raw_pointer(), karg2_acc_ct1.get_raw_pointer()); // CHECK-NEXT: }); // CHECK-NEXT: }); testKernelPtr<<>>((const LA *)karg1, karg2); @@ -233,7 +234,7 @@ int main() { // CHECK-NEXT: cgh.parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 10) * sycl::range<3>(1, 1, intvar), sycl::range<3>(1, 1, intvar)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { - // CHECK-NEXT: testKernel(karg1LA, karg2LA, ktarg_ct2, item_ct1); + // CHECK-NEXT: testKernel(karg1LA, karg2LA, ktarg_ct2); // CHECK-NEXT: }); // CHECK-NEXT: }); testKernel<<<10, intvar>>>(karg1LA, karg2LA, ktarg); @@ -241,8 +242,8 @@ int main() { // CHECK:template -// CHECK-NEXT:void convert_kernel(T b, const sycl::nd_item<3> &item_ct1, int *aaa, -// CHECK-NEXT: double bbb[8][0]){ +// CHECK-NEXT:void convert_kernel(T b, int *aaa, double bbb[8][0]){ +// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); // CHECK: T a = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2); // CHECK-NEXT:} template @@ -266,7 +267,7 @@ __global__ void convert_kernel(T b){ // CHECK-NEXT: cgh.parallel_for>( // CHECK-NEXT: sycl::nd_range<3>(sycl::range<3>(1, 1, 128) * sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) { -// CHECK-NEXT: convert_kernel(b, item_ct1, aaa_acc_ct1.get_multi_ptr().get(), bbb_acc_ct1); +// CHECK-NEXT: convert_kernel(b, aaa_acc_ct1.get_multi_ptr().get(), bbb_acc_ct1); // CHECK-NEXT: }); // CHECK-NEXT: }); // CHECK-NEXT: } @@ -457,7 +458,8 @@ template struct spmv_driver : public ::spmv_driver { class IndexType {}; -// CHECK: void thread_id(const sycl::nd_item<3> &item_ct1) { +// CHECK: void thread_id() { +// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); // CHECK-NEXT: auto tidx = item_ct1.get_local_id(2); // CHECK-NEXT: auto tidx_int = static_cast(item_ct1.get_local_id(2)); // CHECK-NEXT: } @@ -466,7 +468,8 @@ __device__ void thread_id() { auto tidx_int = static_cast(threadIdx.x); } -// CHECK: template void thread_id(const sycl::nd_item<3> &item_ct1) { +// CHECK: template void thread_id() { +// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); // CHECK-NEXT: auto tidx = item_ct1.get_local_id(2); // CHECK-NEXT: auto tidx_template = static_cast(item_ct1.get_local_id(2)); // CHECK-NEXT: auto tidx_int = static_cast(item_ct1.get_local_id(2)); @@ -477,7 +480,8 @@ template __device__ void thread_id() { auto tidx_int = static_cast(threadIdx.x); } -// CHECK: template void kernel(const sycl::nd_item<3> &item_ct1) { +// CHECK: template void kernel() { +// CHECK-NEXT: auto item_ct1 = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); // CHECK-NEXT: auto tidx = item_ct1.get_local_id(2); // CHECK-NEXT: auto tidx_template = static_cast(item_ct1.get_local_id(2)); // CHECK-NEXT: auto tidx_int = static_cast(item_ct1.get_local_id(2)); @@ -530,8 +534,8 @@ void foo() { template class foo_class1{ public: -// CHECK: void foo(const sycl::nd_item<3> &item_ct1) { -// CHECK-NEXT: int a = item_ct1.get_local_id(2); +// CHECK: void foo() { +// CHECK-NEXT: int a = sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2); __device__ void foo() { int a = threadIdx.x; } @@ -539,8 +543,8 @@ public: template class foo_class1{ public: -// CHECK: void foo(const sycl::nd_item<3> &item_ct1) { -// CHECK-NEXT: int a = item_ct1.get_local_id(2); +// CHECK: void foo() { +// CHECK-NEXT: int a = sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_local_id(2); __device__ void foo() { int a = threadIdx.x; } @@ -579,14 +583,13 @@ public: }; template __global__ void kernel2() { + // CHECK: A a; + // CHECK-NEXT: a.f1(); A a; -//CHECK: /* -//CHECK: DPCT1084:{{[0-9]+}}: The function call "A::f1" has multiple migration results in different template instantiations that could not be unified. You may need to adjust the code. -//CHECK: */ a.f1(); } -int main() { +int foo3() { kernel2<<<1, 1>>>(); kernel2<<<1, 1>>>(); return 0; diff --git a/clang/test/dpct/template_src/template_explicit/test.hpp b/clang/test/dpct/template_src/template_explicit/test.hpp index caead92bfe06..44555147e848 100644 --- a/clang/test/dpct/template_src/template_explicit/test.hpp +++ b/clang/test/dpct/template_src/template_explicit/test.hpp @@ -4,8 +4,8 @@ template __global__ void foo(int parm) #ifdef TEST ; -// CHECK: extern template void foo(int, const sycl::nd_item<3> &item_ct1); -// CHECK: extern template void foo(int, const sycl::nd_item<3> &item_ct1); +// CHECK: extern template void foo(int); +// CHECK: extern template void foo(int); extern template __global__ void foo(int); extern template __global__ void foo(int); #else diff --git a/clang/test/dpct/template_src/template_explicit/test2.cu b/clang/test/dpct/template_src/template_explicit/test2.cu index de646d996a65..5baba35ad155 100644 --- a/clang/test/dpct/template_src/template_explicit/test2.cu +++ b/clang/test/dpct/template_src/template_explicit/test2.cu @@ -1,4 +1,4 @@ -// RUN: dpct --no-dpcpp-extensions=free-function-queries --format-range=none -out-root %T/out %s %S/test.cu --cuda-include-path="%cuda-path/include" +// RUN: dpct --format-range=none -out-root %T/out %s %S/test.cu --cuda-include-path="%cuda-path/include" // RUN: FileCheck %S/test.hpp --match-full-lines --input-file %T/out/test.hpp // RUN: %if build_lit %{icpx -c -fsycl %T/out/test.dp.cpp -o %T/out/test.dp.o %} // RUN: %if build_lit %{icpx -c -fsycl %T/out/test2.dp.cpp -o %T/out/test2.dp.o %}