diff --git a/clang/examples/DPCT/Runtime/nvcuda$$wmma$$fill_fragment.cu b/clang/examples/DPCT/Runtime/nvcuda$$wmma$$fill_fragment.cu new file mode 100644 index 000000000000..f2b732db91e7 --- /dev/null +++ b/clang/examples/DPCT/Runtime/nvcuda$$wmma$$fill_fragment.cu @@ -0,0 +1,9 @@ +// Option: --use-experimental-features=matrix +#include + +template __global__ void test(T val) { + // Start + nvcuda::wmma::fragment acc_frag; + nvcuda::wmma::fill_fragment(acc_frag, val /*const T&*/); + // End +} diff --git a/clang/examples/DPCT/Runtime/nvcuda$$wmma$$load_matrix_sync.cu b/clang/examples/DPCT/Runtime/nvcuda$$wmma$$load_matrix_sync.cu new file mode 100644 index 000000000000..a93d2bf1cbe7 --- /dev/null +++ b/clang/examples/DPCT/Runtime/nvcuda$$wmma$$load_matrix_sync.cu @@ -0,0 +1,13 @@ +// Option: --use-experimental-features=matrix +#include + +template +__global__ void test(const T *a, int row, int col, unsigned lda) { + // Start + nvcuda::wmma::fragment + a_frag; + nvcuda::wmma::load_matrix_sync(a_frag, a + col + row * lda /*const T **/, + lda /*unsigned*/); + // End +} diff --git a/clang/examples/DPCT/Runtime/nvcuda$$wmma$$mma_sync.cu b/clang/examples/DPCT/Runtime/nvcuda$$wmma$$mma_sync.cu new file mode 100644 index 000000000000..bf651e4d63b0 --- /dev/null +++ b/clang/examples/DPCT/Runtime/nvcuda$$wmma$$mma_sync.cu @@ -0,0 +1,15 @@ +// Option: --use-experimental-features=matrix +#include + +__global__ void test() { + // Start + nvcuda::wmma::fragment + a_frag; + nvcuda::wmma::fragment + b_frag; + nvcuda::wmma::fragment acc_frag; + nvcuda::wmma::mma_sync(acc_frag, a_frag, b_frag, acc_frag); + // End +} diff --git a/clang/examples/DPCT/Runtime/nvcuda$$wmma$$store_matrix_sync.cu b/clang/examples/DPCT/Runtime/nvcuda$$wmma$$store_matrix_sync.cu new file mode 100644 index 000000000000..06fff11ead9c --- /dev/null +++ b/clang/examples/DPCT/Runtime/nvcuda$$wmma$$store_matrix_sync.cu @@ -0,0 +1,15 @@ +// Option: --use-experimental-features=matrix +#include + +template +__global__ void test(const T *c, int row, int col, unsigned ldc) { + // Start + nvcuda::wmma::fragment acc_frag; + nvcuda::wmma::store_matrix_sync( + c + col + row * ldc /*const T **/, acc_frag, ldc /*unsigned*/, + nvcuda::wmma::mem_col_major /*nvcuda::wmma::layout_t*/); + nvcuda::wmma::store_matrix_sync( + c + row + col * ldc /*const T **/, acc_frag, ldc /*unsigned*/, + nvcuda::wmma::mem_row_major /*nvcuda::wmma::layout_t*/); + // End +} diff --git a/clang/lib/DPCT/DPCT.cpp b/clang/lib/DPCT/DPCT.cpp index efad8a5917d6..153c0d4a31d9 100644 --- a/clang/lib/DPCT/DPCT.cpp +++ b/clang/lib/DPCT/DPCT.cpp @@ -1063,6 +1063,8 @@ int runDPCT(int argc, const char **argv) { Experimentals.addValue(ExperimentalFeatures::Exp_LevelZero); else if (Option.ends_with("non-uniform-groups")) Experimentals.addValue(ExperimentalFeatures::Exp_NonUniformGroups); + else if (Option.ends_with("matrix")) + Experimentals.addValue(ExperimentalFeatures::Exp_Matrix); } else if (Option == "--no-dry-pattern") { NoDRYPattern.setValue(true); } else if (Option == "--enable-profiling") { diff --git a/clang/test/dpct/query_api_mapping/Runtime/test_wmma.cu b/clang/test/dpct/query_api_mapping/Runtime/test_wmma.cu new file mode 100644 index 000000000000..91e988ef8e29 --- /dev/null +++ b/clang/test/dpct/query_api_mapping/Runtime/test_wmma.cu @@ -0,0 +1,54 @@ +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0 + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=nvcuda::wmma::fill_fragment | FileCheck %s -check-prefix=NVCUDA_WMMA_FILL_FRAGMENT +// NVCUDA_WMMA_FILL_FRAGMENT: CUDA API: +// NVCUDA_WMMA_FILL_FRAGMENT-NEXT: nvcuda::wmma::fragment acc_frag; +// NVCUDA_WMMA_FILL_FRAGMENT-NEXT: nvcuda::wmma::fill_fragment(acc_frag, val /*const T&*/); +// NVCUDA_WMMA_FILL_FRAGMENT-NEXT: Is migrated to (with the option --use-experimental-features=matrix): +// NVCUDA_WMMA_FILL_FRAGMENT-NEXT: dpct::experimental::matrix::joint_matrix acc_frag; +// NVCUDA_WMMA_FILL_FRAGMENT-NEXT: sycl::ext::oneapi::experimental::matrix::joint_matrix_fill(sycl::ext::oneapi::this_work_item::get_sub_group(), acc_frag.get(), val); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=nvcuda::wmma::load_matrix_sync | FileCheck %s -check-prefix=NVCUDA_WMMA_LOAD_MATRIX_SYNC +// NVCUDA_WMMA_LOAD_MATRIX_SYNC: CUDA API: +// NVCUDA_WMMA_LOAD_MATRIX_SYNC-NEXT: nvcuda::wmma::fragment +// NVCUDA_WMMA_LOAD_MATRIX_SYNC-NEXT: a_frag; +// NVCUDA_WMMA_LOAD_MATRIX_SYNC-NEXT: nvcuda::wmma::load_matrix_sync(a_frag, a + col + row * lda /*const T **/, +// NVCUDA_WMMA_LOAD_MATRIX_SYNC-NEXT: lda /*unsigned*/); +// NVCUDA_WMMA_LOAD_MATRIX_SYNC-NEXT: Is migrated to (with the option --use-experimental-features=matrix): +// NVCUDA_WMMA_LOAD_MATRIX_SYNC-NEXT: dpct::experimental::matrix::joint_matrix +// NVCUDA_WMMA_LOAD_MATRIX_SYNC-NEXT: a_frag; +// NVCUDA_WMMA_LOAD_MATRIX_SYNC-NEXT: sycl::ext::oneapi::experimental::matrix::joint_matrix_load(sycl::ext::oneapi::this_work_item::get_sub_group(), a_frag.get(), sycl::address_space_cast::type>(a + col + row * lda), lda); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=nvcuda::wmma::store_matrix_sync | FileCheck %s -check-prefix=NVCUDA_WMMA_STORE_MATRIX_SYNC +// NVCUDA_WMMA_STORE_MATRIX_SYNC: CUDA API: +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: nvcuda::wmma::fragment acc_frag; +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: nvcuda::wmma::store_matrix_sync( +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: c + col + row * ldc /*const T **/, acc_frag, ldc /*unsigned*/, +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: nvcuda::wmma::mem_col_major /*nvcuda::wmma::layout_t*/); +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: nvcuda::wmma::store_matrix_sync( +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: c + row + col * ldc /*const T **/, acc_frag, ldc /*unsigned*/, +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: nvcuda::wmma::mem_row_major /*nvcuda::wmma::layout_t*/); +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: Is migrated to (with the option --use-experimental-features=matrix): +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: dpct::experimental::matrix::joint_matrix acc_frag; +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: sycl::ext::oneapi::experimental::matrix::joint_matrix_store(sycl::ext::oneapi::this_work_item::get_sub_group(), acc_frag.get(), sycl::address_space_cast::type>(c + col + row * ldc), ldc, sycl::ext::oneapi::experimental::matrix::layout::col_major); +// NVCUDA_WMMA_STORE_MATRIX_SYNC-NEXT: sycl::ext::oneapi::experimental::matrix::joint_matrix_store(sycl::ext::oneapi::this_work_item::get_sub_group(), acc_frag.get(), sycl::address_space_cast::type>(c + row + col * ldc), ldc, sycl::ext::oneapi::experimental::matrix::layout::row_major); + +// RUN: dpct --cuda-include-path="%cuda-path/include" --query-api-mapping=nvcuda::wmma::mma_sync | FileCheck %s -check-prefix=NVCUDA_WMMA_MMA_SYNC +// NVCUDA_WMMA_MMA_SYNC: CUDA API: +// NVCUDA_WMMA_MMA_SYNC-NEXT: nvcuda::wmma::fragment +// NVCUDA_WMMA_MMA_SYNC-NEXT: a_frag; +// NVCUDA_WMMA_MMA_SYNC-NEXT: nvcuda::wmma::fragment +// NVCUDA_WMMA_MMA_SYNC-NEXT: b_frag; +// NVCUDA_WMMA_MMA_SYNC-NEXT: nvcuda::wmma::fragment acc_frag; +// NVCUDA_WMMA_MMA_SYNC-NEXT: nvcuda::wmma::mma_sync(acc_frag, a_frag, b_frag, acc_frag); +// NVCUDA_WMMA_MMA_SYNC-NEXT: Is migrated to (with the option --use-experimental-features=matrix): +// NVCUDA_WMMA_MMA_SYNC-NEXT: dpct::experimental::matrix::joint_matrix +// NVCUDA_WMMA_MMA_SYNC-NEXT: a_frag; +// NVCUDA_WMMA_MMA_SYNC-NEXT: dpct::experimental::matrix::joint_matrix +// NVCUDA_WMMA_MMA_SYNC-NEXT: b_frag; +// NVCUDA_WMMA_MMA_SYNC-NEXT: dpct::experimental::matrix::joint_matrix acc_frag; +// NVCUDA_WMMA_MMA_SYNC-NEXT: sycl::ext::oneapi::experimental::matrix::joint_matrix_mad(sycl::ext::oneapi::this_work_item::get_sub_group(), acc_frag.get(), a_frag.get(), b_frag.get(), acc_frag.get()); diff --git a/clang/test/dpct/query_api_mapping/test_all.cu b/clang/test/dpct/query_api_mapping/test_all.cu index d1af2a07f629..e52eff4a8f5e 100644 --- a/clang/test/dpct/query_api_mapping/test_all.cu +++ b/clang/test/dpct/query_api_mapping/test_all.cu @@ -2339,6 +2339,10 @@ // CHECK-NEXT: normcdfinv // CHECK-NEXT: normcdfinvf // CHECK-NEXT: normf +// CHECK-NEXT: nvcuda::wmma::fill_fragment +// CHECK-NEXT: nvcuda::wmma::load_matrix_sync +// CHECK-NEXT: nvcuda::wmma::mma_sync +// CHECK-NEXT: nvcuda::wmma::store_matrix_sync // CHECK-NEXT: nvshmem_align // CHECK-NEXT: nvshmem_calloc // CHECK-NEXT: nvshmem_finalize