diff --git a/clang/include/clang/DPCT/DPCTOptions.inc b/clang/include/clang/DPCT/DPCTOptions.inc index 62841bf91dd1..f8c4060c658e 100644 --- a/clang/include/clang/DPCT/DPCTOptions.inc +++ b/clang/include/clang/DPCT/DPCTOptions.inc @@ -826,6 +826,11 @@ DPCT_ENUM_OPTION( "prefetch", int(ExperimentalFeatures::Exp_Prefetch), "Experimental extension that allows use of SYCL prefetch APIs.\n", false), + DPCT_OPTION_ENUM_VALUE( + "level_zero", int(ExperimentalFeatures::Exp_LevelZero), + "Experimental migration feature that enables the use of Level Zero " + "APIs to migrate target code, like CUDA Inter-Process " + "Communication (IPC) APIs.\n", false), DPCT_OPTION_ENUM_VALUE( "all", int(ExperimentalFeatures::Exp_All), "Enable all experimental extensions listed in this option.\n", diff --git a/clang/lib/DPCT/AnalysisInfo.cpp b/clang/lib/DPCT/AnalysisInfo.cpp index 67fe389b928e..47542f40ee50 100644 --- a/clang/lib/DPCT/AnalysisInfo.cpp +++ b/clang/lib/DPCT/AnalysisInfo.cpp @@ -905,6 +905,9 @@ void DpctFileInfo::insertHeader(HeaderType Type, unsigned Offset, // Add the label for profiling macro "DPCT_PROFILING_ENABLED", which will be // replaced by "#define DPCT_PROFILING_ENABLED" or not in the post // replacement. + if (DpctGlobalInfo::useExtLevelZero()) + OS << "#define ONEAPI_BACKEND_LEVEL_ZERO_EXT" << getNL(); + OS << "{{NEEDREPLACEP0}}"; if (DpctGlobalInfo::getUsmLevel() == UsmLevel::UL_None) diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index 021430a1e8d1..12a87c2857c6 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -1347,6 +1347,9 @@ class DpctGlobalInfo { return getUsingExperimental< ExperimentalFeatures::Exp_NonStandardSYCLBuiltins>(); } + static bool useExtLevelZero() { + return getUsingExperimental(); + } static bool useExtPrefetch() { return getUsingExperimental(); } diff --git a/clang/lib/DPCT/CMakeLists.txt b/clang/lib/DPCT/CMakeLists.txt index ad53ffc063b6..474e38b70aa2 100644 --- a/clang/lib/DPCT/CMakeLists.txt +++ b/clang/lib/DPCT/CMakeLists.txt @@ -21,6 +21,7 @@ set(RUNTIME_HEADERS ${CMAKE_SOURCE_DIR}/../clang/runtime/dpct-rt/include/dpct/lib_common_utils.hpp ${CMAKE_SOURCE_DIR}/../clang/runtime/dpct-rt/include/dpct/ccl_utils.hpp ${CMAKE_SOURCE_DIR}/../clang/runtime/dpct-rt/include/dpct/sparse_utils.hpp + ${CMAKE_SOURCE_DIR}/../clang/runtime/dpct-rt/include/dpct/ze_utils.hpp ${CMAKE_SOURCE_DIR}/../clang/runtime/dpct-rt/include/dpct/fft_utils.hpp ${CMAKE_SOURCE_DIR}/../clang/runtime/dpct-rt/include/dpct/lapack_utils.hpp ${CMAKE_SOURCE_DIR}/../clang/runtime/dpct-rt/include/dpct/group_utils.hpp @@ -71,6 +72,7 @@ set(PROCESS_FILES_OUTPUT ${CMAKE_BINARY_DIR}/tools/clang/include/clang/DPCT/rng_utils.hpp.inc ${CMAKE_BINARY_DIR}/tools/clang/include/clang/DPCT/lib_common_utils.hpp.inc ${CMAKE_BINARY_DIR}/tools/clang/include/clang/DPCT/ccl_utils.hpp.inc + ${CMAKE_BINARY_DIR}/tools/clang/include/clang/DPCT/ze_utils.hpp.inc ${CMAKE_BINARY_DIR}/tools/clang/include/clang/DPCT/sparse_utils.hpp.inc ${CMAKE_BINARY_DIR}/tools/clang/include/clang/DPCT/fft_utils.hpp.inc ${CMAKE_BINARY_DIR}/tools/clang/include/clang/DPCT/lapack_utils.hpp.inc diff --git a/clang/lib/DPCT/CommandOption/ValidateArguments.h b/clang/lib/DPCT/CommandOption/ValidateArguments.h index c7b660996e71..fdc507c8451f 100644 --- a/clang/lib/DPCT/CommandOption/ValidateArguments.h +++ b/clang/lib/DPCT/CommandOption/ValidateArguments.h @@ -101,6 +101,7 @@ enum class ExperimentalFeatures : unsigned int { Exp_ExperimentalFeaturesEnumSize, Exp_NonStandardSYCLBuiltins, Exp_Prefetch, + Exp_LevelZero, Exp_All }; enum class HelperFuncPreference : unsigned int { NoQueueDevice = 0 }; diff --git a/clang/lib/DPCT/DPCT.cpp b/clang/lib/DPCT/DPCT.cpp index 24deabc028c4..220ddd50559e 100644 --- a/clang/lib/DPCT/DPCT.cpp +++ b/clang/lib/DPCT/DPCT.cpp @@ -1060,6 +1060,8 @@ int runDPCT(int argc, const char **argv) { Experimentals.addValue(ExperimentalFeatures::Exp_Graph); else if (Option.ends_with("prefetch")) Experimentals.addValue(ExperimentalFeatures::Exp_Prefetch); + else if (Option.ends_with("level_zero")) + Experimentals.addValue(ExperimentalFeatures::Exp_LevelZero); } else if (Option == "--no-dry-pattern") { NoDRYPattern.setValue(true); } diff --git a/clang/lib/DPCT/FileGenerator/GenHelperFunction.cpp b/clang/lib/DPCT/FileGenerator/GenHelperFunction.cpp index 4a1e3a8351a1..7d532686aa50 100644 --- a/clang/lib/DPCT/FileGenerator/GenHelperFunction.cpp +++ b/clang/lib/DPCT/FileGenerator/GenHelperFunction.cpp @@ -150,6 +150,10 @@ const std::string CodePinSerializationBasicAllContentStr = #include "clang/DPCT/codepin/serialization/basic.hpp.inc" ; +const std::string ZEUtilsAllContentStr = +#include "clang/DPCT/ze_utils.hpp.inc" + ; + const std::string CmakeAllContentStr = #include "clang/DPCT/dpct.cmake.inc" ; @@ -206,6 +210,7 @@ void genHelperFunction(const clang::tooling::UnifiedPath &OutRoot) { GENERATE_ALL_FILE_CONTENT(Util, ".", util.hpp) GENERATE_ALL_FILE_CONTENT(RngUtils, ".", rng_utils.hpp) GENERATE_ALL_FILE_CONTENT(LibCommonUtils, ".", lib_common_utils.hpp) + GENERATE_ALL_FILE_CONTENT(ZEUtils, ".", ze_utils.hpp) GENERATE_ALL_FILE_CONTENT(CclUtils, ".", ccl_utils.hpp) GENERATE_ALL_FILE_CONTENT(SparseUtils, ".", sparse_utils.hpp) GENERATE_ALL_FILE_CONTENT(FftUtils, ".", fft_utils.hpp) diff --git a/clang/lib/DPCT/FileGenerator/GenHelperFunction.h b/clang/lib/DPCT/FileGenerator/GenHelperFunction.h index 02843b349743..7c20b2faa311 100644 --- a/clang/lib/DPCT/FileGenerator/GenHelperFunction.h +++ b/clang/lib/DPCT/FileGenerator/GenHelperFunction.h @@ -45,6 +45,7 @@ extern const std::string MathDetailAllContentStr; extern const std::string MemoryDetailAllContentStr; extern const std::string CodePinAllContentStr; extern const std::string CodePinSerializationBasicAllContentStr; +extern const std::string ZEUtilsAllContentStr; extern const std::string DpctCmakeHelperFileStr; void genHelperFunction(const clang::tooling::UnifiedPath &OutRoot); void genCmakeHelperFunction(const clang::tooling::UnifiedPath &OutRoot); diff --git a/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h b/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h index f8c7c177f6ce..5143e7f465da 100644 --- a/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h +++ b/clang/lib/DPCT/RuleInfra/CallExprRewriterCommon.h @@ -1825,6 +1825,10 @@ inline auto UseExtBindlessImages = [](const CallExpr *C) -> bool { return DpctGlobalInfo::useExtBindlessImages(); }; +inline auto UseExtLevelZero = [](const CallExpr *C) -> bool { + return DpctGlobalInfo::useExtLevelZero(); +}; + inline auto UseExtGraph = [](const CallExpr *C) -> bool { return DpctGlobalInfo::useExtGraph(); }; diff --git a/clang/lib/DPCT/RuleInfra/MapNames.cpp b/clang/lib/DPCT/RuleInfra/MapNames.cpp index 051939b3e1d0..a49f5f12b5b1 100644 --- a/clang/lib/DPCT/RuleInfra/MapNames.cpp +++ b/clang/lib/DPCT/RuleInfra/MapNames.cpp @@ -363,6 +363,9 @@ void MapNames::setExplicitNamespaceMap( {"CUstreamCallback", std::make_shared(getDpctNamespace() + "queue_callback", HelperFeatureEnum::device_ext)}, + {"cudaIpcMemHandle_t", + std::make_shared(getDpctNamespace() + + "experimental::ipc_mem_handle_ext_t")}, {"char1", std::make_shared("int8_t")}, {"char2", std::make_shared(getClNamespace() + "char2")}, {"char3", std::make_shared(getClNamespace() + "char3")}, diff --git a/clang/lib/DPCT/RulesLang/APINamesMisc.inc b/clang/lib/DPCT/RulesLang/APINamesMisc.inc index 10f8e4ee41fc..3112a9aa57ef 100644 --- a/clang/lib/DPCT/RulesLang/APINamesMisc.inc +++ b/clang/lib/DPCT/RulesLang/APINamesMisc.inc @@ -139,3 +139,44 @@ REMOVE_API_FACTORY_ENTRY_WITH_MSG("cuFuncSetAttribute", REMOVE_API_FACTORY_ENTRY_WITH_MSG("cuGetExportTable", getRemovedAPIWarningMessage("cuGetExportTable")) + +ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( + HelperFeatureEnum::device_ext, + CONDITIONAL_FACTORY_ENTRY( + clang::dpct::UseExtLevelZero, + CALL_FACTORY_ENTRY("cudaIpcGetMemHandle", + CALL(MapNames::getDpctNamespace() + + "experimental::get_mem_ipc_handle", + ARG(1), ARG(0))), + UNSUPPORT_FACTORY_ENTRY( + "cudaIpcGetMemHandle", Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaIpcGetMemHandle"), + ARG("--use-experimental-features=level_zero"))))) + +ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( + HelperFeatureEnum::device_ext, + CONDITIONAL_FACTORY_ENTRY( + clang::dpct::UseExtLevelZero, + CALL_FACTORY_ENTRY("cudaIpcCloseMemHandle", + CALL("zeMemCloseIpcHandle", + CALL("sycl::get_native", MEMBER_CALL(CALL(MapNames::getDpctNamespace() + + "get_current_device"), + false, "get_context")), + ARG(0))), + UNSUPPORT_FACTORY_ENTRY( + "cudaIpcCloseMemHandle", Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaIpcCloseMemHandle"), + ARG("--use-experimental-features=level_zero"))))) + +ASSIGNABLE_FACTORY(FEATURE_REQUEST_FACTORY( + HelperFeatureEnum::device_ext, + CONDITIONAL_FACTORY_ENTRY( + clang::dpct::UseExtLevelZero, + CALL_FACTORY_ENTRY("cudaIpcOpenMemHandle", + CALL(MapNames::getDpctNamespace() + + "experimental::open_mem_ipc_handle", + ARG(1), ARG(0))), + UNSUPPORT_FACTORY_ENTRY("cudaIpcOpenMemHandle", + Diagnostics::TRY_EXPERIMENTAL_FEATURE, + ARG("cudaIpcOpenMemHandle"), + ARG("--use-experimental-features=level_zero"))))) diff --git a/clang/lib/DPCT/RulesLang/RulesLang.cpp b/clang/lib/DPCT/RulesLang/RulesLang.cpp index 012f4e0e2370..859bf795c812 100644 --- a/clang/lib/DPCT/RulesLang/RulesLang.cpp +++ b/clang/lib/DPCT/RulesLang/RulesLang.cpp @@ -348,7 +348,7 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) { "cudaGraphicsRegisterFlags", "cudaExternalMemoryHandleType", "cudaExternalSemaphoreHandleType", "CUstreamCallback", "cudaHostFn_t", "__nv_half2", "__nv_half", "cudaGraphNodeType", - "CUsurfref", "CUdevice_P2PAttribute")))))) + "CUsurfref", "CUdevice_P2PAttribute", "cudaIpcMemHandle_t")))))) .bind("cudaTypeDef"), this); @@ -928,6 +928,13 @@ void TypeInDeclRule::runRule(const MatchFinder::MatchResult &Result) { "cudaGraphNodeType", "--use-experimental-features=graph"); } } + if (CanonicalTypeStr == "cudaIpcMemHandle_st") { + if (!DpctGlobalInfo::useExtLevelZero()) { + report(TL->getBeginLoc(), Diagnostics::TRY_EXPERIMENTAL_FEATURE, false, + "cudaIpcMemHandle_t", "--use-experimental-features=level_zero"); + return; + } + } if (CanonicalTypeStr == "cudaGraphExecUpdateResult") { report(TL->getBeginLoc(), Diagnostics::API_NOT_MIGRATED, false, @@ -2650,12 +2657,6 @@ void FunctionCallRule::runRule(const MatchFinder::MatchResult &Result) { MapNames::ITFName.at(FuncName), Msg); emplaceTransformation(new ReplaceStmt(CE, "")); } - } else if (FuncName == "cudaIpcGetEventHandle" || - FuncName == "cudaIpcOpenEventHandle" || - FuncName == "cudaIpcGetMemHandle" || - FuncName == "cudaIpcOpenMemHandle" || - FuncName == "cudaIpcCloseMemHandle") { - report(CE->getBeginLoc(), Diagnostics::IPC_NOT_SUPPORTED, false); } else if (FuncName == "__trap") { if (DpctGlobalInfo::useAssert()) { emplaceTransformation(new ReplaceStmt(CE, "assert(0)")); diff --git a/clang/lib/DPCT/SrcAPI/APINames.inc b/clang/lib/DPCT/SrcAPI/APINames.inc index 9866f93fcdbe..17d1872b487b 100644 --- a/clang/lib/DPCT/SrcAPI/APINames.inc +++ b/clang/lib/DPCT/SrcAPI/APINames.inc @@ -65,11 +65,11 @@ ENTRY(cudaGetDeviceFlags, cudaGetDeviceFlags, false, NO_FLAG, P4, "comment") ENTRY(cudaGetDeviceProperties, cudaGetDeviceProperties, true, NO_FLAG, P0, "Successful") ENTRY(cudaGetDeviceProperties_v2 , cudaGetDeviceProperties_v2, true, NO_FLAG, P0, "Successful") ENTRY(cudaInitDevice, cudaInitDevice, false, NO_FLAG, P4, "comment") -ENTRY(cudaIpcCloseMemHandle, cudaIpcCloseMemHandle, true, NO_FLAG, P0, "DPCT1030") -ENTRY(cudaIpcGetEventHandle, cudaIpcGetEventHandle, true, NO_FLAG, P0, "DPCT1030") -ENTRY(cudaIpcGetMemHandle, cudaIpcGetMemHandle, true, NO_FLAG, P0, "DPCT1030") -ENTRY(cudaIpcOpenEventHandle, cudaIpcOpenEventHandle, true, NO_FLAG, P0, "DPCT1030") -ENTRY(cudaIpcOpenMemHandle, cudaIpcOpenMemHandle, true, NO_FLAG, P0, "DPCT1030") +ENTRY(cudaIpcCloseMemHandle, cudaIpcCloseMemHandle, true, NO_FLAG, P0, "DPCT1119") +ENTRY(cudaIpcGetEventHandle, cudaIpcGetEventHandle, false, NO_FLAG, P0, "DPCT1030") +ENTRY(cudaIpcGetMemHandle, cudaIpcGetMemHandle, true, NO_FLAG, P0, "DPCT1119") +ENTRY(cudaIpcOpenEventHandle, cudaIpcOpenEventHandle, false, NO_FLAG, P0, "DPCT1030") +ENTRY(cudaIpcOpenMemHandle, cudaIpcOpenMemHandle, true, NO_FLAG, P0, "DPCT1119") ENTRY(cudaSetDevice, cudaSetDevice, true, NO_FLAG, P0, "DPCT1093") ENTRY(cudaSetDeviceFlags, cudaSetDeviceFlags, true, NO_FLAG, P0, "DPCT1026/DPCT1027") ENTRY(cudaSetValidDevices, cudaSetValidDevices, false, NO_FLAG, P4, "comment") diff --git a/clang/runtime/dpct-rt/CMakeLists.txt b/clang/runtime/dpct-rt/CMakeLists.txt index 43f62d56a75e..9383343f63ab 100644 --- a/clang/runtime/dpct-rt/CMakeLists.txt +++ b/clang/runtime/dpct-rt/CMakeLists.txt @@ -12,6 +12,7 @@ set(dpct_rt_files include/dpct/dnnl_utils.hpp include/dpct/dpl_utils.hpp include/dpct/rng_utils.hpp + include/dpct/ze_utils.hpp include/dpct/lib_common_utils.hpp include/dpct/ccl_utils.hpp include/dpct/sparse_utils.hpp diff --git a/clang/runtime/dpct-rt/include/dpct/dpct.hpp b/clang/runtime/dpct-rt/include/dpct/dpct.hpp index 845c0274fe2f..6318e88cbf95 100644 --- a/clang/runtime/dpct-rt/include/dpct/dpct.hpp +++ b/clang/runtime/dpct-rt/include/dpct/dpct.hpp @@ -58,7 +58,7 @@ template class dpct_kernel_scalar; #include "bindless_images.hpp" #include "graph.hpp" - +#include "ze_utils.hpp" #define USE_DPCT_HELPER 1 #if defined(_MSC_VER) diff --git a/clang/runtime/dpct-rt/include/dpct/ze_utils.hpp b/clang/runtime/dpct-rt/include/dpct/ze_utils.hpp new file mode 100644 index 000000000000..631246e40f6b --- /dev/null +++ b/clang/runtime/dpct-rt/include/dpct/ze_utils.hpp @@ -0,0 +1,85 @@ +//==---- ze_utils.hpp ---------------------------------*- C++ +//-*----------------==// +// +// Copyright (C) Intel Corporation +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#ifndef __DPCT_ZE_UTILS_HPP__ +#define __DPCT_ZE_UTILS_HPP__ + +#ifdef ONEAPI_BACKEND_LEVEL_ZERO_EXT +#if defined(__linux__) +#include +#include +#include +namespace dpct { +namespace experimental { + +struct ipc_mem_handle_ext_t { + pid_t pid; + ze_ipc_mem_handle_t handle; +}; + +namespace detail { + +#ifndef _SYS_pidfd_open +#define _SYS_pidfd_open 434 // syscall number for pidfd_open +#endif + +#ifndef _SYS_pidfd_getfd +#define _SYS_pidfd_getfd 438 // syscall number for pidfd_getfd +#endif + +inline int get_fd_of_peer_process(ipc_mem_handle_ext_t ext_handle) { + int pidfd = syscall(_SYS_pidfd_open, ext_handle.pid, + 0); // obtain a file descriptor that refers to a + // process(requires kernel 5.6+). + if (pidfd < 0) + return -1; + return syscall(_SYS_pidfd_getfd, pidfd, *(int *)ext_handle.handle.data, + 0); // obtain a duplicate of another process's file + // descriptor(requires kernel 5.6+). +} + +} // namespace detail + +/// Creates an IPC memory handle for the specified allocation. +/// \param [in] ptr Pointer to the device memory allocation +/// \param [out] ext_handle_ptr IPC memory handle extension +inline ze_result_t get_mem_ipc_handle(const void *ptr, + ipc_mem_handle_ext_t *ext_handle_ptr) { + ext_handle_ptr->pid = getpid(); + return zeMemGetIpcHandle( + sycl::get_native( + dpct::get_current_device().get_context()), + ptr, &ext_handle_ptr->handle); +} + +/// Opens an IPC memory handle to retrieve a device pointer. +/// \param [in] ext_handle IPC memory handle extension +/// \param [out] pptr Pointer to device allocation in this process +inline ze_result_t open_mem_ipc_handle(ipc_mem_handle_ext_t ext_handle, + void **pptr) { + int fd = detail::get_fd_of_peer_process(ext_handle); + if (fd < 0) + throw std::runtime_error("Cannot get file descriptor of peer process."); + *((int *)ext_handle.handle.data) = fd; + + return zeMemOpenIpcHandle( + sycl::get_native( + dpct::get_current_device().get_context()), + sycl::get_native( + (sycl::device)dpct::get_current_device()), + ext_handle.handle, 0u, pptr); +} + +} // namespace experimental +} // namespace dpct + +#endif // __linux__ +#endif // ONEAPI_BACKEND_LEVEL_ZERO_EXT + +#endif // ! __DPCT_ZE_UTILS_HPP__ diff --git a/clang/test/dpct/IPC/share_mem.cu b/clang/test/dpct/IPC/share_mem.cu new file mode 100644 index 000000000000..73a2b5060184 --- /dev/null +++ b/clang/test/dpct/IPC/share_mem.cu @@ -0,0 +1,151 @@ +// UNSUPPORTED: system-windows +// RUN: dpct --format-range=none --out-root %T/share_mem %s --cuda-include-path="%cuda-path/include" --sycl-named-lambda -- -x cuda --cuda-host-only +// RUN: FileCheck --input-file %T/share_mem/share_mem.dp.cpp --match-full-lines %s + +#include +#include +#include +#include +#include +#include +#include +#include +#define DATA_SIZE 1024 + +#define shName "shared_memory" + +typedef struct sharedMemoryInfo_st { + void *addr; + size_t size; + int shmFd; +} sharedMemoryInfo; + +int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info) { + int status = 0; + info->size = sz; + info->shmFd = shm_open(name, O_RDWR | O_CREAT, 0777); + if (info->shmFd < 0) { + return errno; + } + + status = ftruncate(info->shmFd, sz); + if (status != 0) { + return status; + } + + info->addr = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0); + if (info->addr == NULL) { + return errno; + } + + return 0; +} + +int sharedMemoryOpen(const char *name, size_t sz, sharedMemoryInfo *info) { + info->size = sz; + + info->shmFd = shm_open(name, O_RDWR, 0777); + if (info->shmFd < 0) { + return errno; + } + + info->addr = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0); + if (info->addr == NULL) { + return errno; + } + + return 0; +} + +typedef struct shmStruct_st { + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaIpcMemHandle_t is not supported, please try to remigrate with option: --use-experimental-features=level_zero. + // CHECK-NEXT: */ + cudaIpcMemHandle_t memHandle; +} shmStruct; + +__global__ void simpleKernel(int *ptr) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + ptr[idx] = idx - 10; +} + +typedef pid_t Process; +int spawnProcess(Process *process, const char *app, char *const *args) { + *process = fork(); + if (*process == 0) { + if (0 > execvp(app, args)) { + return errno; + } + } else if (*process < 0) { + return errno; + } + return 0; +} + +int childProcess(int id) { + int threads = 256; + sharedMemoryInfo info; + shmStruct *shm = NULL; + if (sharedMemoryCreate(shName, sizeof(shmStruct), &info) != 0) { + printf("Failed to create shared memory slab\n"); + exit(EXIT_FAILURE); + } + shm = (shmStruct *)info.addr; + int *ptr; + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaIpcOpenMemHandle is not supported, please try to remigrate with option: --use-experimental-features=level_zero. + // CHECK-NEXT: */ + cudaIpcOpenMemHandle((void **)&ptr, shm->memHandle, + cudaIpcMemLazyEnablePeerAccess); + // CHECK: /* + // CHECK-NEXT: DPCT1049:{{[0-9]+}}: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. + // CHECK-NEXT: */ + simpleKernel<<<1, threads, 0>>>(ptr); + return 0; +} + +int parentProcess(char *app) { + + shmStruct *shm; + sharedMemoryInfo info; + void *ptr; + if (sharedMemoryCreate(shName, sizeof(*shm), &info) != 0) { + printf("Failed to create shared memory slab\n"); + exit(EXIT_FAILURE); + } + shm = (shmStruct *)info.addr; + memset((void *)shm, 0, sizeof(*shm)); + cudaMalloc(&ptr, DATA_SIZE); + int *hostptr = (int *)malloc(DATA_SIZE); + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaIpcGetMemHandle is not supported, please try to remigrate with option: --use-experimental-features=level_zero. + // CHECK-NEXT: */ + cudaIpcGetMemHandle(&shm->memHandle, ptr); + + char *const args[] = {app, "0", NULL}; + Process process; + spawnProcess(&process, app, args); + wait(NULL); + + cudaMemcpy(hostptr, ptr, DATA_SIZE, cudaMemcpyDeviceToHost); + for (int i = 0; i < DATA_SIZE / sizeof(int); i++) { + if (hostptr[i] != i - 10) { + std::cout << "Error: " << hostptr[i] << " != " << i - 10 << "\n"; + return -1; + } + } + std::cout << "verified Pass.\n"; + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaIpcCloseMemHandle is not supported, please try to remigrate with option: --use-experimental-features=level_zero. + // CHECK-NEXT: */ + cudaIpcCloseMemHandle(ptr); + return 0; +} + +int main(int argc, char **argv) { + if (argc == 1) { + return parentProcess(argv[0]); + } else { + return childProcess(atoi(argv[1])); + } +} diff --git a/clang/test/dpct/IPC/share_mem_exp_option.cu b/clang/test/dpct/IPC/share_mem_exp_option.cu new file mode 100644 index 000000000000..0cbfa00560b0 --- /dev/null +++ b/clang/test/dpct/IPC/share_mem_exp_option.cu @@ -0,0 +1,168 @@ +// UNSUPPORTED: system-windows +// RUN: dpct --use-experimental-features=level_zero --format-range=none -out-root %T/share_mem_exp_option %s --cuda-include-path="%cuda-path/include" --sycl-named-lambda -- -x cuda --cuda-host-only +// RUN: FileCheck --input-file %T/share_mem_exp_option/share_mem_exp_option.dp.cpp --match-full-lines %s +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/share_mem_exp_option/share_mem_exp_option.dp.cpp -o %T/share_mem_exp_option/share_mem_exp_option.dp.o %} + + +#include +#include +#include +#include +#include +#include +#include +#include +#define DATA_SIZE 1024 + +#define shName "shared_memory" + +typedef struct sharedMemoryInfo_st +{ + void *addr; + size_t size; + int shmFd; +} sharedMemoryInfo; + +int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info) +{ + int status = 0; + info->size = sz; + info->shmFd = shm_open(name, O_RDWR | O_CREAT, 0777); + if (info->shmFd < 0) + { + return errno; + } + + status = ftruncate(info->shmFd, sz); + if (status != 0) + { + return status; + } + + info->addr = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0); + if (info->addr == NULL) + { + return errno; + } + + return 0; +} + +int sharedMemoryOpen(const char *name, size_t sz, sharedMemoryInfo *info) +{ + info->size = sz; + + info->shmFd = shm_open(name, O_RDWR, 0777); + if (info->shmFd < 0) + { + return errno; + } + + info->addr = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0); + if (info->addr == NULL) + { + return errno; + } + + return 0; +} + +typedef struct shmStruct_st +{ + // CHECK: dpct::experimental::ipc_mem_handle_ext_t memHandle; + cudaIpcMemHandle_t memHandle; +} shmStruct; + +__global__ void simpleKernel(int *ptr) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + ptr[idx] = idx - 10; +} + +typedef pid_t Process; +int spawnProcess(Process *process, const char *app, char *const *args) +{ + *process = fork(); + if (*process == 0) + { + if (0 > execvp(app, args)) + { + return errno; + } + } + else if (*process < 0) + { + return errno; + } + return 0; +} + +int childProcess(int id) +{ + int threads = 256; + sharedMemoryInfo info; + shmStruct *shm = NULL; + if (sharedMemoryCreate(shName, sizeof(shmStruct), &info) != 0) + { + printf("Failed to create shared memory slab\n"); + exit(EXIT_FAILURE); + } + shm = (shmStruct *)info.addr; + int *ptr; + // CHECK: dpct::experimental::open_mem_ipc_handle(*(dpct::experimental::ipc_mem_handle_ext_t *)&shm->memHandle, (void **)&ptr); + cudaIpcOpenMemHandle((void **)&ptr, *(cudaIpcMemHandle_t *)&shm->memHandle, + cudaIpcMemLazyEnablePeerAccess); + + simpleKernel<<<1, threads, 0>>>(ptr); + return 0; +} + +int parentProcess(char *app) +{ + + shmStruct *shm; + sharedMemoryInfo info; + void *ptr; + if (sharedMemoryCreate(shName, sizeof(*shm), &info) != 0) + { + printf("Failed to create shared memory slab\n"); + exit(EXIT_FAILURE); + } + shm = (shmStruct *)info.addr; + memset((void *)shm, 0, sizeof(*shm)); + cudaMalloc(&ptr, DATA_SIZE); + int *hostptr = (int *)malloc(DATA_SIZE); + // CHECK: dpct::experimental::get_mem_ipc_handle(ptr, (dpct::experimental::ipc_mem_handle_ext_t *)&shm->memHandle); + cudaIpcGetMemHandle((cudaIpcMemHandle_t *)&shm->memHandle, ptr); + + char *const args[] = {app, "0", NULL}; + Process process; + spawnProcess(&process, app, args); + wait(NULL); + + cudaMemcpy(hostptr, ptr, DATA_SIZE, cudaMemcpyDeviceToHost); + for (int i = 0; i < DATA_SIZE / sizeof(int); i++) + { + if (hostptr[i] != i - 10) + { + std::cout << "Error: " << hostptr[i] << " != " << i - 10 << "\n"; + return -1; + } + } + std::cout << "verified Pass.\n"; + // CHECK: zeMemCloseIpcHandle(sycl::get_native(dpct::get_current_device().get_context()), ptr); + cudaIpcCloseMemHandle(ptr); + return 0; +} + +int main(int argc, char **argv) +{ + if (argc == 1) + { + return parentProcess(argv[0]); + } + else + { + return childProcess(atoi(argv[1])); + } +} \ No newline at end of file diff --git a/clang/test/dpct/check_header_files.cpp b/clang/test/dpct/check_header_files.cpp index ffefd65fcd15..befcf076a149 100644 --- a/clang/test/dpct/check_header_files.cpp +++ b/clang/test/dpct/check_header_files.cpp @@ -21,6 +21,8 @@ // RUN: diff %T/check_header_files/out/include/dpct/rng_utils.hpp %S/../../runtime/dpct-rt/include/dpct/rng_utils.hpp >> %T/check_header_files/diff_res.txt +// RUN: diff %T/check_header_files/out/include/dpct/ze_utils.hpp %S/../../runtime/dpct-rt/include/dpct/ze_utils.hpp >> %T/check_header_files/diff_res.txt + // RUN: diff %T/check_header_files/out/include/dpct/lib_common_utils.hpp %S/../../runtime/dpct-rt/include/dpct/lib_common_utils.hpp >> %T/check_header_files/diff_res.txt // RUN: diff %T/check_header_files/out/include/dpct/ccl_utils.hpp %S/../../runtime/dpct-rt/include/dpct/ccl_utils.hpp >> %T/check_header_files/diff_res.txt diff --git a/clang/test/dpct/cuda-device-api.cu b/clang/test/dpct/cuda-device-api.cu index 27380d51a640..44cbb838280d 100644 --- a/clang/test/dpct/cuda-device-api.cu +++ b/clang/test/dpct/cuda-device-api.cu @@ -11,13 +11,17 @@ void foo() { int peerDevice; int *canAccessPeer; int device; + // CHECK: /* + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaIpcMemHandle_t is not supported, please try to + // CHECK-NEXT: remigrate with option: --use-experimental-features=level_zero. + // CHECK-NEXT: */ cudaIpcEventHandle_t *handleEvent; cudaEvent_t event; cudaIpcMemHandle_t *handleMem; void *devPtr; // CHECK: /* - // CHECK-NEXT: DPCT1029:0: SYCL currently does not support getting device resource limits. + // CHECK-NEXT: DPCT1029:{{[0-9]+}}: SYCL currently does not support getting device resource limits. // CHECK-NEXT: The output parameter(s) are set to 0. // CHECK-NEXT: */ // CHECK-NEXT: *pValue = 0; @@ -48,33 +52,31 @@ void foo() { cudaDeviceCanAccessPeer(canAccessPeer, device, peerDevice); // CHECK: /* - // CHECK-NEXT: DPCT1030:{{[0-9]+}}: SYCL currently does not support inter-process communication (IPC) - // CHECK-NEXT: operations. You may need to rewrite the code. + // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaIpcGetEventHandle is not supported. // CHECK-NEXT: */ cudaIpcGetEventHandle(handleEvent, event); // CHECK: /* - // CHECK-NEXT: DPCT1030:{{[0-9]+}}: SYCL currently does not support inter-process communication (IPC) - // CHECK-NEXT: operations. You may need to rewrite the code. + // CHECK-NEXT: DPCT1007:{{[0-9]+}}: Migration of cudaIpcOpenEventHandle is not supported. // CHECK-NEXT: */ cudaIpcOpenEventHandle(&event, *handleEvent); // CHECK: /* - // CHECK-NEXT: DPCT1030:{{[0-9]+}}: SYCL currently does not support inter-process communication (IPC) - // CHECK-NEXT: operations. You may need to rewrite the code. + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaIpcGetMemHandle is not supported, please try to + // CHECK-NEXT: remigrate with option: --use-experimental-features=level_zero. // CHECK-NEXT: */ cudaIpcGetMemHandle(handleMem, devPtr); // CHECK: /* - // CHECK-NEXT: DPCT1030:{{[0-9]+}}: SYCL currently does not support inter-process communication (IPC) - // CHECK-NEXT: operations. You may need to rewrite the code. + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaIpcOpenMemHandle is not supported, please try to + // CHECK-NEXT: remigrate with option: --use-experimental-features=level_zero. // CHECK-NEXT: */ cudaIpcOpenMemHandle(&devPtr, *handleMem, flags); // CHECK: /* - // CHECK-NEXT: DPCT1030:{{[0-9]+}}: SYCL currently does not support inter-process communication (IPC) - // CHECK-NEXT: operations. You may need to rewrite the code. + // CHECK-NEXT: DPCT1119:{{[0-9]+}}: Migration of cudaIpcCloseMemHandle is not supported, please try to + // CHECK-NEXT: remigrate with option: --use-experimental-features=level_zero. // CHECK-NEXT: */ cudaIpcCloseMemHandle(devPtr); diff --git a/clang/test/dpct/help_option_check/lin/help_advanced.txt b/clang/test/dpct/help_option_check/lin/help_advanced.txt index 784340d69c8f..6aa0102cd796 100644 --- a/clang/test/dpct/help_option_check/lin/help_advanced.txt +++ b/clang/test/dpct/help_option_check/lin/help_advanced.txt @@ -63,6 +63,7 @@ Advanced DPCT options =in_order_queue_events - Experimental extension that allows placing the event from the last command submission into the queue and setting an external event as an implicit dependence on the next command submitted to the queue. =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. =prefetch - Experimental extension that allows use of SYCL prefetch APIs. + =level_zero - Experimental migration feature that enables the use of Level Zero APIs to migrate target code, like CUDA Inter-Process Communication (IPC) APIs. =all - Enable all experimental extensions listed in this option. ... Paths of input source files. These paths are looked up in the compilation database. diff --git a/clang/test/dpct/help_option_check/lin/help_all.txt b/clang/test/dpct/help_option_check/lin/help_all.txt index 82c369d266d4..69197f9365d4 100644 --- a/clang/test/dpct/help_option_check/lin/help_all.txt +++ b/clang/test/dpct/help_option_check/lin/help_all.txt @@ -170,6 +170,7 @@ All DPCT options =in_order_queue_events - Experimental extension that allows placing the event from the last command submission into the queue and setting an external event as an implicit dependence on the next command submitted to the queue. =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. =prefetch - Experimental extension that allows use of SYCL prefetch APIs. + =level_zero - Experimental migration feature that enables the use of Level Zero APIs to migrate target code, like CUDA Inter-Process Communication (IPC) APIs. =all - Enable all experimental extensions listed in this option. --use-explicit-namespace= - Define the namespaces to use explicitly in generated code. The is a comma separated list. Default: dpct/syclcompat, sycl. diff --git a/clang/test/dpct/help_option_check/win/help_advanced.txt b/clang/test/dpct/help_option_check/win/help_advanced.txt index 9cd23a1b9461..0dd2df3b1b0f 100644 --- a/clang/test/dpct/help_option_check/win/help_advanced.txt +++ b/clang/test/dpct/help_option_check/win/help_advanced.txt @@ -63,6 +63,7 @@ Advanced DPCT options =in_order_queue_events - Experimental extension that allows placing the event from the last command submission into the queue and setting an external event as an implicit dependence on the next command submitted to the queue. =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. =prefetch - Experimental extension that allows use of SYCL prefetch APIs. + =level_zero - Experimental migration feature that enables the use of Level Zero APIs to migrate target code, like CUDA Inter-Process Communication (IPC) APIs. =all - Enable all experimental extensions listed in this option. ... Paths of input source files. These paths are looked up in the compilation database. diff --git a/clang/test/dpct/help_option_check/win/help_all.txt b/clang/test/dpct/help_option_check/win/help_all.txt index daea6b592a07..545d26ac272f 100644 --- a/clang/test/dpct/help_option_check/win/help_all.txt +++ b/clang/test/dpct/help_option_check/win/help_all.txt @@ -169,6 +169,7 @@ All DPCT options =in_order_queue_events - Experimental extension that allows placing the event from the last command submission into the queue and setting an external event as an implicit dependence on the next command submitted to the queue. =non-stdandard-sycl-builtins - Experimental extension that allows use of non standard SYCL builtin functions. =prefetch - Experimental extension that allows use of SYCL prefetch APIs. + =level_zero - Experimental migration feature that enables the use of Level Zero APIs to migrate target code, like CUDA Inter-Process Communication (IPC) APIs. =all - Enable all experimental extensions listed in this option. --use-explicit-namespace= - Define the namespaces to use explicitly in generated code. The is a comma separated list. Default: dpct/syclcompat, sycl.