Skip to content

Commit 186a2fa

Browse files
zhaomaosu authored and bb-ur committed
Remove urKernelSetArg* and urEnqueueKernelLaunch APIs in sanitizer layer (#21621)
1 parent 9b101be commit 186a2fa

10 files changed

Lines changed: 94 additions & 662 deletions

File tree

source/adapters/opencl/kernel.cpp

Lines changed: 44 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -532,9 +532,51 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
532532
UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSizeWithArgs(
533533
ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim,
534534
const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
535-
[[maybe_unused]] uint32_t numArgs,
536-
[[maybe_unused]] const ur_exp_kernel_arg_properties_t *pArgs,
535+
uint32_t numArgs, const ur_exp_kernel_arg_properties_t *pArgs,
537536
size_t *pSuggestedLocalWorkSize) {
537+
clSetKernelArgMemPointerINTEL_fn SetKernelArgMemPointerPtr = nullptr;
538+
UR_RETURN_ON_FAILURE(
539+
cl_ext::getExtFuncFromContext<clSetKernelArgMemPointerINTEL_fn>(
540+
hQueue->Context->CLContext,
541+
ur::cl::getAdapter()->fnCache.clSetKernelArgMemPointerINTELCache,
542+
cl_ext::SetKernelArgMemPointerName, &SetKernelArgMemPointerPtr));
543+
544+
for (uint32_t i = 0; i < numArgs; i++) {
545+
switch (pArgs[i].type) {
546+
case UR_EXP_KERNEL_ARG_TYPE_LOCAL:
547+
CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->CLKernel,
548+
static_cast<cl_uint>(pArgs[i].index),
549+
pArgs[i].size, nullptr));
550+
break;
551+
case UR_EXP_KERNEL_ARG_TYPE_VALUE:
552+
CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->CLKernel,
553+
static_cast<cl_uint>(pArgs[i].index),
554+
pArgs[i].size, pArgs[i].value.value));
555+
break;
556+
case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: {
557+
cl_mem mem = pArgs[i].value.memObjTuple.hMem
558+
? pArgs[i].value.memObjTuple.hMem->CLMemory
559+
: nullptr;
560+
CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->CLKernel,
561+
static_cast<cl_uint>(pArgs[i].index),
562+
pArgs[i].size, &mem));
563+
break;
564+
}
565+
case UR_EXP_KERNEL_ARG_TYPE_POINTER:
566+
CL_RETURN_ON_FAILURE(SetKernelArgMemPointerPtr(
567+
hKernel->CLKernel, static_cast<cl_uint>(pArgs[i].index),
568+
pArgs[i].value.pointer));
569+
break;
570+
case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: {
571+
CL_RETURN_ON_FAILURE(clSetKernelArg(
572+
hKernel->CLKernel, static_cast<cl_uint>(pArgs[i].index),
573+
pArgs[i].size, &pArgs[i].value.sampler->CLSampler));
574+
break;
575+
}
576+
default:
577+
return UR_RESULT_ERROR_INVALID_ENUMERATION;
578+
}
579+
}
538580
return urKernelGetSuggestedLocalWorkSize(hKernel, hQueue, workDim,
539581
pGlobalWorkOffset, pGlobalWorkSize,
540582
pSuggestedLocalWorkSize);

source/loader/layers/sanitizer/asan/asan_ddi.cpp

Lines changed: 20 additions & 244 deletions
Original file line number | Diff line number | Diff line change
@@ -486,80 +486,6 @@ ur_result_t UR_APICALL urProgramRelease(
486486
return UR_RESULT_SUCCESS;
487487
}
488488

489-
///////////////////////////////////////////////////////////////////////////////
490-
/// @brief Intercept function for urEnqueueKernelLaunch
491-
__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
492-
/// [in] handle of the queue object
493-
ur_queue_handle_t hQueue,
494-
/// [in] handle of the kernel object
495-
ur_kernel_handle_t hKernel,
496-
/// [in] number of dimensions, from 1 to 3, to specify the global and
497-
/// work-group work-items
498-
uint32_t workDim,
499-
/// [in] pointer to an array of workDim unsigned values that specify the
500-
/// offset used to calculate the global ID of a work-item
501-
const size_t *pGlobalWorkOffset,
502-
/// [in] pointer to an array of workDim unsigned values that specify the
503-
/// number of global work-items in workDim that will execute the kernel
504-
/// function
505-
const size_t *pGlobalWorkSize,
506-
/// [in][optional] pointer to an array of workDim unsigned values that
507-
/// specify the number of local work-items forming a work-group that will
508-
/// execute the kernel function. If nullptr, the runtime implementation will
509-
/// choose the work-group size.
510-
const size_t *pLocalWorkSize,
511-
/// [in][optional] pointer to a single linked list of launch properties
512-
const ur_kernel_launch_ext_properties_t *launchPropList,
513-
/// [in] size of the event wait list
514-
uint32_t numEventsInWaitList,
515-
/// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
516-
/// events that must be complete before the kernel execution. If
517-
/// nullptr, the numEventsInWaitList must be 0, indicating that no wait
518-
/// event.
519-
const ur_event_handle_t *phEventWaitList,
520-
/// [out][optional] return an event object that identifies this
521-
/// particular kernel execution instance.
522-
ur_event_handle_t *phEvent) {
523-
524-
// This mutex is to prevent concurrent kernel launches across different queues
525-
// as the DeviceASAN local/private shadow memory does not support concurrent
526-
// kernel launches now.
527-
std::scoped_lock<ur_shared_mutex> Guard(
528-
getAsanInterceptor()->KernelLaunchMutex);
529-
530-
auto pfnKernelLaunch = getContext()->urDdiTable.Enqueue.pfnKernelLaunch;
531-
532-
if (nullptr == pfnKernelLaunch) {
533-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
534-
}
535-
536-
UR_LOG_L(getContext()->logger, DEBUG, "==== urEnqueueKernelLaunch");
537-
538-
LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), pGlobalWorkSize,
539-
pLocalWorkSize, pGlobalWorkOffset, workDim);
540-
UR_CALL(LaunchInfo.Data.syncToDevice(hQueue));
541-
542-
UR_CALL(getAsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo));
543-
544-
ur_result_t UrRes = getContext()->urDdiTable.Enqueue.pfnKernelLaunch(
545-
hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
546-
LaunchInfo.LocalWorkSize.data(), launchPropList, numEventsInWaitList,
547-
phEventWaitList, phEvent);
548-
if (UrRes != UR_RESULT_SUCCESS) {
549-
if (UrRes == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) {
550-
UR_LOG_L(
551-
getContext()->logger, ERR,
552-
"urEnqueueKernelLaunch failed due to out of device memory, maybe "
553-
"SLM is fully used.");
554-
}
555-
return UrRes;
556-
}
557-
558-
UR_CALL(getAsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo));
559-
560-
return UR_RESULT_SUCCESS;
561-
}
562-
563489
///////////////////////////////////////////////////////////////////////////////
564490
/// @brief Intercept function for urContextCreate
565491
__urdlllocal ur_result_t UR_APICALL urContextCreate(
@@ -1482,143 +1408,6 @@ __urdlllocal ur_result_t urKernelRelease(
14821408
return UR_RESULT_SUCCESS;
14831409
}
14841410

1485-
///////////////////////////////////////////////////////////////////////////////
1486-
/// @brief Intercept function for urKernelSetArgValue
1487-
__urdlllocal ur_result_t UR_APICALL urKernelSetArgValue(
1488-
/// [in] handle of the kernel object
1489-
ur_kernel_handle_t hKernel,
1490-
/// [in] argument index in range [0, num args - 1]
1491-
uint32_t argIndex,
1492-
/// [in] size of argument type
1493-
size_t argSize,
1494-
/// [in][optional] pointer to value properties.
1495-
const ur_kernel_arg_value_properties_t *pProperties,
1496-
/// [in] argument value represented as matching arg type.
1497-
const void *pArgValue) {
1498-
auto pfnSetArgValue = getContext()->urDdiTable.Kernel.pfnSetArgValue;
1499-
1500-
if (nullptr == pfnSetArgValue) {
1501-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1502-
}
1503-
1504-
UR_LOG_L(getContext()->logger, DEBUG, "==== urKernelSetArgValue");
1505-
1506-
std::shared_ptr<MemBuffer> MemBuffer;
1507-
if (argSize == sizeof(ur_mem_handle_t) &&
1508-
(MemBuffer = getAsanInterceptor()->getMemBuffer(
1509-
*ur_cast<const ur_mem_handle_t *>(pArgValue)))) {
1510-
auto &KernelInfo = getAsanInterceptor()->getOrCreateKernelInfo(hKernel);
1511-
std::scoped_lock<ur_shared_mutex> Guard(KernelInfo.Mutex);
1512-
KernelInfo.BufferArgs[argIndex] = std::move(MemBuffer);
1513-
} else {
1514-
UR_CALL(pfnSetArgValue(hKernel, argIndex, argSize, pProperties, pArgValue));
1515-
}
1516-
1517-
return UR_RESULT_SUCCESS;
1518-
}
1519-
1520-
///////////////////////////////////////////////////////////////////////////////
1521-
/// @brief Intercept function for urKernelSetArgMemObj
1522-
__urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj(
1523-
/// [in] handle of the kernel object
1524-
ur_kernel_handle_t hKernel,
1525-
/// [in] argument index in range [0, num args - 1]
1526-
uint32_t argIndex,
1527-
/// [in][optional] pointer to Memory object properties.
1528-
const ur_kernel_arg_mem_obj_properties_t *pProperties,
1529-
/// [in][optional] handle of Memory object.
1530-
ur_mem_handle_t hArgValue) {
1531-
auto pfnSetArgMemObj = getContext()->urDdiTable.Kernel.pfnSetArgMemObj;
1532-
1533-
if (nullptr == pfnSetArgMemObj) {
1534-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1535-
}
1536-
1537-
UR_LOG_L(getContext()->logger, DEBUG, "==== urKernelSetArgMemObj");
1538-
1539-
std::shared_ptr<MemBuffer> MemBuffer;
1540-
if ((MemBuffer = getAsanInterceptor()->getMemBuffer(hArgValue))) {
1541-
auto &KernelInfo = getAsanInterceptor()->getOrCreateKernelInfo(hKernel);
1542-
std::scoped_lock<ur_shared_mutex> Guard(KernelInfo.Mutex);
1543-
KernelInfo.BufferArgs[argIndex] = std::move(MemBuffer);
1544-
} else {
1545-
UR_CALL(pfnSetArgMemObj(hKernel, argIndex, pProperties, hArgValue));
1546-
}
1547-
1548-
return UR_RESULT_SUCCESS;
1549-
}
1550-
1551-
///////////////////////////////////////////////////////////////////////////////
1552-
/// @brief Intercept function for urKernelSetArgLocal
1553-
__urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal(
1554-
/// [in] handle of the kernel object
1555-
ur_kernel_handle_t hKernel,
1556-
/// [in] argument index in range [0, num args - 1]
1557-
uint32_t argIndex,
1558-
/// [in] size of the local buffer to be allocated by the runtime
1559-
size_t argSize,
1560-
/// [in][optional] pointer to local buffer properties.
1561-
const ur_kernel_arg_local_properties_t *pProperties) {
1562-
auto pfnSetArgLocal = getContext()->urDdiTable.Kernel.pfnSetArgLocal;
1563-
1564-
if (nullptr == pfnSetArgLocal) {
1565-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1566-
}
1567-
1568-
UR_LOG_L(getContext()->logger, DEBUG,
1569-
"==== urKernelSetArgLocal (argIndex={}, argSize={})", argIndex,
1570-
argSize);
1571-
1572-
{
1573-
auto &KI = getAsanInterceptor()->getOrCreateKernelInfo(hKernel);
1574-
std::scoped_lock<ur_shared_mutex> Guard(KI.Mutex);
1575-
// TODO: get local variable alignment
1576-
auto argSizeWithRZ = GetSizeAndRedzoneSizeForLocal(
1577-
argSize, ASAN_SHADOW_GRANULARITY, ASAN_SHADOW_GRANULARITY);
1578-
KI.LocalArgs[argIndex] = LocalArgsInfo{argSize, argSizeWithRZ};
1579-
argSize = argSizeWithRZ;
1580-
}
1581-
1582-
ur_result_t result = pfnSetArgLocal(hKernel, argIndex, argSize, pProperties);
1583-
1584-
return result;
1585-
}
1586-
1587-
///////////////////////////////////////////////////////////////////////////////
1588-
/// @brief Intercept function for urKernelSetArgPointer
1589-
__urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer(
1590-
/// [in] handle of the kernel object
1591-
ur_kernel_handle_t hKernel,
1592-
/// [in] argument index in range [0, num args - 1]
1593-
uint32_t argIndex,
1594-
/// [in][optional] pointer to USM pointer properties.
1595-
const ur_kernel_arg_pointer_properties_t *pProperties,
1596-
/// [in][optional] Pointer obtained by USM allocation or virtual memory
1597-
/// mapping operation. If null then argument value is considered null.
1598-
const void *pArgValue) {
1599-
auto pfnSetArgPointer = getContext()->urDdiTable.Kernel.pfnSetArgPointer;
1600-
1601-
if (nullptr == pfnSetArgPointer) {
1602-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1603-
}
1604-
1605-
UR_LOG_L(getContext()->logger, DEBUG,
1606-
"==== urKernelSetArgPointer (argIndex={}, pArgValue={})", argIndex,
1607-
pArgValue);
1608-
1609-
std::shared_ptr<KernelInfo> KI;
1610-
if (getContext()->Options.DetectKernelArguments) {
1611-
auto &KI = getAsanInterceptor()->getOrCreateKernelInfo(hKernel);
1612-
std::scoped_lock<ur_shared_mutex> Guard(KI.Mutex);
1613-
KI.PointerArgs[argIndex] = {pArgValue, GetCurrentBacktrace()};
1614-
}
1615-
1616-
ur_result_t result =
1617-
pfnSetArgPointer(hKernel, argIndex, pProperties, pArgValue);
1618-
1619-
return result;
1620-
}
1621-
16221411
__urdlllocal ur_result_t UR_APICALL urKernelSetExecInfo(
16231412
/// [in] handle of the kernel object
16241413
ur_kernel_handle_t hKernel,
@@ -1744,39 +1533,37 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp(
17441533
std::memcpy(KernelInfo.ArgProps.data(), pArgs,
17451534
numArgs * sizeof(ur_exp_kernel_arg_properties_t));
17461535

1747-
// We need to set all the args now rather than letting LaunchWithArgs handle
1748-
// them. This is because some implementations of
1749-
// urKernelGetSuggestedLocalWorkSize, which is used in preLaunchKernel, rely
1750-
// on all the args being set.
17511536
for (uint32_t ArgPropIndex = 0; ArgPropIndex < numArgs; ArgPropIndex++) {
17521537
switch (pArgs[ArgPropIndex].type) {
17531538
case UR_EXP_KERNEL_ARG_TYPE_LOCAL: {
1754-
UR_CALL(ur_sanitizer_layer::asan::urKernelSetArgLocal(
1755-
hKernel, pArgs[ArgPropIndex].index, pArgs[ArgPropIndex].size,
1756-
nullptr));
1757-
KernelInfo.ArgProps[ArgPropIndex].size =
1758-
KernelInfo.LocalArgs[ArgPropIndex].SizeWithRedZone;
1539+
auto argSizeWithRZ = GetSizeAndRedzoneSizeForLocal(
1540+
pArgs[ArgPropIndex].size, ASAN_SHADOW_GRANULARITY,
1541+
ASAN_SHADOW_GRANULARITY);
1542+
KernelInfo.LocalArgs[pArgs[ArgPropIndex].index] =
1543+
LocalArgsInfo{pArgs[ArgPropIndex].size, argSizeWithRZ};
1544+
KernelInfo.ArgProps[ArgPropIndex].size = argSizeWithRZ;
17591545
break;
17601546
}
17611547
case UR_EXP_KERNEL_ARG_TYPE_POINTER: {
1762-
UR_CALL(ur_sanitizer_layer::asan::urKernelSetArgPointer(
1763-
hKernel, pArgs[ArgPropIndex].index, nullptr,
1764-
pArgs[ArgPropIndex].value.pointer));
1548+
KernelInfo.PointerArgs[pArgs[ArgPropIndex].index] = {
1549+
pArgs[ArgPropIndex].value.pointer, GetCurrentBacktrace()};
17651550
break;
17661551
}
17671552
case UR_EXP_KERNEL_ARG_TYPE_VALUE: {
1768-
UR_CALL(ur_sanitizer_layer::asan::urKernelSetArgValue(
1769-
hKernel, pArgs[ArgPropIndex].index, pArgs[ArgPropIndex].size, nullptr,
1770-
pArgs[ArgPropIndex].value.value));
1553+
std::shared_ptr<MemBuffer> MemBuffer;
1554+
if (pArgs[ArgPropIndex].size == sizeof(ur_mem_handle_t) &&
1555+
(MemBuffer = getAsanInterceptor()->getMemBuffer(
1556+
*ur_cast<const ur_mem_handle_t *>(
1557+
pArgs[ArgPropIndex].value.value)))) {
1558+
char *Handle = nullptr;
1559+
UR_CALL(MemBuffer->getHandle(GetDevice(hQueue), Handle));
1560+
KernelInfo.ArgProps[ArgPropIndex].type =
1561+
ur_exp_kernel_arg_type_t::UR_EXP_KERNEL_ARG_TYPE_POINTER;
1562+
KernelInfo.ArgProps[ArgPropIndex].value.pointer = Handle;
1563+
}
17711564
break;
17721565
}
17731566
case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: {
1774-
ur_kernel_arg_mem_obj_properties_t Properties = {
1775-
UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES, nullptr,
1776-
pArgs[ArgPropIndex].value.memObjTuple.flags};
1777-
UR_CALL(ur_sanitizer_layer::asan::urKernelSetArgMemObj(
1778-
hKernel, pArgs[ArgPropIndex].index, &Properties,
1779-
pArgs[ArgPropIndex].value.memObjTuple.hMem));
17801567
if (std::shared_ptr<MemBuffer> MemBuffer =
17811568
getAsanInterceptor()->getMemBuffer(
17821569
pArgs[ArgPropIndex].value.memObjTuple.hMem)) {
@@ -1788,14 +1575,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp(
17881575
}
17891576
break;
17901577
}
1791-
case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: {
1792-
auto pfnKernelSetArgSampler =
1793-
getContext()->urDdiTable.Kernel.pfnSetArgSampler;
1794-
UR_CALL(pfnKernelSetArgSampler(hKernel, pArgs[ArgPropIndex].index,
1795-
nullptr,
1796-
pArgs[ArgPropIndex].value.sampler));
1578+
case UR_EXP_KERNEL_ARG_TYPE_SAMPLER:
17971579
break;
1798-
}
17991580
default:
18001581
return UR_RESULT_ERROR_INVALID_ENUMERATION;
18011582
}
@@ -1949,10 +1730,6 @@ __urdlllocal ur_result_t UR_APICALL urGetKernelProcAddrTable(
19491730

19501731
pDdiTable->pfnRetain = ur_sanitizer_layer::asan::urKernelRetain;
19511732
pDdiTable->pfnRelease = ur_sanitizer_layer::asan::urKernelRelease;
1952-
pDdiTable->pfnSetArgValue = ur_sanitizer_layer::asan::urKernelSetArgValue;
1953-
pDdiTable->pfnSetArgMemObj = ur_sanitizer_layer::asan::urKernelSetArgMemObj;
1954-
pDdiTable->pfnSetArgLocal = ur_sanitizer_layer::asan::urKernelSetArgLocal;
1955-
pDdiTable->pfnSetArgPointer = ur_sanitizer_layer::asan::urKernelSetArgPointer;
19561733
pDdiTable->pfnSetExecInfo = ur_sanitizer_layer::asan::urKernelSetExecInfo;
19571734

19581735
return result;
@@ -2068,7 +1845,6 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
20681845
ur_sanitizer_layer::asan::urEnqueueMemBufferFill;
20691846
pDdiTable->pfnMemBufferMap = ur_sanitizer_layer::asan::urEnqueueMemBufferMap;
20701847
pDdiTable->pfnMemUnmap = ur_sanitizer_layer::asan::urEnqueueMemUnmap;
2071-
pDdiTable->pfnKernelLaunch = ur_sanitizer_layer::asan::urEnqueueKernelLaunch;
20721848

20731849
return result;
20741850
}

0 commit comments

Comments
 (0)