@@ -486,80 +486,6 @@ ur_result_t UR_APICALL urProgramRelease(
486486 return UR_RESULT_SUCCESS;
487487}
488488
489- // /////////////////////////////////////////////////////////////////////////////
490- // / @brief Intercept function for urEnqueueKernelLaunch
491- __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch (
492- // / [in] handle of the queue object
493- ur_queue_handle_t hQueue,
494- // / [in] handle of the kernel object
495- ur_kernel_handle_t hKernel,
496- // / [in] number of dimensions, from 1 to 3, to specify the global and
497- // / work-group work-items
498- uint32_t workDim,
499- // / [in] pointer to an array of workDim unsigned values that specify the
500- // / offset used to calculate the global ID of a work-item
501- const size_t *pGlobalWorkOffset,
502- // / [in] pointer to an array of workDim unsigned values that specify the
503- // / number of global work-items in workDim that will execute the kernel
504- // / function
505- const size_t *pGlobalWorkSize,
506- // / [in][optional] pointer to an array of workDim unsigned values that
507- // / specify the number of local work-items forming a work-group that will
508- // / execute the kernel function. If nullptr, the runtime implementation will
509- // / choose the work-group size.
510- const size_t *pLocalWorkSize,
511- // / [in][optional] pointer to a single linked list of launch properties
512- const ur_kernel_launch_ext_properties_t *launchPropList,
513- // / [in] size of the event wait list
514- uint32_t numEventsInWaitList,
515- // / [in][optional][range(0, numEventsInWaitList)] pointer to a list of
516- // / events that must be complete before the kernel execution. If
517- // / nullptr, the numEventsInWaitList must be 0, indicating that no wait
518- // / event.
519- const ur_event_handle_t *phEventWaitList,
520- // / [out][optional] return an event object that identifies this
521- // / particular kernel execution instance.
522- ur_event_handle_t *phEvent) {
523-
524- // This mutex is to prevent concurrent kernel launches across different queues
525- // as the DeviceASAN local/private shadow memory does not support concurrent
526- // kernel launches now.
527- std::scoped_lock<ur_shared_mutex> Guard (
528- getAsanInterceptor ()->KernelLaunchMutex );
529-
530- auto pfnKernelLaunch = getContext ()->urDdiTable .Enqueue .pfnKernelLaunch ;
531-
532- if (nullptr == pfnKernelLaunch) {
533- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
534- }
535-
536- UR_LOG_L (getContext ()->logger , DEBUG, " ==== urEnqueueKernelLaunch" );
537-
538- LaunchInfo LaunchInfo (GetContext (hQueue), GetDevice (hQueue), pGlobalWorkSize,
539- pLocalWorkSize, pGlobalWorkOffset, workDim);
540- UR_CALL (LaunchInfo.Data .syncToDevice (hQueue));
541-
542- UR_CALL (getAsanInterceptor ()->preLaunchKernel (hKernel, hQueue, LaunchInfo));
543-
544- ur_result_t UrRes = getContext ()->urDdiTable .Enqueue .pfnKernelLaunch (
545- hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
546- LaunchInfo.LocalWorkSize .data (), launchPropList, numEventsInWaitList,
547- phEventWaitList, phEvent);
548- if (UrRes != UR_RESULT_SUCCESS) {
549- if (UrRes == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) {
550- UR_LOG_L (
551- getContext ()->logger , ERR,
552- " urEnqueueKernelLaunch failed due to out of device memory, maybe "
553- " SLM is fully used." );
554- }
555- return UrRes;
556- }
557-
558- UR_CALL (getAsanInterceptor ()->postLaunchKernel (hKernel, hQueue, LaunchInfo));
559-
560- return UR_RESULT_SUCCESS;
561- }
562-
563489// /////////////////////////////////////////////////////////////////////////////
564490// / @brief Intercept function for urContextCreate
565491__urdlllocal ur_result_t UR_APICALL urContextCreate (
@@ -1482,143 +1408,6 @@ __urdlllocal ur_result_t urKernelRelease(
14821408 return UR_RESULT_SUCCESS;
14831409}
14841410
1485- // /////////////////////////////////////////////////////////////////////////////
1486- // / @brief Intercept function for urKernelSetArgValue
1487- __urdlllocal ur_result_t UR_APICALL urKernelSetArgValue (
1488- // / [in] handle of the kernel object
1489- ur_kernel_handle_t hKernel,
1490- // / [in] argument index in range [0, num args - 1]
1491- uint32_t argIndex,
1492- // / [in] size of argument type
1493- size_t argSize,
1494- // / [in][optional] pointer to value properties.
1495- const ur_kernel_arg_value_properties_t *pProperties,
1496- // / [in] argument value represented as matching arg type.
1497- const void *pArgValue) {
1498- auto pfnSetArgValue = getContext ()->urDdiTable .Kernel .pfnSetArgValue ;
1499-
1500- if (nullptr == pfnSetArgValue) {
1501- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1502- }
1503-
1504- UR_LOG_L (getContext ()->logger , DEBUG, " ==== urKernelSetArgValue" );
1505-
1506- std::shared_ptr<MemBuffer> MemBuffer;
1507- if (argSize == sizeof (ur_mem_handle_t ) &&
1508- (MemBuffer = getAsanInterceptor ()->getMemBuffer (
1509- *ur_cast<const ur_mem_handle_t *>(pArgValue)))) {
1510- auto &KernelInfo = getAsanInterceptor ()->getOrCreateKernelInfo (hKernel);
1511- std::scoped_lock<ur_shared_mutex> Guard (KernelInfo.Mutex );
1512- KernelInfo.BufferArgs [argIndex] = std::move (MemBuffer);
1513- } else {
1514- UR_CALL (pfnSetArgValue (hKernel, argIndex, argSize, pProperties, pArgValue));
1515- }
1516-
1517- return UR_RESULT_SUCCESS;
1518- }
1519-
1520- // /////////////////////////////////////////////////////////////////////////////
1521- // / @brief Intercept function for urKernelSetArgMemObj
1522- __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj (
1523- // / [in] handle of the kernel object
1524- ur_kernel_handle_t hKernel,
1525- // / [in] argument index in range [0, num args - 1]
1526- uint32_t argIndex,
1527- // / [in][optional] pointer to Memory object properties.
1528- const ur_kernel_arg_mem_obj_properties_t *pProperties,
1529- // / [in][optional] handle of Memory object.
1530- ur_mem_handle_t hArgValue) {
1531- auto pfnSetArgMemObj = getContext ()->urDdiTable .Kernel .pfnSetArgMemObj ;
1532-
1533- if (nullptr == pfnSetArgMemObj) {
1534- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1535- }
1536-
1537- UR_LOG_L (getContext ()->logger , DEBUG, " ==== urKernelSetArgMemObj" );
1538-
1539- std::shared_ptr<MemBuffer> MemBuffer;
1540- if ((MemBuffer = getAsanInterceptor ()->getMemBuffer (hArgValue))) {
1541- auto &KernelInfo = getAsanInterceptor ()->getOrCreateKernelInfo (hKernel);
1542- std::scoped_lock<ur_shared_mutex> Guard (KernelInfo.Mutex );
1543- KernelInfo.BufferArgs [argIndex] = std::move (MemBuffer);
1544- } else {
1545- UR_CALL (pfnSetArgMemObj (hKernel, argIndex, pProperties, hArgValue));
1546- }
1547-
1548- return UR_RESULT_SUCCESS;
1549- }
1550-
1551- // /////////////////////////////////////////////////////////////////////////////
1552- // / @brief Intercept function for urKernelSetArgLocal
1553- __urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal (
1554- // / [in] handle of the kernel object
1555- ur_kernel_handle_t hKernel,
1556- // / [in] argument index in range [0, num args - 1]
1557- uint32_t argIndex,
1558- // / [in] size of the local buffer to be allocated by the runtime
1559- size_t argSize,
1560- // / [in][optional] pointer to local buffer properties.
1561- const ur_kernel_arg_local_properties_t *pProperties) {
1562- auto pfnSetArgLocal = getContext ()->urDdiTable .Kernel .pfnSetArgLocal ;
1563-
1564- if (nullptr == pfnSetArgLocal) {
1565- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1566- }
1567-
1568- UR_LOG_L (getContext ()->logger , DEBUG,
1569- " ==== urKernelSetArgLocal (argIndex={}, argSize={})" , argIndex,
1570- argSize);
1571-
1572- {
1573- auto &KI = getAsanInterceptor ()->getOrCreateKernelInfo (hKernel);
1574- std::scoped_lock<ur_shared_mutex> Guard (KI.Mutex );
1575- // TODO: get local variable alignment
1576- auto argSizeWithRZ = GetSizeAndRedzoneSizeForLocal (
1577- argSize, ASAN_SHADOW_GRANULARITY, ASAN_SHADOW_GRANULARITY);
1578- KI.LocalArgs [argIndex] = LocalArgsInfo{argSize, argSizeWithRZ};
1579- argSize = argSizeWithRZ;
1580- }
1581-
1582- ur_result_t result = pfnSetArgLocal (hKernel, argIndex, argSize, pProperties);
1583-
1584- return result;
1585- }
1586-
1587- // /////////////////////////////////////////////////////////////////////////////
1588- // / @brief Intercept function for urKernelSetArgPointer
1589- __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer (
1590- // / [in] handle of the kernel object
1591- ur_kernel_handle_t hKernel,
1592- // / [in] argument index in range [0, num args - 1]
1593- uint32_t argIndex,
1594- // / [in][optional] pointer to USM pointer properties.
1595- const ur_kernel_arg_pointer_properties_t *pProperties,
1596- // / [in][optional] Pointer obtained by USM allocation or virtual memory
1597- // / mapping operation. If null then argument value is considered null.
1598- const void *pArgValue) {
1599- auto pfnSetArgPointer = getContext ()->urDdiTable .Kernel .pfnSetArgPointer ;
1600-
1601- if (nullptr == pfnSetArgPointer) {
1602- return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
1603- }
1604-
1605- UR_LOG_L (getContext ()->logger , DEBUG,
1606- " ==== urKernelSetArgPointer (argIndex={}, pArgValue={})" , argIndex,
1607- pArgValue);
1608-
1609- std::shared_ptr<KernelInfo> KI;
1610- if (getContext ()->Options .DetectKernelArguments ) {
1611- auto &KI = getAsanInterceptor ()->getOrCreateKernelInfo (hKernel);
1612- std::scoped_lock<ur_shared_mutex> Guard (KI.Mutex );
1613- KI.PointerArgs [argIndex] = {pArgValue, GetCurrentBacktrace ()};
1614- }
1615-
1616- ur_result_t result =
1617- pfnSetArgPointer (hKernel, argIndex, pProperties, pArgValue);
1618-
1619- return result;
1620- }
1621-
16221411__urdlllocal ur_result_t UR_APICALL urKernelSetExecInfo (
16231412 // / [in] handle of the kernel object
16241413 ur_kernel_handle_t hKernel,
@@ -1744,39 +1533,37 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp(
17441533 std::memcpy (KernelInfo.ArgProps .data (), pArgs,
17451534 numArgs * sizeof (ur_exp_kernel_arg_properties_t ));
17461535
1747- // We need to set all the args now rather than letting LaunchWithArgs handle
1748- // them. This is because some implementations of
1749- // urKernelGetSuggestedLocalWorkSize, which is used in preLaunchKernel, rely
1750- // on all the args being set.
17511536 for (uint32_t ArgPropIndex = 0 ; ArgPropIndex < numArgs; ArgPropIndex++) {
17521537 switch (pArgs[ArgPropIndex].type ) {
17531538 case UR_EXP_KERNEL_ARG_TYPE_LOCAL: {
1754- UR_CALL (ur_sanitizer_layer::asan::urKernelSetArgLocal (
1755- hKernel, pArgs[ArgPropIndex].index , pArgs[ArgPropIndex].size ,
1756- nullptr ));
1757- KernelInfo.ArgProps [ArgPropIndex].size =
1758- KernelInfo.LocalArgs [ArgPropIndex].SizeWithRedZone ;
1539+ auto argSizeWithRZ = GetSizeAndRedzoneSizeForLocal (
1540+ pArgs[ArgPropIndex].size , ASAN_SHADOW_GRANULARITY,
1541+ ASAN_SHADOW_GRANULARITY);
1542+ KernelInfo.LocalArgs [pArgs[ArgPropIndex].index ] =
1543+ LocalArgsInfo{pArgs[ArgPropIndex].size , argSizeWithRZ};
1544+ KernelInfo.ArgProps [ArgPropIndex].size = argSizeWithRZ;
17591545 break ;
17601546 }
17611547 case UR_EXP_KERNEL_ARG_TYPE_POINTER: {
1762- UR_CALL (ur_sanitizer_layer::asan::urKernelSetArgPointer (
1763- hKernel, pArgs[ArgPropIndex].index , nullptr ,
1764- pArgs[ArgPropIndex].value .pointer ));
1548+ KernelInfo.PointerArgs [pArgs[ArgPropIndex].index ] = {
1549+ pArgs[ArgPropIndex].value .pointer , GetCurrentBacktrace ()};
17651550 break ;
17661551 }
17671552 case UR_EXP_KERNEL_ARG_TYPE_VALUE: {
1768- UR_CALL (ur_sanitizer_layer::asan::urKernelSetArgValue (
1769- hKernel, pArgs[ArgPropIndex].index , pArgs[ArgPropIndex].size , nullptr ,
1770- pArgs[ArgPropIndex].value .value ));
1553+ std::shared_ptr<MemBuffer> MemBuffer;
1554+ if (pArgs[ArgPropIndex].size == sizeof (ur_mem_handle_t ) &&
1555+ (MemBuffer = getAsanInterceptor ()->getMemBuffer (
1556+ *ur_cast<const ur_mem_handle_t *>(
1557+ pArgs[ArgPropIndex].value .value )))) {
1558+ char *Handle = nullptr ;
1559+ UR_CALL (MemBuffer->getHandle (GetDevice (hQueue), Handle));
1560+ KernelInfo.ArgProps [ArgPropIndex].type =
1561+ ur_exp_kernel_arg_type_t ::UR_EXP_KERNEL_ARG_TYPE_POINTER;
1562+ KernelInfo.ArgProps [ArgPropIndex].value .pointer = Handle;
1563+ }
17711564 break ;
17721565 }
17731566 case UR_EXP_KERNEL_ARG_TYPE_MEM_OBJ: {
1774- ur_kernel_arg_mem_obj_properties_t Properties = {
1775- UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES, nullptr ,
1776- pArgs[ArgPropIndex].value .memObjTuple .flags };
1777- UR_CALL (ur_sanitizer_layer::asan::urKernelSetArgMemObj (
1778- hKernel, pArgs[ArgPropIndex].index , &Properties,
1779- pArgs[ArgPropIndex].value .memObjTuple .hMem ));
17801567 if (std::shared_ptr<MemBuffer> MemBuffer =
17811568 getAsanInterceptor ()->getMemBuffer (
17821569 pArgs[ArgPropIndex].value .memObjTuple .hMem )) {
@@ -1788,14 +1575,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp(
17881575 }
17891576 break ;
17901577 }
1791- case UR_EXP_KERNEL_ARG_TYPE_SAMPLER: {
1792- auto pfnKernelSetArgSampler =
1793- getContext ()->urDdiTable .Kernel .pfnSetArgSampler ;
1794- UR_CALL (pfnKernelSetArgSampler (hKernel, pArgs[ArgPropIndex].index ,
1795- nullptr ,
1796- pArgs[ArgPropIndex].value .sampler ));
1578+ case UR_EXP_KERNEL_ARG_TYPE_SAMPLER:
17971579 break ;
1798- }
17991580 default :
18001581 return UR_RESULT_ERROR_INVALID_ENUMERATION;
18011582 }
@@ -1949,10 +1730,6 @@ __urdlllocal ur_result_t UR_APICALL urGetKernelProcAddrTable(
19491730
19501731 pDdiTable->pfnRetain = ur_sanitizer_layer::asan::urKernelRetain;
19511732 pDdiTable->pfnRelease = ur_sanitizer_layer::asan::urKernelRelease;
1952- pDdiTable->pfnSetArgValue = ur_sanitizer_layer::asan::urKernelSetArgValue;
1953- pDdiTable->pfnSetArgMemObj = ur_sanitizer_layer::asan::urKernelSetArgMemObj;
1954- pDdiTable->pfnSetArgLocal = ur_sanitizer_layer::asan::urKernelSetArgLocal;
1955- pDdiTable->pfnSetArgPointer = ur_sanitizer_layer::asan::urKernelSetArgPointer;
19561733 pDdiTable->pfnSetExecInfo = ur_sanitizer_layer::asan::urKernelSetExecInfo;
19571734
19581735 return result;
@@ -2068,7 +1845,6 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
20681845 ur_sanitizer_layer::asan::urEnqueueMemBufferFill;
20691846 pDdiTable->pfnMemBufferMap = ur_sanitizer_layer::asan::urEnqueueMemBufferMap;
20701847 pDdiTable->pfnMemUnmap = ur_sanitizer_layer::asan::urEnqueueMemUnmap;
2071- pDdiTable->pfnKernelLaunch = ur_sanitizer_layer::asan::urEnqueueKernelLaunch;
20721848
20731849 return result;
20741850}
0 commit comments