Skip to content

Commit 27f4395

Browse files
zhaomaosu authored and
kbenzie committed
Sync thread clocks before/after barrier call (#17900)
Barrier instructions synchronize the execution status of work items, so we need to sync thread clocks before and after each barrier call to avoid false-positive reports.
1 parent e4e05b3 commit 27f4395

2 files changed

Lines changed: 73 additions & 1 deletion

File tree

source/loader/layers/sanitizer/tsan/tsan_ddi.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,6 +1159,53 @@ ur_result_t urEnqueueKernelLaunch(
11591159
return UR_RESULT_SUCCESS;
11601160
}
11611161

1162+
/// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp
///
/// Wraps the underlying cooperative kernel launch with the TSAN
/// interceptor's pre-launch and post-launch hooks: logs the call, builds a
/// LaunchInfo from the queue's context and device, runs preLaunchKernel,
/// forwards every argument unchanged to the pfnCooperativeKernelLaunchExp
/// entry stored in the context's urDdiTable, then runs postLaunchKernel.
///
/// @returns
///     - ::UR_RESULT_SUCCESS when all three wrapped calls complete.
///     - NOTE(review): UR_CALL is defined outside this view; it presumably
///       returns the failing ur_result_t early - confirm against the macro.
ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp(
1163+
/// [in] handle of the queue object
1164+
ur_queue_handle_t hQueue,
1165+
/// [in] handle of the kernel object
1166+
ur_kernel_handle_t hKernel,
1167+
/// [in] number of dimensions, from 1 to 3, to specify the global and
1168+
/// work-group work-items
1169+
uint32_t workDim,
1170+
/// [in] pointer to an array of workDim unsigned values that specify the
1171+
/// offset used to calculate the global ID of a work-item
1172+
const size_t *pGlobalWorkOffset,
1173+
/// [in] pointer to an array of workDim unsigned values that specify the
1174+
/// number of global work-items in workDim that will execute the kernel
1175+
/// function
1176+
const size_t *pGlobalWorkSize,
1177+
/// [in][optional] pointer to an array of workDim unsigned values that
1178+
/// specify the number of local work-items forming a work-group that will
1179+
/// execute the kernel function.
1180+
/// If nullptr, the runtime implementation will choose the work-group size.
1181+
const size_t *pLocalWorkSize,
1182+
/// [in] size of the event wait list
1183+
uint32_t numEventsInWaitList,
1184+
/// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
1185+
/// events that must be complete before the kernel execution.
1186+
/// If nullptr, numEventsInWaitList must be 0, indicating that there are no
1187+
/// events to wait on.
1188+
const ur_event_handle_t *phEventWaitList,
1189+
/// [out][optional][alloc] return an event object that identifies this
1190+
/// particular kernel execution instance. If phEventWaitList and phEvent
1191+
/// are not NULL, phEvent must not refer to an element of the
1192+
/// phEventWaitList array.
1193+
ur_event_handle_t *phEvent) {
1194+
// Trace entry into this intercepted entry point at debug level.
getContext()->logger.debug("==== urEnqueueCooperativeKernelLaunchExp");
1195+
1196+
// Per-launch state, built from the context and device that own hQueue and
// shared by the pre- and post-launch hooks below.
LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue));
1197+
1198+
// Let the TSAN interceptor prepare for the launch before the kernel is
// enqueued.
UR_CALL(getTsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo));
1199+
1200+
// Forward the launch, with all caller arguments untouched, to the
// implementation recorded in the context's DDI table.
UR_CALL(getContext()->urDdiTable.EnqueueExp.pfnCooperativeKernelLaunchExp(
1201+
hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
1202+
pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent));
1203+
1204+
// Give the interceptor its post-launch hook with the same LaunchInfo.
UR_CALL(getTsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo));
1205+
1206+
return UR_RESULT_SUCCESS;
1207+
}
1208+
11621209
ur_result_t urCheckVersion(ur_api_version_t version) {
11631210
if (UR_MAJOR_VERSION(ur_sanitizer_layer::getContext()->version) !=
11641211
UR_MAJOR_VERSION(version) ||
@@ -1337,6 +1384,25 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable(
13371384
return UR_RESULT_SUCCESS;
13381385
}
13391386

1387+
///////////////////////////////////////////////////////////////////////////////
1388+
/// @brief Exported function for filling application's EnqueueExp table
1389+
/// with current process' addresses
1390+
///
/// Only the pfnCooperativeKernelLaunchExp entry is overwritten, pointing it
/// at the TSAN layer's urEnqueueCooperativeKernelLaunchExp; every other
/// entry of the table is left as the caller provided it.
///
1391+
/// @returns
1392+
/// - ::UR_RESULT_SUCCESS
1393+
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
///     + `nullptr == pDdiTable`
1394+
__urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable(
1395+
/// [in,out] pointer to table of DDI function pointers
1396+
ur_enqueue_exp_dditable_t *pDdiTable) {
1397+
// Reject a missing table up front; nothing is written in that case.
if (nullptr == pDdiTable) {
1398+
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
1399+
}
1400+
1401+
// Route cooperative kernel launches through the TSAN intercept function.
pDdiTable->pfnCooperativeKernelLaunchExp =
1402+
ur_sanitizer_layer::tsan::urEnqueueCooperativeKernelLaunchExp;
1403+
return UR_RESULT_SUCCESS;
1404+
}
1405+
13401406
} // namespace tsan
13411407

13421408
ur_result_t initTsanDDITable(ur_dditable_t *dditable) {
@@ -1381,6 +1447,11 @@ ur_result_t initTsanDDITable(ur_dditable_t *dditable) {
13811447
ur_sanitizer_layer::tsan::urGetEnqueueProcAddrTable(&dditable->Enqueue);
13821448
}
13831449

1450+
if (UR_RESULT_SUCCESS == result) {
1451+
result = ur_sanitizer_layer::tsan::urGetEnqueueExpProcAddrTable(
1452+
&dditable->EnqueueExp);
1453+
}
1454+
13841455
if (result != UR_RESULT_SUCCESS) {
13851456
getContext()->logger.error("Initialize TSAN DDI table failed: {}", result);
13861457
}

source/loader/layers/sanitizer/tsan/tsan_libdevice.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ struct TsanRuntimeData {
8181

8282
uintptr_t GlobalShadowOffsetEnd = 0;
8383

84-
VectorClock Clock[kThreadSlotCount];
84+
// The last one is to record global state
85+
VectorClock Clock[kThreadSlotCount + 1];
8586

8687
DeviceType DeviceTy = DeviceType::UNKNOWN;
8788

0 commit comments

Comments
 (0)