@@ -39,6 +39,26 @@ const cl_mutable_dispatch_fields_khr g_MutableDispatchCaps =
3939 CL_MUTABLE_DISPATCH_ARGUMENTS_KHR |
4040 CL_MUTABLE_DISPATCH_EXEC_INFO_KHR;
4141
42+ static cl_int enqueueProfilingKernel (
43+ cl_command_queue queue,
44+ cl_kernel kernel,
45+ cl_uint num_events_in_wait_list,
46+ const cl_event* event_wait_list,
47+ cl_event* event )
48+ {
49+ const size_t one = 1 ;
50+ return g_pNextDispatch->clEnqueueNDRangeKernel (
51+ queue,
52+ kernel,
53+ 1 ,
54+ nullptr ,
55+ &one,
56+ nullptr ,
57+ num_events_in_wait_list,
58+ event_wait_list,
59+ event );
60+ }
61+
4262typedef struct _cl_mutable_command_khr
4363{
4464 static bool isValid ( cl_mutable_command_khr command )
@@ -1254,6 +1274,11 @@ typedef struct _cl_command_buffer_khr
12541274 {
12551275 g_pNextDispatch->clReleaseCommandQueue (queue);
12561276 }
1277+
1278+ for ( auto kernel : ProfilingKernels )
1279+ {
1280+ g_pNextDispatch->clReleaseKernel (kernel);
1281+ }
12571282 }
12581283
12591284 static bool isValid ( cl_command_buffer_khr cmdbuf )
@@ -1297,20 +1322,17 @@ typedef struct _cl_command_buffer_khr
12971322
12981323 cl_command_queue getQueue () const
12991324 {
1300- if ( Queues.size () > 0 )
1301- {
1302- return Queues[0 ];
1303- }
1304- return nullptr ;
1325+ return Queues.empty () ? nullptr : Queues[0 ];
13051326 }
13061327
13071328 cl_command_queue getTestQueue () const
13081329 {
1309- if ( TestQueues.size () > 0 )
1310- {
1311- return TestQueues[0 ];
1312- }
1313- return nullptr ;
1330+ return TestQueues.empty () ? nullptr : TestQueues[0 ];
1331+ }
1332+
1333+ cl_kernel getProfilingKernel () const
1334+ {
1335+ return ProfilingKernels.empty () ? nullptr : ProfilingKernels[0 ];
13141336 }
13151337
13161338 cl_mutable_dispatch_asserts_khr getMutableDispatchAsserts () const
@@ -1671,6 +1693,7 @@ typedef struct _cl_command_buffer_khr
16711693 std::vector<bool > IsInOrder;
16721694 std::vector<cl_command_queue> TestQueues;
16731695 std::vector<cl_event> BlockingEvents;
1696+ std::vector<cl_kernel> ProfilingKernels;
16741697
16751698 std::vector<std::unique_ptr<Command>> Commands;
16761699 std::atomic<uint32_t > NextSyncPoint;
@@ -1747,6 +1770,52 @@ typedef struct _cl_command_buffer_khr
17471770 }
17481771 }
17491772
1773+ void setupProfilingKernel (cl_command_queue queue)
1774+ {
1775+ if ( g_KernelForProfiling )
1776+ {
1777+ cl_context context = nullptr ;
1778+ g_pNextDispatch->clGetCommandQueueInfo (
1779+ queue,
1780+ CL_QUEUE_CONTEXT,
1781+ sizeof (context),
1782+ &context,
1783+ nullptr );
1784+
1785+ cl_device_id device = nullptr ;
1786+ g_pNextDispatch->clGetCommandQueueInfo (
1787+ queue,
1788+ CL_QUEUE_DEVICE,
1789+ sizeof (device),
1790+ &device,
1791+ nullptr );
1792+
1793+ const char * kernelString = " kernel void Empty() {}" ;
1794+ cl_program program = g_pNextDispatch->clCreateProgramWithSource (
1795+ context,
1796+ 1 ,
1797+ &kernelString,
1798+ nullptr ,
1799+ nullptr );
1800+ g_pNextDispatch->clBuildProgram (
1801+ program,
1802+ 1 ,
1803+ &device,
1804+ nullptr ,
1805+ nullptr ,
1806+ nullptr );
1807+
1808+ cl_kernel kernel = g_pNextDispatch->clCreateKernel (
1809+ program,
1810+ " Empty" ,
1811+ nullptr );
1812+ g_pNextDispatch->clReleaseProgram (
1813+ program );
1814+
1815+ ProfilingKernels.push_back (kernel);
1816+ }
1817+ }
1818+
17501819 _cl_command_buffer_khr (
17511820 cl_command_buffer_flags_khr flags,
17521821 cl_mutable_dispatch_asserts_khr mutableDispatchAsserts) :
@@ -1993,7 +2062,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
19932062 queue,
19942063 num_events_in_wait_list,
19952064 event_wait_list,
1996- event ? &startEvent : nullptr );
2065+ event == nullptr || g_KernelForProfiling ? nullptr : &startEvent );
2066+ if ( errorCode == CL_SUCCESS && event && g_KernelForProfiling )
2067+ {
2068+ errorCode = enqueueProfilingKernel (
2069+ queue,
2070+ cmdbuf->getProfilingKernel (),
2071+ 0 ,
2072+ nullptr ,
2073+ &startEvent );
2074+ }
19972075 }
19982076
19992077 if ( errorCode == CL_SUCCESS )
@@ -2007,7 +2085,16 @@ cl_int CL_API_CALL clEnqueueCommandBufferKHR_EMU(
20072085 queue,
20082086 0 ,
20092087 nullptr ,
2010- event );
2088+ g_KernelForProfiling ? nullptr : event );
2089+ if ( errorCode == CL_SUCCESS && g_KernelForProfiling )
2090+ {
2091+ errorCode = enqueueProfilingKernel (
2092+ queue,
2093+ cmdbuf->getProfilingKernel (),
2094+ 0 ,
2095+ nullptr ,
2096+ event );
2097+ }
20112098 }
20122099
20132100 if ( event )
0 commit comments