diff --git a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md
index d0c7c3e59869a..5079972359c7a 100644
--- a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md
+++ b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md
@@ -244,6 +244,18 @@ All trace point types in bold provide semantic information about the graph, node
The `"sycl.debug"` stream emits the same notifications as the `"sycl"` stream, but with additional metadata. If toolchains want to keep the overhead low then subscribing to `"sycl"` stream is the right option, if toolchains want to get more data and keeping overheads low is not important then they should subscribe to `"sycl.debug"`.
If a tool subscribes to both `"sycl"` and `"sycl.debug"`, only notifications from `"sycl.debug"` will be delivered to avoid duplication.
+
+### Stream Detail Level Control
+
+The SYCL runtime implements XPTI stream detail level control for the `"sycl"` stream (see [XPTI Stream Detail Level Control](https://github.com/intel/llvm/tree/sycl/xptifw/doc/XPTI_Framework.md#stream-detail-level-control) for the XPTI framework feature description). This allows subscribers to control the amount of optional metadata emitted based on their needs, providing fine-grained control over tracing overhead.
+
+The SYCL runtime respects the effective detail level when emitting metadata and uses the following strategy:
+- Metadata is emitted at VERBOSE level if either:
+ - A subscriber requests VERBOSE level on the `"sycl"` stream, OR
+ - A subscriber is registered for the `"sycl.debug"` stream (legacy behavior)
+- Otherwise, metadata is emitted according to the requested detail level (BASIC or NORMAL)
+
+See the metadata table below for which fields are emitted at each detail level. Note that stream detail level control is only implemented for the `"sycl"` stream; the `"sycl.debug"` stream always emits all metadata (equivalent to VERBOSE level).
| Trace Point Type | Parameter Description | Metadata |
| :----------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **`graph_create`** |
**trace_type**: `xpti::trace_point_type_t::graph_create` that marks the creation of an asynchronous graph. **parent**: `nullptr` **event**: The global asynchronous graph object ID. All other graph related events such as node and edge creation will always this ID as the parent ID. **instance**: Unique ID related to the event, but not a correlation ID as there are other events to correlate to. **user_data**: `nullptr` SYCL runtime will always have one instance of a graph object with many disjoint subgraphs that get created during the execution of an application.
| None |
@@ -262,28 +274,28 @@ If a tool subscribes to both `"sycl"` and `"sycl.debug"`, only notifications fro
### Metadata description
-| Metadata | Type | Description |
-| :--------------------: | :-------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `access_mode` | `int` | Value of `sycl::access::mode` enum |
-| `access_range_start` | `size_t` | Start of accessor range |
-| `access_range_end` | `size_t` | End of accessor range |
-| `allocation_type` | C-style string | Allocation type |
-| `copy_from` | `size_t` | ID of source device |
-| `copy_to` | `size_t` | ID of target device |
-| `event` | `size_t` | Unique identifier of event |
-| `from_source` | `bool` | `true` if kernel comes from user source |
-| `kernel_name` | C-style string | Kernel name |
-| `memory_object` | `size_t` | Unique identifier of memory object |
-| `offset` | `size_t` | Accessor offset size in bytes |
-| `sycl_device` | `size_t` | Unique identifier of SYCL device |
-| `sycl_device_type` | C-style string | `CPU`, `GPU`, `ACC`, or `HOST` |
-| `sycl_device_name` | C-style string | Result of `sycl::device::get_info()` |
-| `sym_function_name` | C-style string | Function name |
-| `sym_source_file_name` | C-style string | Source file name |
-| `sym_line_no` | `int32_t` | File line number |
-| `sym_column_no` | `int32_t` | File column number |
-| `enqueue_kernel_data` | `xpti::offload_kernel_arg_data_t` | Includes kernel execution parameters (global size, local size, offset) and number of kernel arguments |
-| `argN` | `xpti::offload_kernel_arg_data_t` | Description for the Nth kernel argument. It includes argument kind (sycl::detail::kernel_param_kind_t), pointer to the value, size and index in the argument list. |
+| Metadata | Type | Detail Level | Description |
+| :--------------------: | :-------------------------------: | :----------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `kernel_name` | C-style string | BASIC | Kernel name |
+| `memory_object` | `size_t` | BASIC | Unique identifier of memory object |
+| `offset` | `size_t` | NORMAL | Accessor offset size in bytes |
+| `access_mode` | `int` | NORMAL | Value of `sycl::access::mode` enum |
+| `access_range_start` | `size_t` | NORMAL | Start of accessor range |
+| `access_range_end` | `size_t` | NORMAL | End of accessor range |
+| `allocation_type` | C-style string | NORMAL | Allocation type |
+| `copy_from` | `size_t` | NORMAL | ID of source device |
+| `copy_to` | `size_t` | NORMAL | ID of target device |
+| `event` | `size_t` | NORMAL | Unique identifier of event |
+| `sycl_device` | `size_t` | NORMAL | Unique identifier of SYCL device |
+| `sycl_device_type` | C-style string | NORMAL | `CPU`, `GPU`, `ACC`, or `HOST` |
+| `sycl_device_name` | C-style string | NORMAL | Result of `sycl::device::get_info()` |
+| `from_source` | `bool` | VERBOSE | `true` if kernel comes from user source |
+| `sym_function_name` | C-style string | VERBOSE | Function name |
+| `sym_source_file_name` | C-style string | VERBOSE | Source file name |
+| `sym_line_no` | `int32_t` | VERBOSE | File line number |
+| `sym_column_no` | `int32_t` | VERBOSE | File column number |
+| `enqueue_kernel_data` | `xpti::offload_kernel_arg_data_t` | VERBOSE | Includes kernel execution parameters (global size, local size, offset) and number of kernel arguments |
+| `argN` | `xpti::offload_kernel_arg_data_t` | VERBOSE | Description for the Nth kernel argument. It includes argument kind (sycl::detail::kernel_param_kind_t), pointer to the value, size and index in the argument list. |
## Buffer management stream `"sycl.experimental.buffer"` Notification Signatures
diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp
index 1f57038c51cf7..3d3c31cda6d9a 100644
--- a/sycl/source/detail/queue_impl.cpp
+++ b/sycl/source/detail/queue_impl.cpp
@@ -135,12 +135,17 @@ EventImplPtr queue_impl::memset(void *Ptr, int Value, size_t Count,
// This information is necessary for memset, so we will not guard it by debug
// stream check.
TP.addMetadata([&](auto TEvent) {
- xpti::addMetadata(TEvent, "sycl_device",
- reinterpret_cast(MDevice.getHandleRef()));
- xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr));
- xpti::addMetadata(TEvent, "value_set", Value);
xpti::addMetadata(TEvent, "memory_size", Count);
- xpti::addMetadata(TEvent, "queue_id", MQueueID);
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ isDebugStream(detail::getActiveXPTIStreamID())) {
+ xpti::addMetadata(TEvent, "sycl_device",
+ reinterpret_cast(MDevice.getHandleRef()));
+ xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr));
+ xpti::addMetadata(TEvent, "value_set", Value);
+
+ xpti::addMetadata(TEvent, "queue_id", MQueueID);
+ }
});
// Before we notifiy the subscribers, we broadcast the 'queue_id', which was a
@@ -189,13 +194,18 @@ EventImplPtr queue_impl::memcpy(void *Dest, const void *Src, size_t Count,
const char *UserData = "memory_transfer_node::memcpy";
// We will include this metadata information as it is required for memcpy.
TP.addMetadata([&](auto TEvent) {
- xpti::addMetadata(TEvent, "sycl_device",
- reinterpret_cast(MDevice.getHandleRef()));
- xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast(Src));
- xpti::addMetadata(TEvent, "dest_memory_ptr",
- reinterpret_cast(Dest));
xpti::addMetadata(TEvent, "memory_size", Count);
- xpti::addMetadata(TEvent, "queue_id", MQueueID);
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ isDebugStream(detail::getActiveXPTIStreamID())) {
+ xpti::addMetadata(TEvent, "sycl_device",
+ reinterpret_cast(MDevice.getHandleRef()));
+ xpti::addMetadata(TEvent, "src_memory_ptr",
+ reinterpret_cast(Src));
+ xpti::addMetadata(TEvent, "dest_memory_ptr",
+ reinterpret_cast(Dest));
+ xpti::addMetadata(TEvent, "queue_id", MQueueID);
+ }
});
// Before we notify the subscribers, we stash the 'queue_id', which was a
// metadata entry to TLS for use by callback handlers
@@ -997,12 +1007,16 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc,
IId = xptiGetUniqueId();
auto WaitEvent = Event->event_ref();
- // We will allow the device type to be set
- xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this));
- // We limit the amount of metadata that is added to the regular stream.
- // Only "sycl.debug" stream will have the full information. This improves the
- // performance when this data is not required by the tool or the collector.
- if (isDebugStream(StreamID)) {
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ isDebugStream(StreamID)) {
+ xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this));
+ }
+ // Full metadata is added only at VERBOSE level or if subscribing to
+ // sycl.debug stream.
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE ||
+ isDebugStream(StreamID)) {
if (HasSourceInfo) {
xpti::addMetadata(WaitEvent, "sym_function_name", CodeLoc.functionName());
xpti::addMetadata(WaitEvent, "sym_source_file_name", CodeLoc.fileName());
@@ -1130,18 +1144,20 @@ void queue_impl::constructorNotification() {
xpti_td *TEvent = Event->event_ref();
// Cache the trace event, stream id and instance IDs for the destructor.
MTraceEvent = (void *)TEvent;
- // We will allow the queue metadata to be set as this is performed
- // infrequently.
- xpti::addMetadata(TEvent, "sycl_context",
- reinterpret_cast(MContext->getHandleRef()));
- xpti::addMetadata(TEvent, "sycl_device_name",
- MDevice.get_info());
- xpti::addMetadata(TEvent, "sycl_device",
- reinterpret_cast(MDevice.getHandleRef()));
- xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
- xpti::addMetadata(TEvent, "queue_id", MQueueID);
- xpti::addMetadata(TEvent, "queue_handle",
- reinterpret_cast(getHandleRef()));
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ isDebugStream(detail::getActiveXPTIStreamID())) {
+ xpti::addMetadata(TEvent, "sycl_context",
+ reinterpret_cast(MContext->getHandleRef()));
+ xpti::addMetadata(TEvent, "sycl_device_name",
+ MDevice.get_info());
+ xpti::addMetadata(TEvent, "sycl_device",
+ reinterpret_cast(MDevice.getHandleRef()));
+ xpti::addMetadata(TEvent, "is_inorder", MIsInorder);
+ xpti::addMetadata(TEvent, "queue_id", MQueueID);
+ xpti::addMetadata(TEvent, "queue_handle",
+ reinterpret_cast(getHandleRef()));
+ }
// Also publish to TLS before notification.
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID);
xptiNotifySubscribers(detail::getActiveXPTIStreamID(),
diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp
index d0ca0667b2139..447ff0225eb03 100644
--- a/sycl/source/detail/scheduler/commands.cpp
+++ b/sycl/source/detail/scheduler/commands.cpp
@@ -91,7 +91,13 @@ static size_t deviceToID(const device &Device) {
return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef());
}
-static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue) {
+static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue,
+ xpti::stream_id_t StreamID) {
+ if (detail::GSYCLStreamDetailLevel <
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL &&
+ !detail::isDebugStream(StreamID))
+ return;
+
xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue));
if (Queue) {
xpti::addMetadata(TraceEvent, "sycl_device",
@@ -102,8 +108,9 @@ static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue) {
}
}
static void addDeviceMetadata(xpti_td *TraceEvent,
- const std::shared_ptr &Queue) {
- addDeviceMetadata(TraceEvent, Queue.get());
+ const std::shared_ptr &Queue,
+ xpti::stream_id_t StreamID) {
+ addDeviceMetadata(TraceEvent, Queue.get(), StreamID);
}
static unsigned long long getQueueID(queue_impl *Queue) {
@@ -594,14 +601,22 @@ void Command::emitEdgeEventForCommandDependence(
xpti_td *TgtEvent = static_cast(MTraceEvent);
EdgeEvent->source_id = SrcEvent->unique_id;
EdgeEvent->target_id = TgtEvent->unique_id;
- // We allow this metadata to be set as it describes the edge.
if (IsCommand) {
- xpti::addMetadata(EdgeEvent, "access_mode",
- static_cast(AccMode.value()));
xpti::addMetadata(EdgeEvent, "memory_object",
reinterpret_cast(ObjAddr));
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ detail::isDebugStream(MStreamID)) {
+ xpti::addMetadata(EdgeEvent, "access_mode",
+ static_cast(AccMode.value()));
+ }
} else {
- xpti::addMetadata(EdgeEvent, "event", reinterpret_cast(ObjAddr));
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ detail::isDebugStream(MStreamID)) {
+ xpti::addMetadata(EdgeEvent, "event",
+ reinterpret_cast(ObjAddr));
+ }
}
xptiNotifySubscribers(MStreamID, NotificationTraceType,
detail::GSYCLGraphEvent, EdgeEvent, EdgeInstanceNo,
@@ -671,10 +686,12 @@ void Command::emitEdgeEventForEventDependence(Command *Cmd,
xpti_td *TgtEvent = static_cast(MTraceEvent);
EdgeEvent->source_id = NodeEvent->unique_id;
EdgeEvent->target_id = TgtEvent->unique_id;
- // We allow this metadata to be set as an edge without the event address
- // will be less useful.
- xpti::addMetadata(EdgeEvent, "event",
- reinterpret_cast(UrEventAddr));
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ detail::isDebugStream(MStreamID)) {
+ xpti::addMetadata(EdgeEvent, "event",
+ reinterpret_cast(UrEventAddr));
+ }
xptiNotifySubscribers(MStreamID, xpti::trace_edge_create,
detail::GSYCLGraphEvent, EdgeEvent, EdgeInstanceNo,
nullptr);
@@ -1030,7 +1047,7 @@ void AllocaCommandBase::emitInstrumentationData() {
// internal infrastructure to guarantee collision free universal IDs.
if (MTraceEvent) {
xpti_td *TE = static_cast(MTraceEvent);
- addDeviceMetadata(TE, MQueue);
+ addDeviceMetadata(TE, MQueue, MStreamID);
// Memory-object is used frequently, so it is always added.
xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress));
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
@@ -1149,10 +1166,15 @@ void AllocaSubBufCommand::emitInstrumentationData() {
return;
xpti_td *TE = static_cast(MTraceEvent);
- xpti::addMetadata(TE, "offset", this->MRequirement.MOffsetInBytes);
- xpti::addMetadata(TE, "access_range_start",
- this->MRequirement.MAccessRange[0]);
- xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]);
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ detail::isDebugStream(MStreamID)) {
+ xpti::addMetadata(TE, "offset", this->MRequirement.MOffsetInBytes);
+ xpti::addMetadata(TE, "access_range_start",
+ this->MRequirement.MAccessRange[0]);
+ xpti::addMetadata(TE, "access_range_end",
+ this->MRequirement.MAccessRange[1]);
+ }
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue));
makeTraceEventEpilog();
#endif
@@ -1226,9 +1248,13 @@ void ReleaseCommand::emitInstrumentationData() {
makeTraceEventProlog(MAddress);
xpti_td *TE = static_cast(MTraceEvent);
- addDeviceMetadata(TE, MQueue);
- xpti::addMetadata(TE, "allocation_type",
- commandToName(MAllocaCmd->getType()));
+ addDeviceMetadata(TE, MQueue, MStreamID);
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ detail::isDebugStream(MStreamID)) {
+ xpti::addMetadata(TE, "allocation_type",
+ commandToName(MAllocaCmd->getType()));
+ }
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue));
@@ -1353,7 +1379,7 @@ void MapMemObject::emitInstrumentationData() {
makeTraceEventProlog(MAddress);
xpti_td *TE = static_cast(MTraceEvent);
- addDeviceMetadata(TE, MQueue);
+ addDeviceMetadata(TE, MQueue, MStreamID);
xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress));
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
@@ -1416,7 +1442,7 @@ void UnMapMemObject::emitInstrumentationData() {
makeTraceEventProlog(MAddress);
xpti_td *TE = static_cast(MTraceEvent);
- addDeviceMetadata(TE, MQueue);
+ addDeviceMetadata(TE, MQueue, MStreamID);
xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress));
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
@@ -1511,13 +1537,17 @@ void MemCpyCommand::emitInstrumentationData() {
makeTraceEventProlog(MAddress);
xpti_td *CmdTraceEvent = static_cast(MTraceEvent);
- addDeviceMetadata(CmdTraceEvent, MQueue);
+ addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID);
xpti::addMetadata(CmdTraceEvent, "memory_object",
reinterpret_cast(MAddress));
- xpti::addMetadata(CmdTraceEvent, "copy_from",
- MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0);
- xpti::addMetadata(CmdTraceEvent, "copy_to",
- MQueue ? deviceToID(MQueue->get_device()) : 0);
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ detail::isDebugStream(MStreamID)) {
+ xpti::addMetadata(CmdTraceEvent, "copy_from",
+ MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0);
+ xpti::addMetadata(CmdTraceEvent, "copy_to",
+ MQueue ? deviceToID(MQueue->get_device()) : 0);
+ }
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue));
@@ -1684,13 +1714,17 @@ void MemCpyCommandHost::emitInstrumentationData() {
makeTraceEventProlog(MAddress);
xpti_td *CmdTraceEvent = static_cast(MTraceEvent);
- addDeviceMetadata(CmdTraceEvent, MQueue);
+ addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID);
xpti::addMetadata(CmdTraceEvent, "memory_object",
reinterpret_cast(MAddress));
- xpti::addMetadata(CmdTraceEvent, "copy_from",
- MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0);
- xpti::addMetadata(CmdTraceEvent, "copy_to",
- MQueue ? deviceToID(MQueue->get_device()) : 0);
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL ||
+ detail::isDebugStream(MStreamID)) {
+ xpti::addMetadata(CmdTraceEvent, "copy_from",
+ MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0);
+ xpti::addMetadata(CmdTraceEvent, "copy_to",
+ MQueue ? deviceToID(MQueue->get_device()) : 0);
+ }
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
// as this data is mutable and the metadata is supposed to be invariant
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue));
@@ -1780,7 +1814,7 @@ void EmptyCommand::emitInstrumentationData() {
makeTraceEventProlog(MAddress);
xpti_td *CmdTraceEvent = static_cast(MTraceEvent);
- addDeviceMetadata(CmdTraceEvent, MQueue);
+ addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID);
xpti::addMetadata(CmdTraceEvent, "memory_object",
reinterpret_cast(MAddress));
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
@@ -1845,7 +1879,7 @@ void UpdateHostRequirementCommand::emitInstrumentationData() {
makeTraceEventProlog(MAddress);
xpti_td *CmdTraceEvent = static_cast(MTraceEvent);
- addDeviceMetadata(CmdTraceEvent, MQueue);
+ addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID);
xpti::addMetadata(CmdTraceEvent, "memory_object",
reinterpret_cast(MAddress));
// Since we do NOT add queue_id value to metadata, we are stashing it to TLS
@@ -2041,13 +2075,16 @@ void instrumentationFillCommonData(
OutInstanceID = CGKernelInstanceNo;
OutTraceEvent = CmdTraceEvent;
- addDeviceMetadata(CmdTraceEvent, Queue);
+ addDeviceMetadata(CmdTraceEvent, Queue, StreamID);
if (!KernelName.empty()) {
xpti::addMetadata(CmdTraceEvent, "kernel_name", KernelName);
}
- // We limit the metadata to only include the kernel name and device
- // information by default.
- if (detail::isDebugStream(StreamID)) {
+
+ // Debug metadata is added at VERBOSE level or if subscribing to sycl.debug
+ // stream.
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE ||
+ detail::isDebugStream(StreamID)) {
if (FromSource.has_value()) {
xpti::addMetadata(CmdTraceEvent, "from_source", FromSource.value());
}
@@ -2108,9 +2145,11 @@ std::pair emitKernelInstrumentationData(
if (Queue)
xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY,
getQueueID(Queue));
- // Add the additional metadata only if the debug information is subscribed
- // to; in this case, it is the kernel and its parameters.
- if (detail::isDebugStream(StreamID)) {
+ // Add the additional metadata only if VERBOSE level is enabled or
+ // subscribing to sycl.debug stream.
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE ||
+ detail::isDebugStream(StreamID)) {
instrumentationAddExtraKernelMetadata(
CmdTraceEvent, NDRDesc, KernelBundleImplPtr, DeviceKernelInfo,
SyclKernel, Queue, CGArgs);
@@ -2164,7 +2203,9 @@ void ExecCGCommand::emitInstrumentationData() {
getQueueID(MQueue));
MTraceEvent = static_cast(CmdTraceEvent);
if (MCommandGroup->getType() == detail::CGType::Kernel) {
- if (detail::isDebugStream(MStreamID)) {
+ if (detail::GSYCLStreamDetailLevel >=
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE ||
+ detail::isDebugStream(MStreamID)) {
auto KernelCG =
reinterpret_cast(MCommandGroup.get());
instrumentationAddExtraKernelMetadata(
diff --git a/sycl/source/detail/xpti_registry.cpp b/sycl/source/detail/xpti_registry.cpp
index 8abd2ea6e4b20..fa34c2480ae1c 100644
--- a/sycl/source/detail/xpti_registry.cpp
+++ b/sycl/source/detail/xpti_registry.cpp
@@ -29,6 +29,10 @@ uint8_t GSYCLDebugStreamID = xpti::invalid_id;
uint8_t GUrCallStreamID = xpti::invalid_id;
uint8_t GUrApiStreamID = xpti::invalid_id;
+// Effective stream detail level for the "sycl" stream
+xpti::stream_detail_level_t GSYCLStreamDetailLevel =
+ xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL;
+
xpti::trace_event_data_t *GMemAllocEvent = nullptr;
xpti::trace_event_data_t *GSYCLGraphEvent = nullptr;
xpti::trace_event_data_t *GSYCLCallEvent = nullptr;
diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp
index 9598eefab7b27..30380fc730c2a 100644
--- a/sycl/source/detail/xpti_registry.hpp
+++ b/sycl/source/detail/xpti_registry.hpp
@@ -61,6 +61,8 @@ extern uint8_t GSYCLStreamID;
extern uint8_t GSYCLDebugStreamID;
extern uint8_t GUrApiStreamID;
+extern xpti::stream_detail_level_t GSYCLStreamDetailLevel;
+
extern xpti::trace_event_data_t *GMemAllocEvent;
extern xpti::trace_event_data_t *GSYCLGraphEvent;
extern xpti::trace_event_data_t *GSYCLCallEvent;
@@ -101,6 +103,10 @@ class XPTIRegistry {
// SYCL events
detail::GSYCLStreamID =
this->initializeStream(SYCL_STREAM_NAME, GMajVer, GMinVer, GVerStr);
+ // Cache the effective stream detail level - it's immutable after stream
+ // initialization, so we query it once here
+ detail::GSYCLStreamDetailLevel =
+ xptiGetEffectiveStreamDetailLevel(detail::GSYCLStreamID);
// Register the SYCL Debug event stream; tools subscribing to this stream
// will receive additional metadata in the regular "sycl" stream.
detail::GSYCLDebugStreamID = this->initializeStream(