From 0b837eaf9cf0cf2e671a9df7abc8a38c133fa4ec Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Wed, 22 Apr 2026 14:07:27 -0700 Subject: [PATCH 1/4] [XPTI] Add per-subscriber stream detail level control Introduces a mechanism for subscribers to request different levels of optional data emission from producers on a per-stream basis, enabling fine-grained control over tracing overhead: - Added stream_detail_level_t enum (NONE, BASIC, NORMAL, VERBOSE) - Added optional xptiQuerySubscriberStreamDetailLevel subscriber callback - Added xptiGetEffectiveStreamDetailLevel producer API - Aggregation rule: effective level = max across all subscribers Producers can use threshold checks to conditionally emit expensive optional data based on the effective detail level. Also add XPTI_HAS_STREAM_DETAIL_LEVEL macro to enable compile-time feature detection for stream detail level functionality. This approach allows to stay fully backward compatible: subscribers that define new xptiQuerySubscriberStreamDetailLevel callback can work with both old frameworks (that will ignore the callback) and new frameworks (that will recognize the callback). Assisted-By: Claude Sonnet 4.5 --- xpti/include/xpti/xpti_data_types.h | 39 +++++ xpti/include/xpti/xpti_trace_framework.h | 9 ++ xpti/src/xpti_proxy.cpp | 17 ++- xptifw/doc/XPTI_Framework.md | 134 +++++++++++++++++- .../basic_collector/basic_collector.cpp | 14 ++ xptifw/src/xpti_trace_framework.cpp | 103 +++++++++++++- 6 files changed, 312 insertions(+), 4 deletions(-) diff --git a/xpti/include/xpti/xpti_data_types.h b/xpti/include/xpti/xpti_data_types.h index 0f9e4f2811829..9301954be354c 100644 --- a/xpti/include/xpti/xpti_data_types.h +++ b/xpti/include/xpti/xpti_data_types.h @@ -352,6 +352,24 @@ enum class payload_flag_t { using stream_id_t = uint8_t; +/// @enum stream_detail_level_t +/// @brief Defines detail level for optional stream data emission. +/// Values are ordered to allow threshold checks. Effective level is the +/// maximum requested across all subscribers for a stream. +enum class stream_detail_level_t : uint8_t { + XPTI_STREAM_DETAIL_LEVEL_NONE = 0, ///< No optional data + XPTI_STREAM_DETAIL_LEVEL_BASIC = 1, ///< Basic optional data + XPTI_STREAM_DETAIL_LEVEL_NORMAL = 2, ///< Normal detail (default) + XPTI_STREAM_DETAIL_LEVEL_VERBOSE = 3 ///< Maximum detail +}; + +/// @def XPTI_HAS_STREAM_DETAIL_LEVEL +/// @brief Feature detection macro for stream detail level support. +/// Subscribers can use this macro to conditionally compile code that uses +/// stream_detail_level_t, xptiQuerySubscriberStreamDetailLevel, and +/// xptiGetEffectiveStreamDetailLevel. +#define XPTI_HAS_STREAM_DETAIL_LEVEL 1 + // // Helper macros for creating new tracepoint and // event types @@ -1246,6 +1264,16 @@ typedef void (*plugin_init_t)(unsigned int, unsigned int, const char *, const char *); typedef void (*plugin_fini_t)(const char *); +/// @typedef query_subscriber_stream_detail_level_t +/// @brief Optional callback for querying subscriber's requested detail level +/// per stream. +/// @param stream_name Stream name being queried. +/// @param level Output: subscriber's desired detail level for the stream. +/// @return true if level was set, false otherwise. +/// @note If not implemented, defaults to NORMAL for all streams. +typedef bool (*query_subscriber_stream_detail_level_t)( + const char *stream_name, xpti::stream_detail_level_t *level); + constexpr uint16_t trace_task_begin = static_cast(xpti::trace_point_type_t::task_begin); constexpr uint16_t trace_task_end = @@ -1445,4 +1473,15 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, /// subscribed to this stream can now free up all internal data structures and /// memory that has been allocated to manage the stream data. XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name); + +/// @brief Optional callback for querying subscriber's stream detail level +/// preference. Called by framework during stream initialization to compute +/// effective detail level. +/// @param stream_name Stream name being queried. +/// @param level Output: subscriber's requested detail level. +/// @return true if level was set, false otherwise. +/// @note Optional. If not implemented, defaults to NORMAL. +XPTI_CALLBACK_API bool +xptiQuerySubscriberStreamDetailLevel(const char *stream_name, + xpti::stream_detail_level_t *level); } diff --git a/xpti/include/xpti/xpti_trace_framework.h b/xpti/include/xpti/xpti_trace_framework.h index 99ee1a5fb2f16..e6a7ddc205b79 100644 --- a/xpti/include/xpti/xpti_trace_framework.h +++ b/xpti/include/xpti/xpti_trace_framework.h @@ -1003,6 +1003,13 @@ XPTI_EXPORT_API bool xptiCheckTracepointScopeNotification(); /// @param e The event for which associated data will be removed XPTI_EXPORT_API void xptiReleaseEvent(xpti::trace_event_data_t *e); +/// @brief Gets effective stream detail level (max across all subscribers). +/// @param stream Stream ID to query. +/// @return Effective detail level. Defaults to NORMAL if not set. +/// @note Lock-free; safe to call in hot paths. +XPTI_EXPORT_API xpti::stream_detail_level_t +xptiGetEffectiveStreamDetailLevel(xpti::stream_id_t stream); + typedef xpti::result_t (*xpti_framework_initialize_t)(); typedef xpti::result_t (*xpti_framework_finalize_t)(); typedef xpti::result_t (*xpti_initialize_t)(const char *, uint32_t, uint32_t, @@ -1072,4 +1079,6 @@ typedef xpti_tracepoint_t *(*xpti_create_tracepoint_t)(const char *, const char *, uint32_t, uint32_t, void *); typedef xpti::result_t (*xpti_delete_tracepoint_t)(xpti_tracepoint_t *); +typedef xpti::stream_detail_level_t (*xpti_get_effective_stream_detail_level_t)( + xpti::stream_id_t); } diff --git a/xpti/src/xpti_proxy.cpp b/xpti/src/xpti_proxy.cpp index 6400b6824c1a2..0585d168f19f6 100644 --- a/xpti/src/xpti_proxy.cpp +++ b/xpti/src/xpti_proxy.cpp @@ -62,6 +62,7 @@ enum functions_t : unsigned { XPTI_SET_DEFAULT_EVENT_TYPE, XPTI_GET_DEFAULT_TRACE_TYPE, XPTI_SET_DEFAULT_TRACE_TYPE, + XPTI_GET_EFFECTIVE_STREAM_DETAIL_LEVEL, // All additional functions need to appear before // the XPTI_FW_API_COUNT enum XPTI_FW_API_COUNT ///< This enum must always be the last one in the list @@ -119,7 +120,9 @@ class ProxyLoader { {XPTI_SET_DEFAULT_EVENT_TYPE, "xptiSetDefaultEventType"}, {XPTI_GET_DEFAULT_TRACE_TYPE, "xptiGetDefaultTraceType"}, {XPTI_SET_DEFAULT_TRACE_TYPE, "xptiSetDefaultTraceType"}, - {XPTI_RELEASE_EVENT, "xptiReleaseEvent"}}; + {XPTI_RELEASE_EVENT, "xptiReleaseEvent"}, + {XPTI_GET_EFFECTIVE_STREAM_DETAIL_LEVEL, + "xptiGetEffectiveStreamDetailLevel"}}; public: typedef std::vector dispatch_table_t; @@ -750,3 +753,15 @@ xptiSetDefaultTraceType(xpti::trace_point_type_t trace_type) { } return xpti::result_t::XPTI_RESULT_FAIL; } + +XPTI_EXPORT_API xpti::stream_detail_level_t +xptiGetEffectiveStreamDetailLevel(xpti::stream_id_t stream) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = xpti::ProxyLoader::instance().functionByIndex( + XPTI_GET_EFFECTIVE_STREAM_DETAIL_LEVEL); + if (f) { + return (*(xpti_get_effective_stream_detail_level_t)f)(stream); + } + } + return xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL; +} diff --git a/xptifw/doc/XPTI_Framework.md b/xptifw/doc/XPTI_Framework.md index 0bee8d33cf37e..a09412c157f8a 100644 --- a/xptifw/doc/XPTI_Framework.md +++ b/xptifw/doc/XPTI_Framework.md @@ -20,6 +20,13 @@ - [`xptiMakeEvent`](#xptimakeevent) - [Notifying the registered listeners](#notifying-the-registered-listeners) - [`xptiNotifySubscribers`](#xptinotifysubscribers) + - [Stream Detail Level Control](#stream-detail-level-control) + - [Overview](#overview-1) + - [Detail Level Enum](#detail-level-enum) + - [Aggregation Rule](#aggregation-rule) + - [Subscriber Usage](#subscriber-usage) + - [Producer Usage](#producer-usage) + - [API Reference](#api-reference) - [Performance of the Framework](#performance-of-the-framework) - [Modeling and projection](#modeling-and-projection) - [Computing the cost incurred in the framework](#computing-the-cost-incurred-in-the-framework) @@ -119,7 +126,8 @@ functional: (1) `xptiTraceInit`, (2) `xptiTraceFinish` and (3) callback handlers. The `xptiTraceInit` and `xptiTraceFinish` API calls are used by the dispatcher loading the subscriber dynamically to determine if the subscriber is a valid subscriber. If these entry points are not present, then the -subscriber is not loaded. +subscriber is not loaded. Optionally, subscriber may implement +`xptiQuerySubscriberStreamDetailLevel` to control detail level per stream. The `xptiTraceInit` callback is called by the dispatcher when the generator of a new stream of data makes a call to `xptiInitialize` for the new stream. The @@ -173,6 +181,27 @@ allocated to handle the stream. The `xptiTraceFinish` call is made by the dispatcher when the instrumented code is winding down a data stream by calling `xptiFinalize` for the stream. +In addition to the per-stream callbacks, subscribers may optionally implement +a detail level query callback to control the amount of optional data emitted: + +```cpp +XPTI_CALLBACK_API bool xptiQuerySubscriberStreamDetailLevel( + const char *stream_name, xpti::stream_detail_level_t *level) { + if (level) { + if (std::string(stream_name) == "sycl") { + *level = xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE; + } else { + *level = xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL; + } + return true; + } + return false; +} +``` + +Called during stream initialization. Framework uses max level across all subscribers. +Defaults to NORMAL if not implemented. + The implementation of the callbacks is where attention needs to be given to the handshake protocol or specification for a given stream the subscriber wants to attach to and consume the data. The instrumented library may send additional @@ -217,6 +246,7 @@ invariant across all instances of that tracepoint. > **NOTE:** A subscriber **must** implement the `xptiTraceInit` and > `xptiTraceFinish` APIs for the dispatcher to successfully load the subscriber. +> Optionally, implement `xptiQuerySubscriberStreamDetailLevel` to control detail level. > **NOTE:** The specification for a given event stream **must** be consulted > before implementing the callback handlers for various trace types. @@ -699,6 +729,108 @@ void function1() { } ``` +### Stream Detail Level Control + +Subscribers can request different detail levels per stream to control optional data emission. +Effective level is the max across all subscribers. + +**Feature Detection**: Use `#ifdef XPTI_HAS_STREAM_DETAIL_LEVEL` to conditionally compile code +that depends on this feature. This ensures backward compatibility with older XPTI versions. + +**Important**: Stream detail level controls the amount of *optional metadata* emitted for trace +points on a stream. It does not replace or affect `xptiCheckTraceEnabled()`, which remains the +primary mechanism for deciding whether a trace notification is emitted at all. If a subscriber +requests a detail level for a stream but does not subscribe to any trace points on that stream, +no trace points will be emitted - the requested detail level only affects the amount of optional +data attached to trace points that are already being emitted due to active subscriptions. + +#### Detail Level Enum + +The `xpti::stream_detail_level_t` enum defines four ordered levels: + +```cpp +enum class stream_detail_level_t : uint8_t { + XPTI_STREAM_DETAIL_LEVEL_NONE = 0, // No optional data + XPTI_STREAM_DETAIL_LEVEL_BASIC = 1, // Basic optional data + XPTI_STREAM_DETAIL_LEVEL_NORMAL = 2, // Normal detail (default) + XPTI_STREAM_DETAIL_LEVEL_VERBOSE = 3 // Maximum detail +}; +``` + +The values are ordered to support threshold checks in producer code: + +```cpp +auto level = xptiGetEffectiveStreamDetailLevel(stream_id); +if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + // Emit normal-level optional data +} +if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE) { + // Emit verbose-level optional data +} +``` + +#### Aggregation Rule + +Effective level = max across all subscribers. Defaults to NORMAL if not set. + +#### Subscriber Usage + +Use `XPTI_HAS_STREAM_DETAIL_LEVEL` for backward compatibility: + +```cpp +#ifdef XPTI_HAS_STREAM_DETAIL_LEVEL +XPTI_CALLBACK_API bool xptiQuerySubscriberStreamDetailLevel( + const char *stream_name, xpti::stream_detail_level_t *level) { + if (!level) return false; + if (std::string(stream_name) == "sycl") { + *level = xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE; + } else { + *level = xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL; + } + return true; +} +#endif +``` + +#### Producer Usage + +Producers should query the effective detail level before emitting optional data. +Use `XPTI_HAS_STREAM_DETAIL_LEVEL` to ensure backward compatibility: + +```cpp +void emit_trace_data(xpti::stream_id_t stream_id, const TraceData& data) { + // Always emit essential trace points + xptiNotifySubscribers(stream_id, trace_type, parent, event, instance, &data); + +#ifdef XPTI_HAS_STREAM_DETAIL_LEVEL + // Check if we should emit optional metadata + auto level = xptiGetEffectiveStreamDetailLevel(stream_id); + + if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + // Emit normal-level optional metadata + xpti::object_id_t value_id = xptiRegisterObject(&data.optional_info, + sizeof(data.optional_info), + 0); + xptiAddMetadata(event, "optional_info", value_id); + } + + if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE) { + // Emit verbose-level optional metadata (potentially expensive) + compute_and_emit_detailed_analysis(event); + } +#endif +} +``` + +#### API Reference + +##### `xptiQuerySubscriberStreamDetailLevel` (Subscriber Callback) +Optional callback queried during stream init. Returns requested detail level per stream. +Defaults to NORMAL if not implemented. + +##### `xptiGetEffectiveStreamDetailLevel` +Returns max detail level across all subscribers for a stream. Lock-free; suitable for hot paths. + ## Performance of the Framework In order to estimate the overheads one could experience by using the framework, diff --git a/xptifw/samples/basic_collector/basic_collector.cpp b/xptifw/samples/basic_collector/basic_collector.cpp index b325a48b3eb96..b2942028f0c0e 100644 --- a/xptifw/samples/basic_collector/basic_collector.cpp +++ b/xptifw/samples/basic_collector/basic_collector.cpp @@ -111,6 +111,20 @@ XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name) { // We do nothing here } +// Optional: query callback for stream detail level (defaults to NORMAL if +// omitted). Use XPTI_HAS_STREAM_DETAIL_LEVEL for backward compatibility. +#ifdef XPTI_HAS_STREAM_DETAIL_LEVEL +XPTI_CALLBACK_API bool +xptiQuerySubscriberStreamDetailLevel(const char *stream_name, + xpti::stream_detail_level_t *level) { + if (level) { + *level = xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE; + return true; + } + return false; +} +#endif + XPTI_CALLBACK_API void tpCallback(uint16_t TraceType, xpti::trace_event_data_t *Parent, xpti::trace_event_data_t *Event, diff --git a/xptifw/src/xpti_trace_framework.cpp b/xptifw/src/xpti_trace_framework.cpp index 35c3e0c15d84b..de5367bd193f5 100644 --- a/xptifw/src/xpti_trace_framework.cpp +++ b/xptifw/src/xpti_trace_framework.cpp @@ -392,6 +392,8 @@ class Subscribers { xpti::plugin_init_t init = nullptr; /// The finalization entry point xpti::plugin_fini_t fini = nullptr; + /// Optional callback for querying stream detail level (optional) + xpti::query_subscriber_stream_detail_level_t query_detail_level = nullptr; /// The name of the shared object (in UTF8?)) std::string name; /// indicates whether the data structure is valid @@ -399,7 +401,7 @@ class Subscribers { }; // Data structures defined to hold the plugin data that can be looked up by - // plugin name or the handle + // plugin name or library handle using plugin_handle_lut_t = std::map; using plugin_name_lut_t = std::map; @@ -463,13 +465,20 @@ class Subscribers { g_helper.findFunction(Handle, "xptiTraceFinish")); if (InitFunc && FiniFunc) { // We appear to have loaded a valid plugin, so we will insert the - // plugin information into the two maps guarded by a lock + // plugin information into the maps guarded by a lock plugin_data_t Data; Data.valid = true; Data.handle = Handle; Data.name = Path; Data.init = InitFunc; Data.fini = FiniFunc; + + // Look for optional detail level query callback + Data.query_detail_level = + reinterpret_cast( + g_helper.findFunction(Handle, + "xptiQuerySubscriberStreamDetailLevel")); + { std::lock_guard Lock(MMutex); MNameLUT[Path] = Data; @@ -587,6 +596,28 @@ class Subscribers { MNameLUT.clear(); } + /// Query all subscribers for requested detail level, return max. + xpti::stream_detail_level_t + querySubscribersForStreamDetailLevel(const char *stream_name) { + std::lock_guard Lock(MMutex); + + xpti::stream_detail_level_t max_level = + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NONE; + + for (const auto &handle_entry : MHandleLUT) { + const plugin_data_t &plugin = handle_entry.second; + xpti::stream_detail_level_t sub_level = + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL; + if (plugin.query_detail_level) + plugin.query_detail_level(stream_name, &sub_level); + + if (static_cast(sub_level) > static_cast(max_level)) + max_level = sub_level; + } + + return max_level; + } + private: /// Hash map that maps shared object name to the plugin data plugin_name_lut_t MNameLUT; @@ -1979,6 +2010,15 @@ class Framework { MSubscribers.loadFromEnvironmentVariable(); MTraceEnabled = (g_helper.checkTraceEnv() && MSubscribers.hasValidSubscribers()); + + // Initialize all effective stream detail levels to default (NORMAL) + for (size_t i = 0; i < 256; ++i) { + MEffectiveStreamDetailLevels[i].store( + static_cast( + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL), + std::memory_order_relaxed); + } + // We create a default stream "xpti.framework" and save it in // `g_default_stream g_default_stream_id = registerStream(g_default_stream); @@ -2000,6 +2040,13 @@ class Framework { MTracepoints.clear(); MStringTableRef.clear(); MNotifier.clear(); + // Reset all effective levels to default + for (size_t i = 0; i < 256; ++i) { + MEffectiveStreamDetailLevels[i].store( + static_cast( + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL), + std::memory_order_relaxed); + } } /// @brief Enables or disables tracing globally. @@ -2271,8 +2318,15 @@ class Framework { if (!Stream || !VersionString) return xpti::result_t::XPTI_RESULT_INVALIDARG; + // Initialize subscribers for the stream MSubscribers.initializeForStream(Stream, MajorRevision, MinorRevision, VersionString); + + // Query subscribers for their requested detail level and compute effective + // level + xpti::stream_id_t stream_id = registerStream(Stream); + computeEffectiveStreamDetailLevel(stream_id, Stream); + return xpti::result_t::XPTI_RESULT_SUCCESS; } @@ -2474,6 +2528,42 @@ class Framework { bool hasSubscribers() { return MSubscribers.hasValidSubscribers(); } + /// @brief Computes effective detail level by querying all subscribers. + /// + /// Queries all registered subscribers for their requested detail level for + /// the given stream and caches the maximum (most verbose) level requested. + /// This allows lock-free reads via getEffectiveStreamDetailLevel. + /// + /// @param stream The stream ID to compute. + /// @param stream_name The stream name for querying subscribers. + void computeEffectiveStreamDetailLevel(xpti::stream_id_t stream, + const char *stream_name) { + // Query all subscribers for their requested detail level for this stream + xpti::stream_detail_level_t max_level = + MSubscribers.querySubscribersForStreamDetailLevel(stream_name); + + // Update the cached effective level atomically + MEffectiveStreamDetailLevels[stream].store(static_cast(max_level), + std::memory_order_release); + } + + /// @brief Gets the effective stream detail level for a stream. + /// + /// The effective detail level is the maximum requested level across all + /// subscribers for the given stream. If no subscriber has set a level for + /// the stream, the default (NORMAL) is returned. + /// + /// @param stream The stream ID for which the effective detail level is + /// requested. + /// + /// @return The effective detail level for the stream. + xpti::stream_detail_level_t + getEffectiveStreamDetailLevel(xpti::stream_id_t stream) { + uint8_t level = + MEffectiveStreamDetailLevels[stream].load(std::memory_order_acquire); + return static_cast(level); + } + xpti::result_t finalizeStream(const char *Stream) { if (!Stream) return xpti::result_t::XPTI_RESULT_INVALIDARG; @@ -2577,6 +2667,10 @@ class Framework { xpti::Tracepoints MTracepoints; /// Flag indicates whether tracing should be enabled bool MTraceEnabled; + /// Cached effective detail levels per stream (indexed by stream_id) + /// Lock-free array of atomics for fast reads by producers + /// stream_id is uint8_t, so we need 256 entries + std::array, 256> MEffectiveStreamDetailLevels; }; /// @var static int GFrameworkReferenceCounter @@ -3732,6 +3826,11 @@ XPTI_EXPORT_API void xptiReleaseEvent(xpti::trace_event_data_t *Event) { return xpti::Framework::instance().releaseEvent(Event); } +XPTI_EXPORT_API xpti::stream_detail_level_t +xptiGetEffectiveStreamDetailLevel(xpti::stream_id_t stream) { + return xpti::Framework::instance().getEffectiveStreamDetailLevel(stream); +} + } // extern "C" #if (defined(_WIN32) || defined(_WIN64)) From 854768492b192623aa04c1e4b7f603db61838452 Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Wed, 22 Apr 2026 14:07:37 -0700 Subject: [PATCH 2/4] [XPTI] Emit metadata to sycl stream based on requested detail level Take into account requested detail level when emitting metadata into the "sycl" stream. Add metadata table showing which detail level provides each field. Existing tools using sycl or sycl.debug continue to work unchanged Optionally tools can request BASIC level for minimal overhead (PTI use case). --- .../design/SYCLInstrumentationUsingXPTI.md | 56 +++++---- sycl/source/detail/queue_impl.cpp | 70 ++++++----- sycl/source/detail/scheduler/commands.cpp | 115 +++++++++++------- sycl/source/detail/xpti_registry.cpp | 4 + sycl/source/detail/xpti_registry.hpp | 6 + 5 files changed, 159 insertions(+), 92 deletions(-) diff --git a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md index d0c7c3e59869a..5079972359c7a 100644 --- a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md +++ b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md @@ -244,6 +244,18 @@ All trace point types in bold provide semantic information about the graph, node The `"sycl.debug"` stream emits the same notifications as the `"sycl"` stream, but with additional metadata. If toolchains want to keep the overhead low then subscribing to `"sycl"` stream is the right option, if toolchains want to get more data and keeping overheads low is not important then they should subscribe to `"sycl.debug"`. If a tool subscribes to both `"sycl"` and `"sycl.debug"`, only notifications from `"sycl.debug"` will be delivered to avoid duplication. + +### Stream Detail Level Control + +The SYCL runtime implements XPTI stream detail level control for the `"sycl"` stream (see [XPTI Stream Detail Level Control](https://github.com/intel/llvm/tree/sycl/xptifw/doc/XPTI_Framework.md#stream-detail-level-control) for the XPTI framework feature description). This allows subscribers to control the amount of optional metadata emitted based on their needs, providing fine-grained control over tracing overhead. + +The SYCL runtime respects the effective detail level when emitting metadata and uses the following strategy: +- Metadata is emitted at VERBOSE level if either: + - A subscriber requests VERBOSE level on the `"sycl"` stream, OR + - A subscriber is registered for the `"sycl.debug"` stream (legacy behavior) +- Otherwise, metadata is emitted according to the requested detail level (BASIC or NORMAL) + +See the metadata table below for which fields are emitted at each detail level. Note that stream detail level control is only implemented for the `"sycl"` stream; the `"sycl.debug"` stream always emits all metadata (equivalent to VERBOSE level). | Trace Point Type | Parameter Description | Metadata | | :----------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **`graph_create`** |
  • **trace_type**: `xpti::trace_point_type_t::graph_create` that marks the creation of an asynchronous graph.
  • **parent**: `nullptr`
  • **event**: The global asynchronous graph object ID. All other graph related events such as node and edge creation will always this ID as the parent ID.
  • **instance**: Unique ID related to the event, but not a correlation ID as there are other events to correlate to.
  • **user_data**: `nullptr`
  • SYCL runtime will always have one instance of a graph object with many disjoint subgraphs that get created during the execution of an application.
    | None | @@ -262,28 +274,28 @@ If a tool subscribes to both `"sycl"` and `"sycl.debug"`, only notifications fro ### Metadata description -| Metadata | Type | Description | -| :--------------------: | :-------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `access_mode` | `int` | Value of `sycl::access::mode` enum | -| `access_range_start` | `size_t` | Start of accessor range | -| `access_range_end` | `size_t` | End of accessor range | -| `allocation_type` | C-style string | Allocation type | -| `copy_from` | `size_t` | ID of source device | -| `copy_to` | `size_t` | ID of target device | -| `event` | `size_t` | Unique identifier of event | -| `from_source` | `bool` | `true` if kernel comes from user source | -| `kernel_name` | C-style string | Kernel name | -| `memory_object` | `size_t` | Unique identifier of memory object | -| `offset` | `size_t` | Accessor offset size in bytes | -| `sycl_device` | `size_t` | Unique identifier of SYCL device | -| `sycl_device_type` | C-style string | `CPU`, `GPU`, `ACC`, or `HOST` | -| `sycl_device_name` | C-style string | Result of `sycl::device::get_info()` | -| `sym_function_name` | C-style string | Function name | -| `sym_source_file_name` | C-style string | Source file name | -| `sym_line_no` | `int32_t` | File line number | -| `sym_column_no` | `int32_t` | File column number | -| `enqueue_kernel_data` | `xpti::offload_kernel_arg_data_t` | Includes kernel execution parameters (global size, local size, offset) and number of kernel arguments | -| `argN` | `xpti::offload_kernel_arg_data_t` | Description for the Nth kernel argument. It includes argument kind (sycl::detail::kernel_param_kind_t), pointer to the value, size and index in the argument list. | +| Metadata | Type | Detail Level | Description | +| :--------------------: | :-------------------------------: | :----------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `kernel_name` | C-style string | BASIC | Kernel name | +| `memory_object` | `size_t` | BASIC | Unique identifier of memory object | +| `offset` | `size_t` | NORMAL | Accessor offset size in bytes | +| `access_mode` | `int` | NORMAL | Value of `sycl::access::mode` enum | +| `access_range_start` | `size_t` | NORMAL | Start of accessor range | +| `access_range_end` | `size_t` | NORMAL | End of accessor range | +| `allocation_type` | C-style string | NORMAL | Allocation type | +| `copy_from` | `size_t` | NORMAL | ID of source device | +| `copy_to` | `size_t` | NORMAL | ID of target device | +| `event` | `size_t` | NORMAL | Unique identifier of event | +| `sycl_device` | `size_t` | NORMAL | Unique identifier of SYCL device | +| `sycl_device_type` | C-style string | NORMAL | `CPU`, `GPU`, `ACC`, or `HOST` | +| `sycl_device_name` | C-style string | NORMAL | Result of `sycl::device::get_info()` | +| `from_source` | `bool` | VERBOSE | `true` if kernel comes from user source | +| `sym_function_name` | C-style string | VERBOSE | Function name | +| `sym_source_file_name` | C-style string | VERBOSE | Source file name | +| `sym_line_no` | `int32_t` | VERBOSE | File line number | +| `sym_column_no` | `int32_t` | VERBOSE | File column number | +| `enqueue_kernel_data` | `xpti::offload_kernel_arg_data_t` | VERBOSE | Includes kernel execution parameters (global size, local size, offset) and number of kernel arguments | +| `argN` | `xpti::offload_kernel_arg_data_t` | VERBOSE | Description for the Nth kernel argument. It includes argument kind (sycl::detail::kernel_param_kind_t), pointer to the value, size and index in the argument list. | ## Buffer management stream `"sycl.experimental.buffer"` Notification Signatures diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 5719496c5ac33..026f476f410f6 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -135,12 +135,16 @@ EventImplPtr queue_impl::memset(void *Ptr, int Value, size_t Count, // This information is necessary for memset, so we will not guard it by debug // stream check. TP.addMetadata([&](auto TEvent) { - xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast(MDevice.getHandleRef())); - xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr)); - xpti::addMetadata(TEvent, "value_set", Value); xpti::addMetadata(TEvent, "memory_size", Count); - xpti::addMetadata(TEvent, "queue_id", MQueueID); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(TEvent, "sycl_device", + reinterpret_cast(MDevice.getHandleRef())); + xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr)); + xpti::addMetadata(TEvent, "value_set", Value); + + xpti::addMetadata(TEvent, "queue_id", MQueueID); + } }); // Before we notifiy the subscribers, we broadcast the 'queue_id', which was a @@ -189,13 +193,17 @@ EventImplPtr queue_impl::memcpy(void *Dest, const void *Src, size_t Count, const char *UserData = "memory_transfer_node::memcpy"; // We will include this metadata information as it is required for memcpy. TP.addMetadata([&](auto TEvent) { - xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast(MDevice.getHandleRef())); - xpti::addMetadata(TEvent, "src_memory_ptr", reinterpret_cast(Src)); - xpti::addMetadata(TEvent, "dest_memory_ptr", - reinterpret_cast(Dest)); xpti::addMetadata(TEvent, "memory_size", Count); - xpti::addMetadata(TEvent, "queue_id", MQueueID); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(TEvent, "sycl_device", + reinterpret_cast(MDevice.getHandleRef())); + xpti::addMetadata(TEvent, "src_memory_ptr", + reinterpret_cast(Src)); + xpti::addMetadata(TEvent, "dest_memory_ptr", + reinterpret_cast(Dest)); + xpti::addMetadata(TEvent, "queue_id", MQueueID); + } }); // Before we notify the subscribers, we stash the 'queue_id', which was a // metadata entry to TLS for use by callback handlers @@ -915,12 +923,15 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, IId = xptiGetUniqueId(); auto WaitEvent = Event->event_ref(); - // We will allow the device type to be set - xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this)); - // We limit the amount of metadata that is added to the regular stream. - // Only "sycl.debug" stream will have the full information. This improves the - // performance when this data is not required by the tool or the collector. - if (isDebugStream(StreamID)) { + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this)); + } + // Full metadata is added only at VERBOSE level or if subscribing to + // sycl.debug stream. + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE || + isDebugStream(StreamID)) { if (HasSourceInfo) { xpti::addMetadata(WaitEvent, "sym_function_name", CodeLoc.functionName()); xpti::addMetadata(WaitEvent, "sym_source_file_name", CodeLoc.fileName()); @@ -1048,18 +1059,19 @@ void queue_impl::constructorNotification() { xpti_td *TEvent = Event->event_ref(); // Cache the trace event, stream id and instance IDs for the destructor. MTraceEvent = (void *)TEvent; - // We will allow the queue metadata to be set as this is performed - // infrequently. - xpti::addMetadata(TEvent, "sycl_context", - reinterpret_cast(MContext->getHandleRef())); - xpti::addMetadata(TEvent, "sycl_device_name", - MDevice.get_info()); - xpti::addMetadata(TEvent, "sycl_device", - reinterpret_cast(MDevice.getHandleRef())); - xpti::addMetadata(TEvent, "is_inorder", MIsInorder); - xpti::addMetadata(TEvent, "queue_id", MQueueID); - xpti::addMetadata(TEvent, "queue_handle", - reinterpret_cast(getHandleRef())); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(TEvent, "sycl_context", + reinterpret_cast(MContext->getHandleRef())); + xpti::addMetadata(TEvent, "sycl_device_name", + MDevice.get_info()); + xpti::addMetadata(TEvent, "sycl_device", + reinterpret_cast(MDevice.getHandleRef())); + xpti::addMetadata(TEvent, "is_inorder", MIsInorder); + xpti::addMetadata(TEvent, "queue_id", MQueueID); + xpti::addMetadata(TEvent, "queue_handle", + reinterpret_cast(getHandleRef())); + } // Also publish to TLS before notification. xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, MQueueID); xptiNotifySubscribers(detail::getActiveXPTIStreamID(), diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d0ca0667b2139..5028fb6f1cc61 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -91,7 +91,12 @@ static size_t deviceToID(const device &Device) { return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); } -static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue) { +static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue, + xpti::stream_id_t StreamID) { + if (detail::GSYCLStreamDetailLevel < + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) + return; + xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue)); if (Queue) { xpti::addMetadata(TraceEvent, "sycl_device", @@ -102,8 +107,9 @@ static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue) { } } static void addDeviceMetadata(xpti_td *TraceEvent, - const std::shared_ptr &Queue) { - addDeviceMetadata(TraceEvent, Queue.get()); + const std::shared_ptr &Queue, + xpti::stream_id_t StreamID) { + addDeviceMetadata(TraceEvent, Queue.get(), StreamID); } static unsigned long long getQueueID(queue_impl *Queue) { @@ -594,14 +600,20 @@ void Command::emitEdgeEventForCommandDependence( xpti_td *TgtEvent = static_cast(MTraceEvent); EdgeEvent->source_id = SrcEvent->unique_id; EdgeEvent->target_id = TgtEvent->unique_id; - // We allow this metadata to be set as it describes the edge. if (IsCommand) { - xpti::addMetadata(EdgeEvent, "access_mode", - static_cast(AccMode.value())); xpti::addMetadata(EdgeEvent, "memory_object", reinterpret_cast(ObjAddr)); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(EdgeEvent, "access_mode", + static_cast(AccMode.value())); + } } else { - xpti::addMetadata(EdgeEvent, "event", reinterpret_cast(ObjAddr)); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(EdgeEvent, "event", + reinterpret_cast(ObjAddr)); + } } xptiNotifySubscribers(MStreamID, NotificationTraceType, detail::GSYCLGraphEvent, EdgeEvent, EdgeInstanceNo, @@ -671,10 +683,11 @@ void Command::emitEdgeEventForEventDependence(Command *Cmd, xpti_td *TgtEvent = static_cast(MTraceEvent); EdgeEvent->source_id = NodeEvent->unique_id; EdgeEvent->target_id = TgtEvent->unique_id; - // We allow this metadata to be set as an edge without the event address - // will be less useful. - xpti::addMetadata(EdgeEvent, "event", - reinterpret_cast(UrEventAddr)); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(EdgeEvent, "event", + reinterpret_cast(UrEventAddr)); + } xptiNotifySubscribers(MStreamID, xpti::trace_edge_create, detail::GSYCLGraphEvent, EdgeEvent, EdgeInstanceNo, nullptr); @@ -1030,7 +1043,7 @@ void AllocaCommandBase::emitInstrumentationData() { // internal infrastructure to guarantee collision free universal IDs. if (MTraceEvent) { xpti_td *TE = static_cast(MTraceEvent); - addDeviceMetadata(TE, MQueue); + addDeviceMetadata(TE, MQueue, MStreamID); // Memory-object is used frequently, so it is always added. xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS @@ -1149,10 +1162,14 @@ void AllocaSubBufCommand::emitInstrumentationData() { return; xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "offset", this->MRequirement.MOffsetInBytes); - xpti::addMetadata(TE, "access_range_start", - this->MRequirement.MAccessRange[0]); - xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(TE, "offset", this->MRequirement.MOffsetInBytes); + xpti::addMetadata(TE, "access_range_start", + this->MRequirement.MAccessRange[0]); + xpti::addMetadata(TE, "access_range_end", + this->MRequirement.MAccessRange[1]); + } xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); #endif @@ -1226,9 +1243,12 @@ void ReleaseCommand::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *TE = static_cast(MTraceEvent); - addDeviceMetadata(TE, MQueue); - xpti::addMetadata(TE, "allocation_type", - commandToName(MAllocaCmd->getType())); + addDeviceMetadata(TE, MQueue, MStreamID); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(TE, "allocation_type", + commandToName(MAllocaCmd->getType())); + } // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); @@ -1353,7 +1373,7 @@ void MapMemObject::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *TE = static_cast(MTraceEvent); - addDeviceMetadata(TE, MQueue); + addDeviceMetadata(TE, MQueue, MStreamID); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant @@ -1416,7 +1436,7 @@ void UnMapMemObject::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *TE = static_cast(MTraceEvent); - addDeviceMetadata(TE, MQueue); + addDeviceMetadata(TE, MQueue, MStreamID); xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant @@ -1511,13 +1531,16 @@ void MemCpyCommand::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - addDeviceMetadata(CmdTraceEvent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "copy_from", - MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); - xpti::addMetadata(CmdTraceEvent, "copy_to", - MQueue ? deviceToID(MQueue->get_device()) : 0); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(CmdTraceEvent, "copy_from", + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); + xpti::addMetadata(CmdTraceEvent, "copy_to", + MQueue ? deviceToID(MQueue->get_device()) : 0); + } // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); @@ -1684,13 +1707,16 @@ void MemCpyCommandHost::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - addDeviceMetadata(CmdTraceEvent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "copy_from", - MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); - xpti::addMetadata(CmdTraceEvent, "copy_to", - MQueue ? deviceToID(MQueue->get_device()) : 0); + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(CmdTraceEvent, "copy_from", + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); + xpti::addMetadata(CmdTraceEvent, "copy_to", + MQueue ? deviceToID(MQueue->get_device()) : 0); + } // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); @@ -1780,7 +1806,7 @@ void EmptyCommand::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - addDeviceMetadata(CmdTraceEvent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS @@ -1845,7 +1871,7 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - addDeviceMetadata(CmdTraceEvent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID); xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS @@ -2041,13 +2067,16 @@ void instrumentationFillCommonData( OutInstanceID = CGKernelInstanceNo; OutTraceEvent = CmdTraceEvent; - addDeviceMetadata(CmdTraceEvent, Queue); + addDeviceMetadata(CmdTraceEvent, Queue, StreamID); if (!KernelName.empty()) { xpti::addMetadata(CmdTraceEvent, "kernel_name", KernelName); } - // We limit the metadata to only include the kernel name and device - // information by default. - if (detail::isDebugStream(StreamID)) { + + // Debug metadata is added at VERBOSE level or if subscribing to sycl.debug + // stream. + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE || + detail::isDebugStream(StreamID)) { if (FromSource.has_value()) { xpti::addMetadata(CmdTraceEvent, "from_source", FromSource.value()); } @@ -2108,9 +2137,11 @@ std::pair emitKernelInstrumentationData( if (Queue) xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(Queue)); - // Add the additional metadata only if the debug information is subscribed - // to; in this case, it is the kernel and its parameters. - if (detail::isDebugStream(StreamID)) { + // Add the additional metadata only if VERBOSE level is enabled or + // subscribing to sycl.debug stream. + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE || + detail::isDebugStream(StreamID)) { instrumentationAddExtraKernelMetadata( CmdTraceEvent, NDRDesc, KernelBundleImplPtr, DeviceKernelInfo, SyclKernel, Queue, CGArgs); @@ -2164,7 +2195,9 @@ void ExecCGCommand::emitInstrumentationData() { getQueueID(MQueue)); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CGType::Kernel) { - if (detail::isDebugStream(MStreamID)) { + if (detail::GSYCLStreamDetailLevel >= + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE || + detail::isDebugStream(MStreamID)) { auto KernelCG = reinterpret_cast(MCommandGroup.get()); instrumentationAddExtraKernelMetadata( diff --git a/sycl/source/detail/xpti_registry.cpp b/sycl/source/detail/xpti_registry.cpp index 8abd2ea6e4b20..fa34c2480ae1c 100644 --- a/sycl/source/detail/xpti_registry.cpp +++ b/sycl/source/detail/xpti_registry.cpp @@ -29,6 +29,10 @@ uint8_t GSYCLDebugStreamID = xpti::invalid_id; uint8_t GUrCallStreamID = xpti::invalid_id; uint8_t GUrApiStreamID = xpti::invalid_id; +// Effective stream detail level for the "sycl" stream +xpti::stream_detail_level_t GSYCLStreamDetailLevel = + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL; + xpti::trace_event_data_t *GMemAllocEvent = nullptr; xpti::trace_event_data_t *GSYCLGraphEvent = nullptr; xpti::trace_event_data_t *GSYCLCallEvent = nullptr; diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index 9598eefab7b27..30380fc730c2a 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -61,6 +61,8 @@ extern uint8_t GSYCLStreamID; extern uint8_t GSYCLDebugStreamID; extern uint8_t GUrApiStreamID; +extern xpti::stream_detail_level_t GSYCLStreamDetailLevel; + extern xpti::trace_event_data_t *GMemAllocEvent; extern xpti::trace_event_data_t *GSYCLGraphEvent; extern xpti::trace_event_data_t *GSYCLCallEvent; @@ -101,6 +103,10 @@ class XPTIRegistry { // SYCL events detail::GSYCLStreamID = this->initializeStream(SYCL_STREAM_NAME, GMajVer, GMinVer, GVerStr); + // Cache the effective stream detail level - it's immutable after stream + // initialization, so we query it once here + detail::GSYCLStreamDetailLevel = + xptiGetEffectiveStreamDetailLevel(detail::GSYCLStreamID); // Register the SYCL Debug event stream; tools subscribing to this stream // will receive additional metadata in the regular "sycl" stream. detail::GSYCLDebugStreamID = this->initializeStream( From 9000036d2cc0d90806f9b3f3c577756842e2026f Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Tue, 28 Apr 2026 13:05:29 -0700 Subject: [PATCH 3/4] Address review --- sycl/source/detail/queue_impl.cpp | 3 ++- sycl/source/detail/scheduler/commands.cpp | 24 +++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 026f476f410f6..362f515edd124 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -924,7 +924,8 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, IId = xptiGetUniqueId(); auto WaitEvent = Event->event_ref(); if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + isDebugStream(StreamID)) { xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this)); } // Full metadata is added only at VERBOSE level or if subscribing to diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 5028fb6f1cc61..447ff0225eb03 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -94,7 +94,8 @@ static size_t deviceToID(const device &Device) { static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue, xpti::stream_id_t StreamID) { if (detail::GSYCLStreamDetailLevel < - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL && + !detail::isDebugStream(StreamID)) return; xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue)); @@ -604,13 +605,15 @@ void Command::emitEdgeEventForCommandDependence( xpti::addMetadata(EdgeEvent, "memory_object", reinterpret_cast(ObjAddr)); if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + detail::isDebugStream(MStreamID)) { xpti::addMetadata(EdgeEvent, "access_mode", static_cast(AccMode.value())); } } else { if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + detail::isDebugStream(MStreamID)) { xpti::addMetadata(EdgeEvent, "event", reinterpret_cast(ObjAddr)); } @@ -684,7 +687,8 @@ void Command::emitEdgeEventForEventDependence(Command *Cmd, EdgeEvent->source_id = NodeEvent->unique_id; EdgeEvent->target_id = TgtEvent->unique_id; if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + detail::isDebugStream(MStreamID)) { xpti::addMetadata(EdgeEvent, "event", reinterpret_cast(UrEventAddr)); } @@ -1163,7 +1167,8 @@ void AllocaSubBufCommand::emitInstrumentationData() { xpti_td *TE = static_cast(MTraceEvent); if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + detail::isDebugStream(MStreamID)) { xpti::addMetadata(TE, "offset", this->MRequirement.MOffsetInBytes); xpti::addMetadata(TE, "access_range_start", this->MRequirement.MAccessRange[0]); @@ -1245,7 +1250,8 @@ void ReleaseCommand::emitInstrumentationData() { xpti_td *TE = static_cast(MTraceEvent); addDeviceMetadata(TE, MQueue, MStreamID); if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + detail::isDebugStream(MStreamID)) { xpti::addMetadata(TE, "allocation_type", commandToName(MAllocaCmd->getType())); } @@ -1535,7 +1541,8 @@ void MemCpyCommand::emitInstrumentationData() { xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + detail::isDebugStream(MStreamID)) { xpti::addMetadata(CmdTraceEvent, "copy_from", MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); xpti::addMetadata(CmdTraceEvent, "copy_to", @@ -1711,7 +1718,8 @@ void MemCpyCommandHost::emitInstrumentationData() { xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + detail::isDebugStream(MStreamID)) { xpti::addMetadata(CmdTraceEvent, "copy_from", MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); xpti::addMetadata(CmdTraceEvent, "copy_to", From f1427e394efc1f90029ce443b7e32ba8b2edbb1c Mon Sep 17 00:00:00 2001 From: "Gainullin, Artur" Date: Tue, 28 Apr 2026 13:24:41 -0700 Subject: [PATCH 4/4] Fix missed checks --- sycl/source/detail/queue_impl.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 362f515edd124..73a145f2a5d81 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -137,7 +137,8 @@ EventImplPtr queue_impl::memset(void *Ptr, int Value, size_t Count, TP.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "memory_size", Count); if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + isDebugStream(detail::getActiveXPTIStreamID())) { xpti::addMetadata(TEvent, "sycl_device", reinterpret_cast(MDevice.getHandleRef())); xpti::addMetadata(TEvent, "memory_ptr", reinterpret_cast(Ptr)); @@ -195,7 +196,8 @@ EventImplPtr queue_impl::memcpy(void *Dest, const void *Src, size_t Count, TP.addMetadata([&](auto TEvent) { xpti::addMetadata(TEvent, "memory_size", Count); if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + isDebugStream(detail::getActiveXPTIStreamID())) { xpti::addMetadata(TEvent, "sycl_device", reinterpret_cast(MDevice.getHandleRef())); xpti::addMetadata(TEvent, "src_memory_ptr", @@ -1061,7 +1063,8 @@ void queue_impl::constructorNotification() { // Cache the trace event, stream id and instance IDs for the destructor. MTraceEvent = (void *)TEvent; if (detail::GSYCLStreamDetailLevel >= - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL || + isDebugStream(detail::getActiveXPTIStreamID())) { xpti::addMetadata(TEvent, "sycl_context", reinterpret_cast(MContext->getHandleRef())); xpti::addMetadata(TEvent, "sycl_device_name",