From 3af7fdd1cbf543ccb0b78ad579fcf683305538ef Mon Sep 17 00:00:00 2001 From: JaiOCP Date: Tue, 21 Apr 2026 13:18:18 -0700 Subject: [PATCH] SAI API Performance Monitoring Signed-off-by: JaiOCP --- doc/perfmon/SAI-perfmon-Spec.md | 243 ++++++++++++++++++++++++++++++++ inc/sai.h | 2 + inc/saiperfmon.h | 190 +++++++++++++++++++++++++ inc/saiswitch.h | 10 ++ inc/saitypes.h | 1 + 5 files changed, 446 insertions(+) create mode 100755 doc/perfmon/SAI-perfmon-Spec.md create mode 100644 inc/saiperfmon.h diff --git a/doc/perfmon/SAI-perfmon-Spec.md b/doc/perfmon/SAI-perfmon-Spec.md new file mode 100755 index 000000000..22ad40a0b --- /dev/null +++ b/doc/perfmon/SAI-perfmon-Spec.md @@ -0,0 +1,243 @@ +# Performance Monitoring SAI Specification +------------------------------------------------------------------------------- + Title | SAI support for Performance Monitoring +:-------------|:----------------------------------------------------------------- + Authors | Jai Kumar, Broadcom Inc + Status | In review + Type | Standards track + Created | 03/18/2026: Initial Draft + SAI-Version | 1.19 +------------------------------------------------------------------------------- + + +## 1.0 Introduction +As network fabric scale increases and data centers require regional spine connectivity, the number of downlinks for cluster connectivity is growing. This leads to more LAGs, more prefixes, and larger ECMP. This is also true for large scale up and scale across fabrics for AI/ML. + +This increasing scale mandates that SAI be scalable, reliable, and high-performance. This specification addresses the performance component of SAI by introducing a new set of metrics to accurately measure the performance of various components within the SAI layer and below, such as SDK and hardware updates. + +Using these metrics, deployments can isolate components impacting performance and focus on their optimization. 
+ + + +## 2.0 Terms and Acronyms + +| Term| Description | +|:---|:---| +| perfmon | Performance Monitoring | + +## 3.0 Overview +The SAI infrastructure exposes a set of APIs as a standard interface to the upper layer. + +These APIs are synchronous and blocking, making the completion time of any given API a critical performance measure. Note that application-specific callbacks are not addressed by this specification. + +``` +/** + * @brief SAI common API type + */ +typedef enum _sai_common_api_t +{ + SAI_COMMON_API_CREATE = 0, + SAI_COMMON_API_REMOVE = 1, + SAI_COMMON_API_SET = 2, + SAI_COMMON_API_GET = 3, + SAI_COMMON_API_BULK_CREATE = 4, + SAI_COMMON_API_BULK_REMOVE = 5, + SAI_COMMON_API_BULK_SET = 6, + SAI_COMMON_API_BULK_GET = 7, + SAI_COMMON_API_MAX = 8, +} sai_common_api_t; + +``` + +This specification proposes API performance measures for the following metrics: +1. Average Latency +2. Instantaneous Latency +3. Maximum Latency + +### 3.1 Average, Instantaneous, and Maximum Latency +API completion time consists of the time spent in the SAI adapter and the SDK, including hardware update or query time. The time measured is irrespective of the status of the API call, i.e., if the API call completes with an error status, the adapter will still account for the measured latency during the time interval of the metrics computation. The NOS tracks the return status of API calls and can account for errors as needed. Discounting latency for specific error statuses would result in inconsistent measurements, requiring metric subscribers to implement manual workarounds for those cases. 
+ +These metrics can be used to: +- Improve SAI adapter and SDK implementations +- Provide a baseline for comparing different hardware +- Instantaneous value: Provides [time, n], where n > 1 represents the number of objects in a bulk API, or n = 1 represents the last observed latency for a single object +- Maximum: The highest value observed across the last n invocations +- Average: The average value over the last n invocations. + + +## 4.0 SAI Specification +A new perfmon object is introduced. Each perfmon object specifies the object type of interest, the API to be monitored, and the metric to be measured for that API. + + +Each perfmon object created has a binding to the switch object. + +### 4.2 Perfmon Object +A new perfmon object is introduced, specifying the API and metric of interest. + +#### 4.3.1 Metrics +Each API can be measured for a specific performance metric as specified in sai_perfmon_metrics_t + +``` +/** + * @brief Performance Monitoring Metrics + */ +typedef enum _sai_perfmon_metrics_t +{ + /** + * @brief None + */ + SAI_PERFMON_METRICS_NONE, + + /** + * @brief Maximum latency observed + */ + SAI_PERFMON_METRICS_MAX_LATENCY, + + /** + * @brief Average latency observed + */ + SAI_PERFMON_METRICS_AVERAGE_LATENCY, + + /** + * @brief Instantaneous latency observed + */ + SAI_PERFMON_METRICS_INST_LATENCY, + +} sai_perfmon_metrics_t; + +``` + +#### 4.3.2 Perfmon Object Attributes +The type of API to be monitored for performance and its associated attributes are specified in the perfmon object attributes + +``` +/** + * @brief Performance Monitoring Attributes + */ +typedef enum _sai_perfmon_attr_t +{ + /** + * @brief Start of Attributes + */ + SAI_PERFMON_ATTR_START, + + /** + * @brief Object to be monitored + * + * @type sai_object_type_t + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + */ + SAI_PERFMON_ATTR_OBJECT_TYPE = SAI_PERFMON_ATTR_START, + + /** + * @brief API to be monitored + * + * @type sai_common_api_t + * @flags CREATE_AND_SET + */ + SAI_PERFMON_ATTR_COMMON_API, + + /** + * 
@brief Performance metrics to be collected + * + * @type sai_perfmon_metrics_t + * @flags CREATE_AND_SET + * @default SAI_PERFMON_METRICS_NONE + */ + SAI_PERFMON_ATTR_PERFMON_METRICS, + + /** + * @brief Performance data as collected. This is clear on read. + * Performance data is computed once enabled and is cleared once read. + * + * @type sai_uint64_t + * @flags READ_ONLY + */ + SAI_PERFMON_ATTR_PERFDATA, + + /** + * @brief End of Performance Monitoring attributes + */ + SAI_PERFMON_ATTR_END, + + /** Custom range base value */ + SAI_PERFMON_ATTR_CUSTOM_RANGE_START = 0x10000000, + + /** End of custom range base */ + SAI_PERFMON_ATTR_CUSTOM_RANGE_END + +} sai_perfmon_attr_t; + +``` + +#### 4.3.3 Perfmon Object Switch Binding +A list of perfmon objects can be bound to the switch object. This binding can be done as a SET operation once the perfmon object is created. + +``` + /** + * @brief Performance Monitoring enabled on the switch + * + * @type sai_object_list_t + * @flags CREATE_AND_SET + * @objects SAI_OBJECT_TYPE_PERFMON + * @default empty + */ + SAI_SWITCH_ATTR_PERFMON_LIST, +``` + + +## 5.0 Sample Workflow + +This section describes how to enable performance monitoring for a given API and metric. + +### 5.1 Create perfmon object +- Each perfmon object supports a single API and a single metric. To monitor additional metrics for the same API or to monitor a different API, a new perfmon object must be created. +- Monitoring in the SAI adapter will only begin once the perfmon object is bound to the switch object. 
+ +``` +/* + * Configure perfmon to measure the average latency of the route entry bulk set API + */ + +// Specify the object of interest +sai_attr_list[0].id = SAI_PERFMON_ATTR_OBJECT_TYPE; +sai_attr_list[0].value.s32 = SAI_OBJECT_TYPE_ROUTE_ENTRY; + +// Specify the API of interest +sai_attr_list[1].id = SAI_PERFMON_ATTR_COMMON_API; +sai_attr_list[1].value.s32 = SAI_COMMON_API_BULK_SET; + +// Configure metrics to be measured +sai_attr_list[2].id = SAI_PERFMON_ATTR_PERFMON_METRICS; +sai_attr_list[2].value.s32 = SAI_PERFMON_METRICS_AVERAGE_LATENCY; + +// Configure Time Interval in msec +sai_attr_list[3].id = SAI_PERFMON_ATTR_METRICS_TIME_INTERVAL; +sai_attr_list[3].value.u32 = 2048; + + +// Create perfmon object +attr_count = 4; +create_perfmon( + &sai_perfmon_object, + switch_id, + attr_count, + sai_attr_list); +``` + +### 5.2 Read perfmon Metrics + +Read the perfmon attribute to get the API-related metrics. + +``` +// Specify the read attribute +sai_attr_list[0].id = SAI_PERFMON_ATTR_PERFDATA; + +// Read perfmon metrics +attr_count = 1; +get_perfmon_attribute( + sai_perfmon_object, + attr_count, + sai_attr_list); +``` + diff --git a/inc/sai.h b/inc/sai.h index d8f90c44b..bfeb264fa 100644 --- a/inc/sai.h +++ b/inc/sai.h @@ -48,6 +48,7 @@ #include "sainexthopgroup.h" #include "sainexthop.h" #include "saiobject.h" +#include "saiperfmon.h" #include "saipolicer.h" #include "saiport.h" #include "saiqosmap.h" @@ -155,6 +156,7 @@ typedef enum _sai_api_t SAI_API_PREFIX_COMPRESSION = 53, /**< sai_prefix_compression_api_t */ SAI_API_SYNCE = 54, /**< sai_synce_api_t */ SAI_API_VIRTUAL_CHANNEL = 55, /**< sai_virtual_channel_api_t */ + SAI_API_PERFMON = 56, /**< sai_perfmon_api_t */ SAI_API_MAX, /**< total number of APIs */ /** diff --git a/inc/saiperfmon.h b/inc/saiperfmon.h new file mode 100644 index 000000000..a9c846bc5 --- /dev/null +++ b/inc/saiperfmon.h @@ -0,0 +1,190 @@ +/** + * Copyright (c) 2014 Microsoft Open Technologies, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. You may obtain + * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT + * LIMITATION ANY IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS + * FOR A PARTICULAR PURPOSE, MERCHANTABILITY OR NON-INFRINGEMENT. + * + * See the Apache Version 2.0 License for specific language governing + * permissions and limitations under the License. + * + * Microsoft would like to thank the following companies for their review and + * assistance with these files: Intel Corporation, Mellanox Technologies Ltd, + * Dell Products, L.P., Facebook, Inc., Marvell International Ltd. + * + * @file saiperfmon.h + * + * @brief This module defines SAI Performance Monitoring spec + */ + +#if !defined (__SAIPERFMON_H_) +#define __SAIPERFMON_H_ + +#include <saitypes.h> + +/** + * @defgroup SAIPERFMON SAI - Performance Monitoring specific API definitions + * + * @{ + */ + +/** + * @brief Performance Monitoring Metrics + */ +typedef enum _sai_perfmon_metrics_t +{ + /** + * @brief None + */ + SAI_PERFMON_METRICS_NONE, + + /** + * @brief Maximum latency observed + */ + SAI_PERFMON_METRICS_MAX_LATENCY, + + /** + * @brief Average latency observed + */ + SAI_PERFMON_METRICS_AVERAGE_LATENCY, + + /** + * @brief Instantaneous latency observed + */ + SAI_PERFMON_METRICS_INST_LATENCY, + +} sai_perfmon_metrics_t; + +/** + * @brief Performance Monitoring Attributes + */ +typedef enum _sai_perfmon_attr_t +{ + /** + * @brief Start of Attributes + */ + SAI_PERFMON_ATTR_START, + + /** + * @brief Object to be monitored + * + * @type sai_object_type_t + * @flags MANDATORY_ON_CREATE | CREATE_ONLY + */ + SAI_PERFMON_ATTR_OBJECT_TYPE = SAI_PERFMON_ATTR_START, + + /** + * @brief API to be monitored + * + * @type sai_common_api_t + * 
@flags CREATE_AND_SET + * @default SAI_COMMON_API_CREATE + */ + SAI_PERFMON_ATTR_COMMON_API, + + /** + * @brief Performance metrics to be collected + * + * @type sai_perfmon_metrics_t + * @flags CREATE_AND_SET + * @default SAI_PERFMON_METRICS_NONE + */ + SAI_PERFMON_ATTR_PERFMON_METRICS, + + /** + * @brief Performance data as collected. This is clear on read. + * Performance data is computed once enabled and is cleared once read. + * + * @type sai_uint64_t + * @flags READ_ONLY + */ + SAI_PERFMON_ATTR_PERFDATA, + + /** + * @brief End of Performance Monitoring attributes + */ + SAI_PERFMON_ATTR_END, + + /** Custom range base value */ + SAI_PERFMON_ATTR_CUSTOM_RANGE_START = 0x10000000, + + /** End of custom range base */ + SAI_PERFMON_ATTR_CUSTOM_RANGE_END + +} sai_perfmon_attr_t; + +/** + * @brief Create performance monitoring object + * + * @param[out] perfmon_id Performance Monitoring id + * @param[in] switch_id Switch id + * @param[in] attr_count Number of attributes + * @param[in] attr_list Array of attributes + * + * @return #SAI_STATUS_SUCCESS on success, failure status code on error + */ +typedef sai_status_t (*sai_create_perfmon_fn)( + _Out_ sai_object_id_t *perfmon_id, + _In_ sai_object_id_t switch_id, + _In_ uint32_t attr_count, + _In_ const sai_attribute_t *attr_list); + +/** + * @brief Remove performance monitoring object + * + * @param[in] perfmon_id Performance monitoring id + * + * @return #SAI_STATUS_SUCCESS on success, failure status code on error + */ +typedef sai_status_t (*sai_remove_perfmon_fn)( + _In_ sai_object_id_t perfmon_id); + +/** + * @brief Set performance monitoring attribute + * + * @param[in] perfmon_id Performance monitoring id + * @param[in] attr Attribute + * + * @return #SAI_STATUS_SUCCESS on success, failure status code on error + */ +typedef sai_status_t (*sai_set_perfmon_attribute_fn)( + _In_ sai_object_id_t perfmon_id, + _In_ const sai_attribute_t *attr); + +/** + * @brief Get Performance Monitoring attribute + * + * @param[in] 
perfmon_id Performance monitoring ID + * @param[in] attr_count Number of attributes + * @param[inout] attr_list Array of attributes + * + * @return #SAI_STATUS_SUCCESS on success, failure status code on error + */ +typedef sai_status_t (*sai_get_perfmon_attribute_fn)( + _In_ sai_object_id_t perfmon_id, + _In_ uint32_t attr_count, + _Inout_ sai_attribute_t *attr_list); + +/** + * @brief Performance Monitoring API methods table retrieved with sai_api_query() + */ +typedef struct _sai_perfmon_api_t +{ + /** + * @brief SAI Performance Monitoring API set + */ + sai_create_perfmon_fn create_perfmon; + sai_remove_perfmon_fn remove_perfmon; + sai_set_perfmon_attribute_fn set_perfmon_attribute; + sai_get_perfmon_attribute_fn get_perfmon_attribute; +} sai_perfmon_api_t; + +/** + * @} + */ +#endif /** __SAIPERFMON_H_ */ diff --git a/inc/saiswitch.h b/inc/saiswitch.h index 64072c22c..d92b1c2d8 100644 --- a/inc/saiswitch.h +++ b/inc/saiswitch.h @@ -3598,6 +3598,16 @@ typedef enum _sai_switch_attr_t */ SAI_SWITCH_ATTR_PTP_SYNTONIZE_ADJUST, + /** + * @brief Performance Monitoring enabled on the switch + * + * @type sai_object_list_t + * @flags CREATE_AND_SET + * @objects SAI_OBJECT_TYPE_PERFMON + * @default empty + */ + SAI_SWITCH_ATTR_PERFMON_LIST, + /** * @brief End of attributes */ diff --git a/inc/saitypes.h b/inc/saitypes.h index 287a9e0da..3d3c6a7a5 100644 --- a/inc/saitypes.h +++ b/inc/saitypes.h @@ -306,6 +306,7 @@ typedef enum _sai_object_type_t SAI_OBJECT_TYPE_VIRTUAL_CHANNEL = 116, SAI_OBJECT_TYPE_CBFC_CREDIT_POOL = 117, SAI_OBJECT_TYPE_CBFC_CREDIT_PROFILE = 118, + SAI_OBJECT_TYPE_PERFMON = 119, /** Must remain in last position */ SAI_OBJECT_TYPE_MAX,