From fddaa2bd053b43e798176534befc738576a38fb8 Mon Sep 17 00:00:00 2001 From: Nathan Date: Mon, 30 Mar 2026 22:15:16 -0400 Subject: [PATCH 1/2] UCT/IB: Propagate traffic class in IB address for symmetric QoS Pack the traffic class into the IB address exchanged during connection setup so the remote side can apply it when the local interface has no traffic class configured. This fixes one-directional DSCP/TC when UCX_IB_TRAFFIC_CLASS is set, ensuring both sides of an RC connection use the same traffic class value. Also update the DevX QP connect path to read traffic class from ah_attr (which may carry the remote value) instead of always using the local iface config. Closes #10325 --- src/uct/ib/base/ib_device.h | 5 +++- src/uct/ib/base/ib_iface.c | 42 +++++++++++++++++++++++++------ src/uct/ib/base/ib_iface.h | 6 ++++- src/uct/ib/mlx5/rc/rc_mlx5_devx.c | 4 +-- 4 files changed, 46 insertions(+), 11 deletions(-) diff --git a/src/uct/ib/base/ib_device.h b/src/uct/ib/base/ib_device.h index 907abd13d0e..81d01c8f7df 100644 --- a/src/uct/ib/base/ib_device.h +++ b/src/uct/ib/base/ib_device.h @@ -129,7 +129,10 @@ enum { /* Used for IB link layer. */ UCT_IB_ADDRESS_FLAG_SUBNET64 = UCS_BIT(5), /* Used for IB link layer. */ - UCT_IB_ADDRESS_FLAG_IF_ID = UCS_BIT(6) + UCT_IB_ADDRESS_FLAG_IF_ID = UCS_BIT(6), + + /* Traffic class value, used for both ETH or IB link layer. */ + UCT_IB_ADDRESS_FLAG_TRAFFIC_CLASS = UCS_BIT(7) }; diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index 31428345c99..0be020aed48 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -408,6 +408,10 @@ size_t uct_ib_address_size(const uct_ib_address_pack_params_t *params) size += sizeof(uint16_t); } + if (params->flags & UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS) { + size += sizeof(uint8_t); + } + return size; } @@ -486,6 +490,11 @@ void uct_ib_address_pack(const uct_ib_address_pack_params_t *params, ib_addr->flags |= UCT_IB_ADDRESS_FLAG_PKEY; *ucs_serialize_next(&ptr, uint16_t) = params->pkey; } + + if (params->flags & UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS) { + ib_addr->flags |= UCT_IB_ADDRESS_FLAG_TRAFFIC_CLASS; + *ucs_serialize_next(&ptr, uint8_t) = params->traffic_class; + } } unsigned uct_ib_iface_address_pack_flags(uct_ib_iface_t *iface) @@ -512,6 +521,10 @@ unsigned uct_ib_iface_address_pack_flags(uct_ib_iface_t *iface) pack_flags |= UCT_IB_ADDRESS_PACK_FLAG_PATH_MTU; } + if (iface->config.traffic_class != 0) { + pack_flags |= UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS; + } + return pack_flags; } @@ -529,14 +542,15 @@ void uct_ib_iface_address_pack(uct_ib_iface_t *iface, uct_ib_address_t *ib_addr) { uct_ib_address_pack_params_t params; - params.flags = uct_ib_iface_address_pack_flags(iface); - params.gid = iface->gid_info.gid; - params.lid = uct_ib_iface_port_attr(iface)->lid; - params.roce_info = iface->gid_info.roce_info; - params.path_mtu = iface->config.path_mtu; + params.flags = uct_ib_iface_address_pack_flags(iface); + params.gid = iface->gid_info.gid; + params.lid = uct_ib_iface_port_attr(iface)->lid; + params.roce_info = iface->gid_info.roce_info; + params.path_mtu = iface->config.path_mtu; /* to suppress gcc 4.3.4 warning */ - params.gid_index = UCT_IB_ADDRESS_INVALID_GID_INDEX; - params.pkey = iface->pkey; + params.gid_index = UCT_IB_ADDRESS_INVALID_GID_INDEX; + params.pkey = iface->pkey; + params.traffic_class = iface->config.traffic_class; uct_ib_address_pack(¶ms, ib_addr); } @@ -610,6 +624,11 @@ ucs_status_t uct_ib_address_unpack(const uct_ib_address_t *ib_addr, /* PKEY is always in params */ params.flags |= UCT_IB_ADDRESS_PACK_FLAG_PKEY; + if (ib_addr->flags & UCT_IB_ADDRESS_FLAG_TRAFFIC_CLASS) { + params.traffic_class = *ucs_serialize_next(&ptr, const uint8_t); + params.flags |= UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS; + } + *params_p = params; return UCS_OK; } @@ -1125,6 +1144,15 @@ uct_ib_iface_fill_ah_attr_from_addr(uct_ib_iface_t *iface, lid = (flid == 0) ? params.lid : flid; uct_ib_iface_fill_ah_attr_from_gid_lid(iface, lid, gid, params.gid_index, path_index, ah_attr); + + /* If the remote address carries a traffic class and the local interface + * does not have one configured, apply the remote value so that both + * directions of the connection use the same traffic class. */ + if ((params.flags & UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS) && + (iface->config.traffic_class == 0)) { + ah_attr->grh.traffic_class = params.traffic_class; + } + return UCS_OK; } diff --git a/src/uct/ib/base/ib_iface.h b/src/uct/ib/base/ib_iface.h index 717a9271b2c..44f36cde69a 100644 --- a/src/uct/ib/base/ib_iface.h +++ b/src/uct/ib/base/ib_iface.h @@ -105,7 +105,8 @@ enum { UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX = UCS_BIT(2), UCT_IB_ADDRESS_PACK_FLAG_PATH_MTU = UCS_BIT(3), UCT_IB_ADDRESS_PACK_FLAG_GID_INDEX = UCS_BIT(4), - UCT_IB_ADDRESS_PACK_FLAG_PKEY = UCS_BIT(5) + UCT_IB_ADDRESS_PACK_FLAG_PKEY = UCS_BIT(5), + UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS = UCS_BIT(6) }; @@ -147,6 +148,9 @@ typedef struct uct_ib_address_pack_params { /* PKEY value, must be valid if @ref UCT_IB_ADDRESS_PACK_FLAG_PKEY is set. */ uint16_t pkey; + /* Traffic class value, + must be valid if @ref UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS is set. */ + uint8_t traffic_class; } uct_ib_address_pack_params_t; diff --git a/src/uct/ib/mlx5/rc/rc_mlx5_devx.c b/src/uct/ib/mlx5/rc/rc_mlx5_devx.c index d6c7dc12582..6702228a0eb 100644 --- a/src/uct/ib/mlx5/rc/rc_mlx5_devx.c +++ b/src/uct/ib/mlx5/rc/rc_mlx5_devx.c @@ -434,7 +434,7 @@ ucs_status_t uct_rc_mlx5_iface_common_devx_connect_qp( UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.udp_sport, ah_attr->dlid); UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.dscp, - uct_ib_iface_roce_dscp(&iface->super.super)); + ah_attr->grh.traffic_class >> 2); } uct_ib_mlx5_devx_set_qpc_port_affinity(md, path_index, qpc, @@ -457,7 +457,7 @@ ucs_status_t uct_rc_mlx5_iface_common_devx_connect_qp( UCT_IB_MLX5DV_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip)); /* TODO add flow_label support */ UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.tclass, - iface->super.super.config.traffic_class); + ah_attr->grh.traffic_class); } } From 8962d27d7e2b58878d198dc7887d479b5c3d46b5 Mon Sep 17 00:00:00 2001 From: Nathan Date: Mon, 30 Mar 2026 22:15:16 -0400 Subject: [PATCH 2/2] UCT/IB: Fix RoCE version extraction with traffic class flag The UCT_IB_ADDRESS_FLAG_TRAFFIC_CLASS at BIT(7) overlaps with the RoCE version bits in the flags byte. The version extraction does flags >> 5, which includes bit 7, corrupting the RoCE version when traffic class is packed. Mask out the traffic class flag before extracting the version. Also initialize traffic_class in test pack_params to avoid reading uninitialized memory when the traffic class flag is set. --- src/uct/ib/base/ib_iface.c | 6 +++++- test/gtest/uct/ib/test_ib.cc | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index 0be020aed48..f368a46cfd7 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -353,7 +353,11 @@ static inline uct_ib_roce_version_t uct_ib_address_flags_get_roce_version(uint8_t flags) { ucs_assert(flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH); - return (uct_ib_roce_version_t)(flags >> ucs_ilog2(UCT_IB_ADDRESS_FLAG_ETH_LAST)); + /* Mask out the traffic class flag (bit 7) which is above the RoCE version + * bits (5-6) to avoid corrupting the extracted version */ + return (uct_ib_roce_version_t)((flags & + ~UCT_IB_ADDRESS_FLAG_TRAFFIC_CLASS) >> + ucs_ilog2(UCT_IB_ADDRESS_FLAG_ETH_LAST)); } static inline sa_family_t diff --git a/test/gtest/uct/ib/test_ib.cc b/test/gtest/uct/ib/test_ib.cc index 80db6e99651..c41b3c042dd 100644 --- a/test/gtest/uct/ib/test_ib.cc +++ b/test/gtest/uct/ib/test_ib.cc @@ -102,8 +102,9 @@ class test_uct_ib_addr : public test_uct_ib { pack_params.lid = lid_in; pack_params.roce_info = iface->gid_info.roce_info; pack_params.path_mtu = iface->config.path_mtu; - pack_params.gid_index = std::numeric_limits::max(); - pack_params.pkey = iface->pkey; + pack_params.gid_index = std::numeric_limits::max(); + pack_params.pkey = iface->pkey; + pack_params.traffic_class = iface->config.traffic_class; address_size = uct_ib_address_size(&pack_params); ib_addr = (uct_ib_address_t*)malloc(address_size); uct_ib_address_pack(&pack_params, ib_addr);