diff --git a/src/uct/ib/base/ib_device.h b/src/uct/ib/base/ib_device.h index 907abd13d0e..81d01c8f7df 100644 --- a/src/uct/ib/base/ib_device.h +++ b/src/uct/ib/base/ib_device.h @@ -129,7 +129,10 @@ enum { /* Used for IB link layer. */ UCT_IB_ADDRESS_FLAG_SUBNET64 = UCS_BIT(5), /* Used for IB link layer. */ - UCT_IB_ADDRESS_FLAG_IF_ID = UCS_BIT(6) + UCT_IB_ADDRESS_FLAG_IF_ID = UCS_BIT(6), + + /* Traffic class value, used for both ETH or IB link layer. */ + UCT_IB_ADDRESS_FLAG_TRAFFIC_CLASS = UCS_BIT(7) }; diff --git a/src/uct/ib/base/ib_iface.c b/src/uct/ib/base/ib_iface.c index 31428345c99..f368a46cfd7 100644 --- a/src/uct/ib/base/ib_iface.c +++ b/src/uct/ib/base/ib_iface.c @@ -353,7 +353,11 @@ static inline uct_ib_roce_version_t uct_ib_address_flags_get_roce_version(uint8_t flags) { ucs_assert(flags & UCT_IB_ADDRESS_FLAG_LINK_LAYER_ETH); - return (uct_ib_roce_version_t)(flags >> ucs_ilog2(UCT_IB_ADDRESS_FLAG_ETH_LAST)); + /* Mask out the traffic class flag (bit 7) which is above the RoCE version + * bits (5-6) to avoid corrupting the extracted version */ + return (uct_ib_roce_version_t)((flags & + ~UCT_IB_ADDRESS_FLAG_TRAFFIC_CLASS) >> + ucs_ilog2(UCT_IB_ADDRESS_FLAG_ETH_LAST)); } static inline sa_family_t @@ -408,6 +412,10 @@ size_t uct_ib_address_size(const uct_ib_address_pack_params_t *params) size += sizeof(uint16_t); } + if (params->flags & UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS) { + size += sizeof(uint8_t); + } + return size; } @@ -486,6 +494,11 @@ void uct_ib_address_pack(const uct_ib_address_pack_params_t *params, ib_addr->flags |= UCT_IB_ADDRESS_FLAG_PKEY; *ucs_serialize_next(&ptr, uint16_t) = params->pkey; } + + if (params->flags & UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS) { + ib_addr->flags |= UCT_IB_ADDRESS_FLAG_TRAFFIC_CLASS; + *ucs_serialize_next(&ptr, uint8_t) = params->traffic_class; + } } unsigned uct_ib_iface_address_pack_flags(uct_ib_iface_t *iface) @@ -512,6 +525,10 @@ unsigned uct_ib_iface_address_pack_flags(uct_ib_iface_t *iface) pack_flags |= UCT_IB_ADDRESS_PACK_FLAG_PATH_MTU; } + if (iface->config.traffic_class != 0) { + pack_flags |= UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS; + } + return pack_flags; } @@ -529,14 +546,15 @@ void uct_ib_iface_address_pack(uct_ib_iface_t *iface, uct_ib_address_t *ib_addr) { uct_ib_address_pack_params_t params; - params.flags = uct_ib_iface_address_pack_flags(iface); - params.gid = iface->gid_info.gid; - params.lid = uct_ib_iface_port_attr(iface)->lid; - params.roce_info = iface->gid_info.roce_info; - params.path_mtu = iface->config.path_mtu; + params.flags = uct_ib_iface_address_pack_flags(iface); + params.gid = iface->gid_info.gid; + params.lid = uct_ib_iface_port_attr(iface)->lid; + params.roce_info = iface->gid_info.roce_info; + params.path_mtu = iface->config.path_mtu; /* to suppress gcc 4.3.4 warning */ - params.gid_index = UCT_IB_ADDRESS_INVALID_GID_INDEX; - params.pkey = iface->pkey; + params.gid_index = UCT_IB_ADDRESS_INVALID_GID_INDEX; + params.pkey = iface->pkey; + params.traffic_class = iface->config.traffic_class; uct_ib_address_pack(¶ms, ib_addr); } @@ -610,6 +628,11 @@ ucs_status_t uct_ib_address_unpack(const uct_ib_address_t *ib_addr, /* PKEY is always in params */ params.flags |= UCT_IB_ADDRESS_PACK_FLAG_PKEY; + if (ib_addr->flags & UCT_IB_ADDRESS_FLAG_TRAFFIC_CLASS) { + params.traffic_class = *ucs_serialize_next(&ptr, const uint8_t); + params.flags |= UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS; + } + *params_p = params; return UCS_OK; } @@ -1125,6 +1148,15 @@ uct_ib_iface_fill_ah_attr_from_addr(uct_ib_iface_t *iface, lid = (flid == 0) ? params.lid : flid; uct_ib_iface_fill_ah_attr_from_gid_lid(iface, lid, gid, params.gid_index, path_index, ah_attr); + + /* If the remote address carries a traffic class and the local interface + * does not have one configured, apply the remote value so that both + * directions of the connection use the same traffic class. */ + if ((params.flags & UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS) && + (iface->config.traffic_class == 0)) { + ah_attr->grh.traffic_class = params.traffic_class; + } + return UCS_OK; } diff --git a/src/uct/ib/base/ib_iface.h b/src/uct/ib/base/ib_iface.h index 717a9271b2c..44f36cde69a 100644 --- a/src/uct/ib/base/ib_iface.h +++ b/src/uct/ib/base/ib_iface.h @@ -105,7 +105,8 @@ enum { UCT_IB_ADDRESS_PACK_FLAG_SUBNET_PREFIX = UCS_BIT(2), UCT_IB_ADDRESS_PACK_FLAG_PATH_MTU = UCS_BIT(3), UCT_IB_ADDRESS_PACK_FLAG_GID_INDEX = UCS_BIT(4), - UCT_IB_ADDRESS_PACK_FLAG_PKEY = UCS_BIT(5) + UCT_IB_ADDRESS_PACK_FLAG_PKEY = UCS_BIT(5), + UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS = UCS_BIT(6) }; @@ -147,6 +148,9 @@ typedef struct uct_ib_address_pack_params { /* PKEY value, must be valid if @ref UCT_IB_ADDRESS_PACK_FLAG_PKEY is set. */ uint16_t pkey; + /* Traffic class value, + must be valid if @ref UCT_IB_ADDRESS_PACK_FLAG_TRAFFIC_CLASS is set. */ + uint8_t traffic_class; } uct_ib_address_pack_params_t; diff --git a/src/uct/ib/mlx5/rc/rc_mlx5_devx.c b/src/uct/ib/mlx5/rc/rc_mlx5_devx.c index d6c7dc12582..6702228a0eb 100644 --- a/src/uct/ib/mlx5/rc/rc_mlx5_devx.c +++ b/src/uct/ib/mlx5/rc/rc_mlx5_devx.c @@ -434,7 +434,7 @@ ucs_status_t uct_rc_mlx5_iface_common_devx_connect_qp( UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.udp_sport, ah_attr->dlid); UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.dscp, - uct_ib_iface_roce_dscp(&iface->super.super)); + ah_attr->grh.traffic_class >> 2); } uct_ib_mlx5_devx_set_qpc_port_affinity(md, path_index, qpc, @@ -457,7 +457,7 @@ ucs_status_t uct_rc_mlx5_iface_common_devx_connect_qp( UCT_IB_MLX5DV_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip)); /* TODO add flow_label support */ UCT_IB_MLX5DV_SET(qpc, qpc, primary_address_path.tclass, - iface->super.super.config.traffic_class); + ah_attr->grh.traffic_class); } } diff --git a/test/gtest/uct/ib/test_ib.cc b/test/gtest/uct/ib/test_ib.cc index 80db6e99651..c41b3c042dd 100644 --- a/test/gtest/uct/ib/test_ib.cc +++ b/test/gtest/uct/ib/test_ib.cc @@ -102,8 +102,9 @@ class test_uct_ib_addr : public test_uct_ib { pack_params.lid = lid_in; pack_params.roce_info = iface->gid_info.roce_info; pack_params.path_mtu = iface->config.path_mtu; - pack_params.gid_index = std::numeric_limits::max(); - pack_params.pkey = iface->pkey; + pack_params.gid_index = std::numeric_limits::max(); + pack_params.pkey = iface->pkey; + pack_params.traffic_class = iface->config.traffic_class; address_size = uct_ib_address_size(&pack_params); ib_addr = (uct_ib_address_t*)malloc(address_size); uct_ib_address_pack(&pack_params, ib_addr);