diff --git a/src/ucp/api/ucp.h b/src/ucp/api/ucp.h
index 7dbcf68f644..5d81a2bffd7 100644
--- a/src/ucp/api/ucp.h
+++ b/src/ucp/api/ucp.h
@@ -4214,7 +4214,8 @@ enum ucp_ep_attr_field {
     UCP_EP_ATTR_FIELD_LOCAL_SOCKADDR  = UCS_BIT(1), /**< Sockaddr used by the endpoint */
     UCP_EP_ATTR_FIELD_REMOTE_SOCKADDR = UCS_BIT(2), /**< Sockaddr the endpoint is connected to */
     UCP_EP_ATTR_FIELD_TRANSPORTS      = UCS_BIT(3), /**< Transport and device used by endpoint */
-    UCP_EP_ATTR_FIELD_USER_DATA       = UCS_BIT(4)  /**< User data associated with the endpoint */
+    UCP_EP_ATTR_FIELD_USER_DATA       = UCS_BIT(4), /**< User data associated with the endpoint */
+    UCP_EP_ATTR_FIELD_ESTIMATED_BW    = UCS_BIT(5)  /**< Estimated bandwidth for a memory type pair */
 };
 
 
@@ -4268,6 +4269,24 @@ typedef struct ucp_ep_attr {
      * @ref ucp_ep_params_t::user_data.
      */
     void *user_data;
+
+    /**
+     * Estimated bandwidth (in bytes/second) for a given pair of local and
+     * remote memory types. The caller sets @a local_mem_type and
+     * @a remote_mem_type before calling @ref ucp_ep_query, and the
+     * implementation fills in @a bandwidth with the aggregate estimated
+     * bandwidth across the endpoint's data lanes that support the requested
+     * local memory type. Both memory types must be valid
+     * @ref ucs_memory_type_t values, otherwise @ref ucp_ep_query returns
+     * @ref UCS_ERR_INVALID_PARAM. @a remote_mem_type is currently validated
+     * but not used to filter lanes.
+     */
+    struct {
+        ucs_memory_type_t local_mem_type;  /**< [in] Local memory type */
+        ucs_memory_type_t remote_mem_type; /**< [in] Remote memory type */
+        double            bandwidth;       /**< [out] Estimated bandwidth
+                                                (bytes/second) */
+    } estimated_bw;
 } ucp_ep_attr_t;
 
 
diff --git a/src/ucp/core/ucp_ep.c b/src/ucp/core/ucp_ep.c
index 98382548d26..09cfb8a266e 100644
--- a/src/ucp/core/ucp_ep.c
+++ b/src/ucp/core/ucp_ep.c
@@ -3976,6 +3976,72 @@ ucs_status_t ucp_ep_query_sockaddr(ucp_ep_h ep, ucp_ep_attr_t *attr)
     return UCS_OK;
 }
 
+/**
+ * Sum the estimated bandwidth of every endpoint lane whose memory domain can
+ * register or access @a attr->estimated_bw.local_mem_type, and store the
+ * result in @a attr->estimated_bw.bandwidth.
+ *
+ * @param [in]    ep    Endpoint to query.
+ * @param [inout] attr  Attributes structure; the memory types in
+ *                      @a estimated_bw are inputs, @a bandwidth is output.
+ *
+ * @return UCS_OK on success, or UCS_ERR_INVALID_PARAM if either memory type
+ *         is outside the range of valid @ref ucs_memory_type_t values.
+ */
+static ucs_status_t
+ucp_ep_query_estimated_bw(ucp_ep_h ep, ucp_ep_attr_t *attr)
+{
+    ucp_worker_h worker         = ep->worker;
+    ucp_context_h context       = worker->context;
+    ucp_ep_config_t *config     = ucp_ep_config(ep);
+    ucs_memory_type_t local_mt  = attr->estimated_bw.local_mem_type;
+    ucs_memory_type_t remote_mt = attr->estimated_bw.remote_mem_type;
+    double total_bw             = 0.0;
+    ucp_lane_index_t lane;
+    ucp_rsc_index_t rsc_index;
+    ucp_md_index_t md_index;
+    const uct_md_attr_v2_t *md_attr;
+    ucp_worker_iface_t *wiface;
+
+    /* Both memory types are caller-provided and local_mt is used as a bit
+     * index below, so reject out-of-range values up front. */
+    if (((unsigned)local_mt >= UCS_MEMORY_TYPE_LAST) ||
+        ((unsigned)remote_mt >= UCS_MEMORY_TYPE_LAST)) {
+        return UCS_ERR_INVALID_PARAM;
+    }
+
+    for (lane = 0; lane < config->key.num_lanes; ++lane) {
+        if (lane == config->key.cm_lane) {
+            continue;
+        }
+
+        rsc_index = config->key.lanes[lane].rsc_index;
+        if (rsc_index == UCP_NULL_RESOURCE) {
+            continue;
+        }
+
+        md_index = config->md_index[lane];
+        md_attr  = &context->tl_mds[md_index].attr;
+
+        /* Skip the lane only if its memory domain can neither register nor
+         * access the requested local memory type. Host memory is always
+         * assumed to be supported.
+         * TODO: remote_mt is validated above but not yet used to filter
+         * lanes. */
+        if ((local_mt != UCS_MEMORY_TYPE_HOST) &&
+            !((md_attr->reg_mem_types | md_attr->access_mem_types) &
+              UCS_BIT(local_mt))) {
+            continue;
+        }
+
+        wiface    = ucp_worker_iface(worker, rsc_index);
+        total_bw += ucp_wireup_iface_bw_distance(wiface);
+    }
+
+    attr->estimated_bw.bandwidth = total_bw;
+    return UCS_OK;
+}
+
 ucs_status_t ucp_ep_query(ucp_ep_h ep, ucp_ep_attr_t *attr)
 {
     ucs_status_t status;
@@ -4007,6 +4073,13 @@ ucs_status_t ucp_ep_query(ucp_ep_h ep, ucp_ep_attr_t *attr)
         attr->user_data = ep->flags & UCP_EP_FLAG_USER_DATA_PARAM ?
                           ep->ext->user_data : NULL;
     }
 
+    if (attr->field_mask & UCP_EP_ATTR_FIELD_ESTIMATED_BW) {
+        status = ucp_ep_query_estimated_bw(ep, attr);
+        if (status != UCS_OK) {
+            return status;
+        }
+    }
+
     return UCS_OK;
 }