Skip to content

Commit 4fad336

Browse files
committed
UCP/AM: fixed CI failures and TODO comment
1 parent 7576f38 commit 4fad336

8 files changed

Lines changed: 29 additions & 55 deletions

File tree

contrib/test_efa.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ run_gtests() {
109109
IBMOCK_FILTER="$IBMOCK_FILTER:srd/test_ucp_peer_failure.*"
110110
IBMOCK_FILTER="$IBMOCK_FILTER:srd/test_ucp_perf.envelope/*"
111111
IBMOCK_FILTER="$IBMOCK_FILTER:*test_ucp_am_psn*"
112+
IBMOCK_FILTER="$IBMOCK_FILTER:*test_ucp_fault_tolerance.am_send_with*"
112113
113114
# Try the faster approach before valgrind
114115
make -C contrib/test/gtest test \

src/ucp/am/eager.inl

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,13 @@ static void ucp_am_eager_zcopy_completion(uct_completion_t *self)
5050
send.state.uct_comp);
5151

5252
ucs_assert(req->send.msg_proto.am.header.reg_desc != NULL);
53-
ucs_mpool_put_inline(req->send.msg_proto.am.header.reg_desc);
54-
req->send.msg_proto.am.header.reg_desc = NULL;
53+
54+
if (ucs_likely(req->send.state.uct_comp.status == UCS_OK) ||
55+
!ucp_ep_err_mode_eq(req->send.ep, UCP_ERR_HANDLING_MODE_FAILOVER)) {
56+
ucs_mpool_put_inline(req->send.msg_proto.am.header.reg_desc);
57+
req->send.msg_proto.am.header.reg_desc = NULL;
58+
}
59+
5560
ucp_proto_request_zcopy_completion(self);
5661
}
5762

src/ucp/am/eager_multi.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,11 @@ ucp_am_eager_multi_zcopy_send_func_common(
278278
UCS_STATIC_ASSERT(sizeof(hdr.middle) == sizeof(ucp_am_hdr_t));
279279

280280
if (ucp_proto_am_is_first_fragment(req)) {
281+
status = ucp_ep_resolve_remote_id(req->send.ep, lpriv->super.lane);
282+
if (ucs_unlikely(status != UCS_OK)) {
283+
return status;
284+
}
285+
281286
am_id = first_am_id;
282287
footer_size = sizeof(*ftr) + user_hdr_size;
283288
footer_offset = 0;

src/ucp/core/ucp_am.c

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1899,46 +1899,32 @@ const ucp_request_send_proto_t ucp_am_reply_proto = {
18991899
.only_hdr_size = sizeof(ucp_am_hdr_t) + sizeof(ucp_am_reply_ftr_t)
19001900
};
19011901

1902-
static void ucp_am_proto_request_zcopy_reset_header(ucp_request_t *request)
1902+
ucs_status_t ucp_am_proto_request_zcopy_reset(ucp_request_t *request)
19031903
{
19041904
ucs_assert(request->send.msg_proto.am.header.reg_desc != NULL);
1905+
/* Zcopy pack function releases header if it's stored in mpool buffer
1906+
* and set copied flag to zero. Zcopy pack function is always called
1907+
* before reset function.
1908+
*/
1909+
ucs_assert(!(request->flags & UCP_REQUEST_FLAG_USER_HEADER_COPIED));
19051910

19061911
/* If user header is not guaranteed to be valid,
19071912
* use mpool buffer for storing the user header.
19081913
*/
19091914
if ((request->send.msg_proto.am.flags & UCP_AM_SEND_FLAG_COPY_HEADER) &&
19101915
(request->send.msg_proto.am.header.length != 0)) {
1911-
request->send.msg_proto.am.header.ptr = ucs_mpool_set_get_inline(
1912-
&request->send.ep->worker->am_mps,
1913-
request->send.msg_proto.am.header.length);
1914-
memcpy(request->send.msg_proto.am.header.ptr,
1915-
request->send.msg_proto.am.header.reg_desc + 1,
1916-
request->send.msg_proto.am.header.length);
1917-
request->flags |= UCP_REQUEST_FLAG_USER_HEADER_COPIED;
1916+
request->send.msg_proto.am.header.ptr = ucs_mpool_set_get_inline(
1917+
&request->send.ep->worker->am_mps,
1918+
request->send.msg_proto.am.header.length);
1919+
memcpy(request->send.msg_proto.am.header.ptr,
1920+
request->send.msg_proto.am.header.reg_desc + 1,
1921+
request->send.msg_proto.am.header.length);
1922+
request->flags |= UCP_REQUEST_FLAG_USER_HEADER_COPIED;
19181923
}
19191924

19201925
ucs_mpool_put_inline(request->send.msg_proto.am.header.reg_desc);
19211926
request->send.msg_proto.am.header.reg_desc = NULL;
19221927

1923-
}
1924-
1925-
ucs_status_t ucp_am_proto_request_zcopy_reset(ucp_request_t *request)
1926-
{
1927-
/* Zcopy pack function releases header if it's stored in mpool buffer
1928-
* and set copied flag to zero. Zcopy pack function is always called
1929-
* before reset function.
1930-
* TODO: check the statement above, in case of failover request should
1931-
* be restarted after its (error) completion but the completion
1932-
* releases header buffer.
1933-
*/
1934-
ucs_assert(!(request->flags & UCP_REQUEST_FLAG_USER_HEADER_COPIED));
1935-
1936-
if (request->send.state.uct_comp.status != UCS_OK) {
1937-
ucs_assert(request->send.msg_proto.am.header.reg_desc == NULL);
1938-
} else {
1939-
ucp_am_proto_request_zcopy_reset_header(request);
1940-
}
1941-
19421928
request->send.msg_proto.am.internal_flags &=
19431929
~UCP_REQUEST_AM_FLAG_HEADER_SENT;
19441930

src/ucp/core/ucp_ep.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1640,6 +1640,7 @@ ucp_ep_reconfig_internal(ucp_ep_h ep, ucp_lane_map_t failed_lanes)
16401640
for (lane = 0; lane < cfg_key.num_lanes; lane++) {
16411641
if (failed_lanes & UCS_BIT(lane)) {
16421642
cfg_key.lanes[lane].lane_types |= UCS_BIT(UCP_LANE_TYPE_FAILED);
1643+
/* coverity[overrun-local] */
16431644
if (cfg_key.am_lane == lane) {
16441645
cfg_key.am_lane = UCP_NULL_LANE;
16451646
}

src/ucp/proto/proto_common.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,6 @@ ucs_status_t ucp_proto_offload_zcopy_reset(ucp_request_t *req)
955955
}
956956

957957
ucp_datatype_iter_rewind(&req->send.state.dt_iter, UCP_DT_MASK_ALL);
958-
//todo: check if this is needed
959958
req->flags &= ~UCP_REQUEST_FLAG_PROTO_INITIALIZED;
960959
return UCS_OK;
961960
}

src/ucp/proto/proto_multi.inl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,9 @@ ucp_proto_multi_progress(ucp_request_t *req,
166166
ucp_proto_complete_cb_t complete_func,
167167
unsigned dt_mask)
168168
{
169-
ucp_lane_index_t lane_shift = 1;
169+
ucp_lane_index_t lane_shift = 1;
170+
ucp_datatype_iter_t next_iter = {0};
170171
const ucp_proto_multi_lane_priv_t *lpriv;
171-
ucp_datatype_iter_t next_iter;
172172
ucp_lane_index_t lane_idx;
173173
ucs_status_t status;
174174

test/gtest/ucp/test_ucp_fault_tolerance.cc

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -266,29 +266,6 @@ class test_ucp_fault_tolerance : public test_ucp_memheap {
266266
return ucs::limit_buffer_size(UCS_KBYTE);
267267
}
268268

269-
static std::string op_name(unsigned op_mask)
270-
{
271-
std::string name;
272-
273-
if (op_mask & TEST_OP_PUT) {
274-
name += "PUT|";
275-
}
276-
277-
if (op_mask & TEST_OP_GET) {
278-
name += "GET|";
279-
}
280-
281-
if (op_mask & TEST_OP_FLUSH) {
282-
name += "FLUSH|";
283-
}
284-
285-
if (!name.empty()) {
286-
name.pop_back();
287-
}
288-
289-
return name;
290-
}
291-
292269
ucs_status_t do_am_send_and_wait(ucp_ep_h ep, size_t size, bool flush_after) {
293270
m_am_received = false;
294271

0 commit comments

Comments
 (0)