Commit f0c4973

bnxt_re/lib: Optimize the inline data copy
Inline data is currently copied in 16-byte chunks to avoid ring-buffer wraparound. Wraparound is rare for typical post_send usage, so when it does not occur the whole inline payload can be copied in one shot. Handle the wraparound and non-wraparound cases separately.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
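As a worked example of the slot arithmetic in the new path (the numbers are illustrative, not from the commit): with que->depth = 64 slots of alsize = 16 bytes, a starting physical slot start_phys = 62, and an inline payload of t_len = 100 bytes, we get n_slots = ceil(100 / 16) = 7 but slots_nowrap = 64 - 62 = 2. Since 7 > 2, the wraparound path runs: nowrap_bytes = 2 * 16 = 32 bytes fill the last two slots at the end of the ring, and the remaining 68 bytes continue from que->va, the start of the ring.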
1 parent 8b9cdb7 · commit f0c4973

1 file changed: providers/bnxt_re/verbs.c (60 additions, 34 deletions)

@@ -2260,50 +2260,76 @@ static int bnxt_re_put_inline(struct bnxt_re_queue *que, uint32_t *idx,
 			      struct ibv_sge *sgl, uint32_t nsg,
 			      uint16_t max_ils)
 {
-	int len, t_len, offt = 0;
-	int t_cplen = 0, cplen;
-	bool pull_dst = true;
-	void *il_dst = NULL;
-	void *il_src = NULL;
-	int alsize;
+	const int alsize = sizeof(struct bnxt_re_sge);
+	uint32_t n_slots, start_phys, slots_nowrap;
+	uint32_t sge_idx, sge_off;
+	int copied, cplen, len;
+	int nowrap_bytes;
+	int t_len = 0;
+	void *il_dst;
 	int indx;
 
-	alsize = sizeof(struct bnxt_re_sge);
-
-	t_len = 0;
 	for (indx = 0; indx < nsg; indx++) {
-		len = sgl[indx].length;
-		il_src = (void *)(uintptr_t)(sgl[indx].addr);
-		t_len += len;
+		t_len += sgl[indx].length;
 		if (t_len > max_ils)
-			goto bad;
+			return -ENOMEM;
+	}
 
-		while (len) {
-			if (pull_dst) {
-				pull_dst = false;
-				il_dst = bnxt_re_get_hwqe(que, (*idx)++);
-				if (pbuf)
-					pbuf->wqe[*idx - 1] =
-						(uintptr_t)il_dst;
-				t_cplen = 0;
-				offt = 0;
-			}
-			cplen = MIN(len, alsize);
-			cplen = MIN(cplen, (alsize - offt));
-			memcpy(il_dst, il_src, cplen);
-			t_cplen += cplen;
-			il_src += cplen;
+	n_slots = (t_len + alsize - 1) / alsize;
+	start_phys = (que->tail + *idx) % que->depth;
+	slots_nowrap = que->depth - start_phys;
+
+	/* Record push buffer slots for all slots we use */
+	if (pbuf) {
+		for (indx = 0; indx < n_slots; indx++)
+			pbuf->wqe[*idx + indx] =
+				(uintptr_t)bnxt_re_get_hwqe(que, *idx + indx);
+	}
+
+	il_dst = bnxt_re_get_hwqe(que, *idx);
+
+	if (n_slots <= slots_nowrap) {
+		/* No wraparound: copy each SGE in one shot */
+		for (indx = 0; indx < nsg; indx++) {
+			memcpy(il_dst, (void *)(uintptr_t)sgl[indx].addr,
+			       sgl[indx].length);
+			il_dst += sgl[indx].length;
+		}
+	} else {
+		/* Wraparound: copy first part to end of ring, rest from start */
+		nowrap_bytes = slots_nowrap * alsize;
+		copied = 0;
+		sge_idx = 0;
+		sge_off = 0;
+		while (copied < nowrap_bytes) {
+			len = sgl[sge_idx].length - sge_off;
+			cplen = len <= (nowrap_bytes - copied) ?
+				len : (nowrap_bytes - copied);
+			memcpy(il_dst,
+			       (char *)(uintptr_t)sgl[sge_idx].addr + sge_off,
+			       cplen);
 			il_dst += cplen;
-			offt += cplen;
-			len -= cplen;
-			if (t_cplen == alsize)
-				pull_dst = true;
+			copied += cplen;
+			sge_off += cplen;
+			if (sge_off == sgl[sge_idx].length) {
+				sge_idx++;
+				sge_off = 0;
+			}
+		}
+		il_dst = que->va;
+		while (sge_idx < nsg) {
+			len = sgl[sge_idx].length - sge_off;
+			memcpy(il_dst,
+			       (char *)(uintptr_t)sgl[sge_idx].addr + sge_off,
+			       len);
+			il_dst += len;
+			sge_off = 0;
+			sge_idx++;
 		}
 	}
 
+	*idx += n_slots;
 	return t_len;
-bad:
-	return -ENOMEM;
 }
 
 static int bnxt_re_required_slots(struct bnxt_re_qp *qp, struct ibv_send_wr *wr,
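
Below is a minimal stand-alone sketch of the same copy strategy, compilable outside the driver tree. The types and names here (ring, sge, SLOT_SIZE, put_inline) are hypothetical stand-ins for bnxt_re_queue, ibv_sge, sizeof(struct bnxt_re_sge), and bnxt_re_put_inline; the max_ils length check and push-buffer bookkeeping are omitted.

/*
 * Stand-alone sketch of the wrap/no-wrap inline copy (illustrative only;
 * not the driver's actual types or API).
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SLOT_SIZE 16			/* bytes per WQE slot */

struct ring {
	uint8_t *va;			/* base of the ring buffer */
	uint32_t depth;			/* ring size in slots */
	uint32_t tail;			/* first free slot */
};

struct sge {
	const void *addr;
	uint32_t length;
};

/* Copy all SGEs into the ring at tail; return the number of slots used. */
static uint32_t put_inline(struct ring *r, const struct sge *sgl, uint32_t nsg)
{
	uint32_t t_len = 0, i;

	for (i = 0; i < nsg; i++)
		t_len += sgl[i].length;

	uint32_t n_slots = (t_len + SLOT_SIZE - 1) / SLOT_SIZE;
	uint32_t start = r->tail % r->depth;
	uint32_t slots_nowrap = r->depth - start;
	uint8_t *dst = r->va + (size_t)start * SLOT_SIZE;

	if (n_slots <= slots_nowrap) {
		/* Common case: one memcpy per SGE, no wraparound. */
		for (i = 0; i < nsg; i++) {
			memcpy(dst, sgl[i].addr, sgl[i].length);
			dst += sgl[i].length;
		}
	} else {
		/* Rare case: fill up to the end of the ring, then wrap. */
		uint32_t nowrap_bytes = slots_nowrap * SLOT_SIZE;
		uint32_t copied = 0, off = 0;

		i = 0;
		while (copied < nowrap_bytes) {
			uint32_t left = sgl[i].length - off;
			uint32_t chunk = left < nowrap_bytes - copied ?
					 left : nowrap_bytes - copied;

			memcpy(dst, (const uint8_t *)sgl[i].addr + off, chunk);
			dst += chunk;
			copied += chunk;
			off += chunk;
			if (off == sgl[i].length) {
				i++;
				off = 0;
			}
		}
		dst = r->va;		/* continue from the ring base */
		for (; i < nsg; i++) {
			memcpy(dst, (const uint8_t *)sgl[i].addr + off,
			       sgl[i].length - off);
			dst += sgl[i].length - off;
			off = 0;
		}
	}
	return n_slots;
}

int main(void)
{
	uint8_t buf[8 * SLOT_SIZE];		/* 8-slot ring */
	struct ring r = { buf, 8, 6 };		/* 2 free slots before the end */
	const char payload[48] = "0123456789abcdef0123456789abcdef0123456789abcde";
	struct sge sg = { payload, sizeof(payload) };	/* 48 bytes = 3 slots */

	/* 3 slots > 2 slots before the end of the ring, so the copy wraps. */
	printf("used %u slots; byte at ring base: '%c'\n",
	       (unsigned)put_inline(&r, &sg, 1), buf[0]);
	return 0;
}

The payoff of the split is visible in the fast path: the common, non-wrapping post issues a single memcpy per SGE instead of one per 16-byte slot, and only the rare wrapping post pays for the chunked bookkeeping.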
