Skip to content

Commit e5608da

Browse files
authored
Merge pull request #1704 from selvintxavier/inline_perf
bnxt_re/lib: Optimize the inline data copy
2 parents: cfedf9e + f0c4973 — commit e5608da

1 file changed

Lines changed: 60 additions & 34 deletions

File tree

providers/bnxt_re/verbs.c

Lines changed: 60 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -2274,50 +2274,76 @@ static int bnxt_re_put_inline(struct bnxt_re_queue *que, uint32_t *idx,
22742274
struct ibv_sge *sgl, uint32_t nsg,
22752275
uint16_t max_ils)
22762276
{
2277-
int len, t_len, offt = 0;
2278-
int t_cplen = 0, cplen;
2279-
bool pull_dst = true;
2280-
void *il_dst = NULL;
2281-
void *il_src = NULL;
2282-
int alsize;
2277+
const int alsize = sizeof(struct bnxt_re_sge);
2278+
uint32_t n_slots, start_phys, slots_nowrap;
2279+
uint32_t sge_idx, sge_off;
2280+
int copied, cplen, len;
2281+
int nowrap_bytes;
2282+
int t_len = 0;
2283+
void *il_dst;
22832284
int indx;
22842285

2285-
alsize = sizeof(struct bnxt_re_sge);
2286-
2287-
t_len = 0;
22882286
for (indx = 0; indx < nsg; indx++) {
2289-
len = sgl[indx].length;
2290-
il_src = (void *)(uintptr_t)(sgl[indx].addr);
2291-
t_len += len;
2287+
t_len += sgl[indx].length;
22922288
if (t_len > max_ils)
2293-
goto bad;
2289+
return -ENOMEM;
2290+
}
22942291

2295-
while (len) {
2296-
if (pull_dst) {
2297-
pull_dst = false;
2298-
il_dst = bnxt_re_get_hwqe(que, (*idx)++);
2299-
if (pbuf)
2300-
pbuf->wqe[*idx - 1] =
2301-
(uintptr_t)il_dst;
2302-
t_cplen = 0;
2303-
offt = 0;
2304-
}
2305-
cplen = MIN(len, alsize);
2306-
cplen = MIN(cplen, (alsize - offt));
2307-
memcpy(il_dst, il_src, cplen);
2308-
t_cplen += cplen;
2309-
il_src += cplen;
2292+
n_slots = (t_len + alsize - 1) / alsize;
2293+
start_phys = (que->tail + *idx) % que->depth;
2294+
slots_nowrap = que->depth - start_phys;
2295+
2296+
/* Record push buffer slots for all slots we use */
2297+
if (pbuf) {
2298+
for (indx = 0; indx < n_slots; indx++)
2299+
pbuf->wqe[*idx + indx] =
2300+
(uintptr_t)bnxt_re_get_hwqe(que, *idx + indx);
2301+
}
2302+
2303+
il_dst = bnxt_re_get_hwqe(que, *idx);
2304+
2305+
if (n_slots <= slots_nowrap) {
2306+
/* No wraparound: copy each SGE in one shot */
2307+
for (indx = 0; indx < nsg; indx++) {
2308+
memcpy(il_dst, (void *)(uintptr_t)sgl[indx].addr,
2309+
sgl[indx].length);
2310+
il_dst += sgl[indx].length;
2311+
}
2312+
} else {
2313+
/* Wraparound: copy first part to end of ring, rest from start */
2314+
nowrap_bytes = slots_nowrap * alsize;
2315+
copied = 0;
2316+
sge_idx = 0;
2317+
sge_off = 0;
2318+
while (copied < nowrap_bytes) {
2319+
len = sgl[sge_idx].length - sge_off;
2320+
cplen = len <= (nowrap_bytes - copied) ?
2321+
len : (nowrap_bytes - copied);
2322+
memcpy(il_dst,
2323+
(char *)(uintptr_t)sgl[sge_idx].addr + sge_off,
2324+
cplen);
23102325
il_dst += cplen;
2311-
offt += cplen;
2312-
len -= cplen;
2313-
if (t_cplen == alsize)
2314-
pull_dst = true;
2326+
copied += cplen;
2327+
sge_off += cplen;
2328+
if (sge_off == sgl[sge_idx].length) {
2329+
sge_idx++;
2330+
sge_off = 0;
2331+
}
2332+
}
2333+
il_dst = que->va;
2334+
while (sge_idx < nsg) {
2335+
len = sgl[sge_idx].length - sge_off;
2336+
memcpy(il_dst,
2337+
(char *)(uintptr_t)sgl[sge_idx].addr + sge_off,
2338+
len);
2339+
il_dst += len;
2340+
sge_off = 0;
2341+
sge_idx++;
23152342
}
23162343
}
23172344

2345+
*idx += n_slots;
23182346
return t_len;
2319-
bad:
2320-
return -ENOMEM;
23212347
}
23222348

23232349
static int bnxt_re_required_slots(struct bnxt_re_qp *qp, struct ibv_send_wr *wr,

0 commit comments

Comments
 (0)