Skip to content

Commit d2b71ac

Browse files
committed
feat(adapter): implement lock-free rte_ring IPC for LD_PRELOAD - FF_USE_RING_IPC
Implement a dual-ring SPSC IPC model to replace sem_wait/sem_post synchronization between the APP and fstack processes, controlled by the FF_USE_RING_IPC compile macro.

Core changes:
- Makefile: Add FF_USE_RING_IPC macro with FF_PRELOAD_POLLING_MODE mutual exclusion
- ff_socket_ops.h: Add ff_sc_ring_zone struct (64B cache-aligned), modify ff_so_context (sem_t -> completion+ring_zone_id+reserved), add ring IPC function declarations
- ff_so_zone.c: Implement ff_create_sc_ring_zone/ff_attach_sc_ring_zone for Hugepage ring creation, conditional sem_init bypass in ring mode
- ff_socket_ops.c: Add ff_handle_socket_ops_ring callback, rewrite ff_handle_each_context to O(1) rte_ring_dequeue_burst, implement ff_ring_process_requests/send_response/dequeue_wait/alarm_wakeup helper functions
- ff_hook_syscall.c: Rewrite ACQUIRE/RELEASE_ZONE_LOCK/SYSCALL macros for ring mode, implement ff_ring_submit_and_wait, add ring-mode epoll_wait/kevent with rte_rdtsc high-precision timeout, replace alarm_event_sem with ring sentinel

Key design decisions:
- SPSC mode (RING_F_SP_ENQ|RING_F_SC_DEQ) for zero-CAS overhead
- Three configurable wait strategies: busy-poll/yield-poll/eventfd
- All changes under #ifdef FF_USE_RING_IPC, original sem path fully preserved
- Cache line alignment maintained for all modified structures

Performance targets: syscall RTT < 1us, throughput >= 10% improvement

Refs: SPEC-001 (requirements), SPEC-002 (architecture), SPEC-003 (interfaces)
1 parent 86fe212 commit d2b71ac

5 files changed

Lines changed: 613 additions & 0 deletions

File tree

adapter/syscall/Makefile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ endif
2121
# If disabled, epoll uses sem_wait.
2222
#FF_PRELOAD_POLLING_MODE=1
2323

24+
# Lock-free ring IPC mode, replace sem_wait/sem_post with rte_ring.
25+
# If enabled, FF_PRELOAD_POLLING_MODE must be disabled.
26+
#FF_USE_RING_IPC=1
27+
2428
# If FF_KERNEL_EVENT is enabled, epoll_create/epoll_ctl/epoll_wait always call both f-stack and the system API.
2529
# Use for some scenarios similar to Nginx.
2630
#FF_KERNEL_EVENT=1
@@ -59,6 +63,13 @@ ifdef FF_PRELOAD_POLLING_MODE
5963
CFLAGS+= -DFF_PRELOAD_POLLING_MODE
6064
endif
6165

66+
ifdef FF_USE_RING_IPC
67+
ifdef FF_PRELOAD_POLLING_MODE
68+
$(error "FF_USE_RING_IPC and FF_PRELOAD_POLLING_MODE are mutually exclusive")
69+
endif
70+
CFLAGS+= -DFF_USE_RING_IPC
71+
endif
72+
6273
ifdef FF_USE_THREAD_STRUCT_HANDLE
6374
CFLAGS+= -DFF_USE_THREAD_STRUCT_HANDLE
6475
endif

adapter/syscall/ff_hook_syscall.c

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@
1212

1313
#include <rte_malloc.h>
1414
#include <rte_memcpy.h>
15+
#ifdef FF_USE_RING_IPC
16+
#include <rte_ring.h>
17+
#include <rte_cycles.h>
18+
#include <sys/eventfd.h>
19+
#include <sched.h>
20+
#endif
1521

1622
#include "ff_config.h"
1723
#include "ff_socket_ops.h"
@@ -111,6 +117,38 @@ static __thread int sh_iov_static_fill_idx_share = 0;
111117
} \
112118
args = name##_args;
113119

120+
#ifdef FF_USE_RING_IPC
121+
/*
 * Zone-lock macros for ring IPC builds.
 *
 * The ring-mode SYSCALL macro does not go through ACQUIRE/RELEASE_ZONE_LOCK,
 * so these exist only to keep any leftover references compiling. Each one
 * evaluates its argument exactly once, discards the value, and does nothing
 * else.
 */
#define FF_ZONE_LOCK_NOOP(value) \
    do { \
        (void)(value); \
    } while (0)

#define ACQUIRE_ZONE_LOCK(exp)     FF_ZONE_LOCK_NOOP(exp)
#define ACQUIRE_ZONE_TRY_LOCK(exp) FF_ZONE_LOCK_NOOP(exp)
#define RELEASE_ZONE_LOCK(s)       FF_ZONE_LOCK_NOOP(s)
129+
130+
/*
 * Ring mode SYSCALL: fill in the request fields of sc, hand it to
 * ff_ring_submit_and_wait() with an unbounded timeout (-1), then translate
 * the outcome into the usual ret/errno pair.
 * No spinlock, no status machine — the macro itself takes no lock.
 *
 * Expects `sc`, `ff_so_zone` and an assignable `ret` to be in scope at the
 * expansion site.
 *
 * Outcome:
 * - ff_ring_submit_and_wait() returned a negative code: ret = -1 and errno
 *   is set to that code (negated), so a setup failure such as -EINVAL is no
 *   longer misreported as ETIMEDOUT. A -ETIMEDOUT return still yields
 *   errno = ETIMEDOUT, as before.
 * - otherwise: ret = sc->result, and errno = sc->error when ret < 0.
 */
#define SYSCALL(op, arg) do { \
    int ff_ring_rc_; \
    sc->ops = (op); \
    sc->args = (arg); \
    sc->result = 0; \
    sc->error = 0; \
    ff_ring_rc_ = ff_ring_submit_and_wait(ff_so_zone->ring_zone, sc, -1); \
    if (ff_ring_rc_ < 0) { \
        errno = -ff_ring_rc_; \
        ret = -1; \
    } else { \
        ret = sc->result; \
        if (ret < 0) { \
            errno = sc->error; \
        } \
    } \
} while (0)
149+
150+
#else /* !FF_USE_RING_IPC */
151+
114152
/* Dirty read first, and then try to lock sc and real read. */
115153
#define ACQUIRE_ZONE_LOCK(exp) do { \
116154
while (1) { \
@@ -159,6 +197,8 @@ static __thread int sh_iov_static_fill_idx_share = 0;
159197
RELEASE_ZONE_LOCK(FF_SC_IDLE); \
160198
} while (0)
161199

200+
#endif /* FF_USE_RING_IPC */
201+
162202
#define RETURN_NOFREE() do { \
163203
DEBUG_LOG("RETURN_NOFREE ret:%d, errno:%d\n", ret, errno); \
164204
return ret; \
@@ -245,7 +285,9 @@ static int ff_kernel_max_fd = FF_KERNEL_MAX_FD_DEFAULT;
245285
#endif
246286

247287
/* thread sockets are not supported yet */
288+
#ifndef FF_USE_RING_IPC
248289
static int need_alarm_sem = 0;
290+
#endif
249291

250292
#define count_trailing_zeros(x) __builtin_ctzll(x)
251293

@@ -2152,7 +2194,9 @@ ff_hook_epoll_wait(int epfd, struct epoll_event *events,
21522194
{
21532195
DEBUG_LOG("ff_hook_epoll_wait, epfd:%d, maxevents:%d, timeout:%d\n", epfd, maxevents, timeout);
21542196
int fd = epfd;
2197+
#ifndef FF_USE_RING_IPC
21552198
struct timespec abs_timeout;
2199+
#endif
21562200

21572201
CHECK_FD_OWNERSHIP(epoll_wait, (epfd, events, maxevents, timeout));
21582202

@@ -2190,6 +2234,7 @@ ff_hook_epoll_wait(int epfd, struct epoll_event *events,
21902234
}
21912235
}
21922236

2237+
#ifndef FF_USE_RING_IPC
21932238
if (timeout > 0) {
21942239
clock_gettime(CLOCK_REALTIME, &abs_timeout);
21952240
DEBUG_LOG("before wait, sec:%ld, nsec:%ld\n", abs_timeout.tv_sec, abs_timeout.tv_nsec);
@@ -2206,13 +2251,48 @@ ff_hook_epoll_wait(int epfd, struct epoll_event *events,
22062251
RETURN_ERROR_NOFREE(EINVAL);
22072252
}
22082253
}
2254+
#endif
22092255

22102256
args->epfd = fd;
22112257
args->events = sh_events;
22122258
args->maxevents = maxevents;
22132259
args->timeout = timeout;
22142260

22152261
RETRY:
2262+
#ifdef FF_USE_RING_IPC
2263+
{
2264+
int64_t timeout_us;
2265+
if (timeout < 0) {
2266+
timeout_us = -1; /* block forever */
2267+
} else if (timeout == 0) {
2268+
timeout_us = 0; /* non-blocking */
2269+
} else {
2270+
timeout_us = (int64_t)timeout * 1000; /* ms -> us */
2271+
}
2272+
2273+
args->epfd = fd;
2274+
args->events = sh_events;
2275+
args->maxevents = maxevents;
2276+
args->timeout = timeout;
2277+
2278+
sc->ops = FF_SO_EPOLL_WAIT;
2279+
sc->args = args;
2280+
sc->result = 0;
2281+
sc->error = 0;
2282+
errno = 0;
2283+
2284+
ret = ff_ring_submit_and_wait(ff_so_zone->ring_zone, sc, timeout_us);
2285+
2286+
if (ret == -ETIMEDOUT) {
2287+
ret = 0; /* timeout: 0 events */
2288+
} else {
2289+
ret = sc->result;
2290+
if (ret < 0) {
2291+
errno = sc->error;
2292+
}
2293+
}
2294+
}
2295+
#else /* !FF_USE_RING_IPC */
22162296
/* for timeout, Although not really effective in FreeBSD stack */
22172297
//SYSCALL(FF_SO_EPOLL_WAIT, args);
22182298
ACQUIRE_ZONE_LOCK(FF_SC_IDLE);
@@ -2296,6 +2376,7 @@ ff_hook_epoll_wait(int epfd, struct epoll_event *events,
22962376

22972377
sc->status = FF_SC_IDLE;
22982378
rte_spinlock_unlock(&sc->lock);
2379+
#endif /* !FF_USE_RING_IPC */
22992380

23002381
if (likely(ret > 0)) {
23012382
if (unlikely(ret > maxevents)) {
@@ -2512,6 +2593,33 @@ kevent(int kq, const struct kevent *changelist, int nchanges,
25122593
args->kq = kq;
25132594
args->timeout = (struct timespec *)timeout;
25142595

2596+
#ifdef FF_USE_RING_IPC
2597+
{
2598+
int64_t timeout_us;
2599+
if (timeout == NULL) {
2600+
timeout_us = -1; /* block forever */
2601+
} else {
2602+
timeout_us = timeout->tv_sec * 1000000LL + timeout->tv_nsec / 1000;
2603+
}
2604+
2605+
sc->ops = FF_SO_KEVENT;
2606+
sc->args = args;
2607+
sc->result = 0;
2608+
sc->error = 0;
2609+
errno = 0;
2610+
2611+
ret = ff_ring_submit_and_wait(ff_so_zone->ring_zone, sc, timeout_us);
2612+
2613+
if (ret == -ETIMEDOUT) {
2614+
ret = 0; /* timeout: 0 events */
2615+
} else {
2616+
ret = sc->result;
2617+
if (ret < 0) {
2618+
errno = sc->error;
2619+
}
2620+
}
2621+
}
2622+
#else /* !FF_USE_RING_IPC */
25152623
ACQUIRE_ZONE_LOCK(FF_SC_IDLE);
25162624
//rte_spinlock_lock(&sc->lock);
25172625

@@ -2583,6 +2691,7 @@ kevent(int kq, const struct kevent *changelist, int nchanges,
25832691
sc->status = FF_SC_IDLE;
25842692

25852693
rte_spinlock_unlock(&sc->lock);
2694+
#endif /* !FF_USE_RING_IPC */
25862695

25872696
if (ret > 0) {
25882697
if (eventlist && nevents) {
@@ -3236,6 +3345,12 @@ void
32363345
alarm_event_sem()
32373346
{
32383347
#ifndef FF_THREAD_SOCKET
3348+
#ifdef FF_USE_RING_IPC
3349+
/* Ring mode: wakeup APP via response ring sentinel */
3350+
if (ff_so_zone && ff_so_zone->ring_zone && sc) {
3351+
ff_ring_alarm_wakeup(ff_so_zone->ring_zone, sc);
3352+
}
3353+
#else
32393354
DEBUG_LOG("check whether need to alarm sem sc:%p, status:%d, ops:%d, need_alarm_sem:%d\n",
32403355
sc, sc->status, sc->ops, need_alarm_sem);
32413356
rte_spinlock_lock(&sc->lock);
@@ -3248,6 +3363,58 @@ alarm_event_sem()
32483363

32493364
DEBUG_LOG("finish alarm sem sc:%p, status:%d, ops:%d, need_alarm_sem:%d\n",
32503365
sc, sc->status, sc->ops, need_alarm_sem);
3366+
#endif /* FF_USE_RING_IPC */
32513367
#endif
32523368
}
32533369

3370+
#ifdef FF_USE_RING_IPC
3371+
/*
3372+
* APP side: submit request to req_ring and wait for response on rsp_ring.
3373+
*
3374+
* @param ring_zone Ring zone pointer
3375+
* @param sc so_context with ops/args already filled
3376+
* @param timeout_us Timeout in microseconds: -1=forever, 0=non-blocking, >0=wait
3377+
* @return 0 on success, -ETIMEDOUT on timeout
3378+
*/
3379+
int
3380+
ff_ring_submit_and_wait(struct ff_sc_ring_zone *ring_zone,
3381+
struct ff_so_context *sc,
3382+
int64_t timeout_us)
3383+
{
3384+
void *obj = NULL;
3385+
3386+
if (ring_zone == NULL || ring_zone->req_ring == NULL ||
3387+
ring_zone->rsp_ring == NULL) {
3388+
return -EINVAL;
3389+
}
3390+
3391+
/* Enqueue request — spin if ring full */
3392+
while (rte_ring_sp_enqueue(ring_zone->req_ring, sc) != 0) {
3393+
ERR_LOG("req_ring full, waiting... sc:%p, ops:%d\n", sc, sc->ops);
3394+
rte_pause();
3395+
}
3396+
3397+
/* Notify fstack via eventfd if configured */
3398+
if (ring_zone->wait_mode == FF_RING_WAIT_EVENTFD &&
3399+
ring_zone->eventfd_req >= 0) {
3400+
uint64_t val = 1;
3401+
write(ring_zone->eventfd_req, &val, sizeof(val));
3402+
}
3403+
3404+
/* Wait for response from rsp_ring */
3405+
int ret = ff_ring_dequeue_wait(ring_zone->rsp_ring, &obj,
3406+
timeout_us, ring_zone->wait_mode);
3407+
3408+
if (ret == -ETIMEDOUT) {
3409+
return -ETIMEDOUT;
3410+
}
3411+
3412+
/* Verify we got our own sc back (SPSC guarantees ordering) */
3413+
if (obj != sc) {
3414+
ERR_LOG("ring response mismatch: expected sc:%p, got:%p\n", sc, obj);
3415+
}
3416+
3417+
return 0;
3418+
}
3419+
#endif /* FF_USE_RING_IPC */
3420+

0 commit comments

Comments
 (0)