Skip to content

Commit 63fad57

Browse files
committed
Wire hot-syscall guardrail into make check
Three hot paths the PR #34 OrbStack baseline tracked -- getpid (~47 ns), clock_gettime through the vDSO (~2.5 ns), and 1-byte /dev/urandom read (~134 ns) -- had no automated regression check. A silent slip-back to the SVC fallback turned each into a ~1-2 us trap without anything in CI to notice. This adds an explicit guardrail. tests/bench-hot-guard.c resolves __kernel_clock_gettime via AT_SYSINFO_EHDR + PT_DYNAMIC + DT_HASH (SysV ELF hash walk) and measures three labels in fixed-width "%-20s %10.1f ns/op last=%ld" output: getpid (raw SVC), clock_gettime (vDSO trampoline), and read-urandom1 (raw 1-byte read of /dev/urandom). The same source builds two binaries via a compile-time switch: build/bench-hot-guard Static glibc. Built without the macro. clock_gettime invokes the trampoline directly through the resolved function pointer. Static glibc never initializes dl_sysinfo_dso, so its libc wrapper falls back to raw SVC for reasons unrelated to the vDSO; measuring the wrapper would fail the 50 ns ceiling for the wrong reason. Direct call isolates the trampoline. build/bench-hot-guard-glibc Dynamic glibc. Built with -DGUARD_USE_LIBC_CG=1. clock_gettime invokes glibc's clock_gettime() wrapper -- which on glibc 2.41 + a correctly-stamped vDSO (NT_GNU_ABI_TAG PT_NOTE, LINUX_2.6.39 versioning) routes through the trampoline. A regression in the note or versioning would push this measurement from ~7 ns to SVC range and trip the ceiling. Built only when the cross-toolchain sysroot at $(LINUX_TOOLCHAIN)/aarch64-unknown-linux-gnu/sysroot exists; run with elfuse --sysroot at that path. Disassembly verifies the split: the dynamic binary lowers bench_clock_gettime to "bl <clock_gettime@plt>" while the static binary lowers it to "ldr x2, [x1], #8" + indirect dispatch. Validation: static getpid 50.4 ns, clock_gettime 6.7 ns, urandom 141.9 ns dyn-glibc getpid 71.9 ns, clock_gettime 17.8 ns, urandom 147.9 ns
1 parent b2ae41d commit 63fad57

5 files changed

Lines changed: 429 additions & 0 deletions

File tree

Makefile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,25 @@ $(BUILD_DIR)/test-lowbase-mem-300000: tests/test-lowbase-mem.c | $(BUILD_DIR)
221221
$(Q)$(CROSS_COMPILE)gcc -D_GNU_SOURCE -static -O2 -no-pie \
222222
-Wl,-Ttext-segment=0x300000 -o $@ $<
223223

224+
# bench-hot-guard-glibc is the dynamic-glibc twin of bench-hot-guard.
225+
# Built only when the cross-glibc toolchain ships its own sysroot
226+
# (so a host without that toolchain can still run the rest of the
227+
# suite). Linked without -static so glibc resolves clock_gettime
228+
# through the vDSO trampoline -- which is exactly what the
229+
# guardrail script verifies against the 50 ns / 200 ns ceilings.
230+
ifneq ($(wildcard $(LINUX_TOOLCHAIN)/aarch64-unknown-linux-gnu/sysroot/.),)
231+
# -DGUARD_USE_LIBC_CG switches the bench's clock_gettime case from a
232+
# direct vDSO trampoline call to the libc wrapper, so the dynamic-glibc
233+
# build measures glibc's actual routing decision. A regression in the
234+
# NT_GNU_ABI_TAG note or LINUX_2.6.39 versioning would push this
235+
# measurement from ~7 ns up to SVC time (~2000 ns) and fail the
236+
# guardrail.
237+
$(BUILD_DIR)/bench-hot-guard-glibc: tests/bench-hot-guard.c | $(BUILD_DIR)
238+
@echo " CROSS $< (dynamic glibc)"
239+
$(Q)$(CROSS_COMPILE)gcc -D_GNU_SOURCE -DGUARD_USE_LIBC_CG=1 -O2 \
240+
-o $@ $<
241+
endif
242+
224243
endif
225244

226245
include mk/tests.mk

mk/tests.mk

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,28 @@ check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage
5151
@$(MAKE) --no-print-directory test-timeout-disable
5252
@printf "\n$(BLUE)━━━ rosetta CLI gating ━━━$(RESET)\n"
5353
@$(MAKE) --no-print-directory test-rosetta-cli
54+
@printf "\n$(BLUE)━━━ hot-syscall guardrail ━━━$(RESET)\n"
55+
@$(MAKE) --no-print-directory test-bench-guardrail
56+
57+
## Hot-syscall performance guardrail: ensure getpid, libc clock_gettime,
58+
## and 1-byte /dev/urandom reads stay under their TODO ns/op ceilings.
59+
## Builds the dynamic-glibc variant opportunistically; the script skips
60+
## that arm when the cross-toolchain sysroot is missing.
61+
BENCH_GUARDRAIL_DEPS := $(ELFUSE_BIN)
62+
BENCH_GUARDRAIL_REQUIRE_STATIC := 0
63+
ifndef GUEST_TEST_BINARIES
64+
BENCH_GUARDRAIL_DEPS += $(BUILD_DIR)/bench-hot-guard
65+
BENCH_GUARDRAIL_REQUIRE_STATIC := 1
66+
ifneq ($(wildcard $(LINUX_TOOLCHAIN)/aarch64-unknown-linux-gnu/sysroot/.),)
67+
BENCH_GUARDRAIL_DEPS += $(BUILD_DIR)/bench-hot-guard-glibc
68+
endif
69+
endif
70+
test-bench-guardrail: $(BENCH_GUARDRAIL_DEPS)
71+
@ELFUSE="$(ELFUSE_BIN)" \
72+
BENCH_GUARDRAIL_DIR="$(TEST_DIR)" \
73+
BENCH_GUARDRAIL_REQUIRE_STATIC="$(BENCH_GUARDRAIL_REQUIRE_STATIC)" \
74+
LINUX_TOOLCHAIN="$(LINUX_TOOLCHAIN)" \
75+
bash tests/test-bench-guardrail.sh
5476

5577
test-sysroot-rename: $(ELFUSE_BIN) $(BUILD_DIR)/test-sysroot-rename
5678
@set -e; \

tests/bench-hot-guard.c

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
/* Hot-syscall guardrail bench
2+
*
3+
* Copyright 2026 elfuse contributors
4+
* SPDX-License-Identifier: Apache-2.0
5+
*
6+
* Minimal bench that measures the three labels the guardrail script
7+
* checks against the TODO ceilings:
8+
*
9+
* getpid (raw SVC; shim identity fast path)
10+
* clock_gettime (vDSO trampoline; see -DGUARD_USE_LIBC_CG below)
11+
* read-urandom1 (raw read; shim urandom ring fast path)
12+
*
13+
* Built twice from this single source:
14+
* build/bench-hot-guard -- static glibc. Compiled without
15+
* -DGUARD_USE_LIBC_CG: `clock_gettime` calls the vDSO trampoline
16+
* directly via its function-pointer address resolved through
17+
* AT_SYSINFO_EHDR. Static glibc never initializes
18+
* dl_sysinfo_dso, so its libc clock_gettime wrapper falls back
19+
* to raw SVC (~2000 ns/op) regardless of trampoline health --
20+
* measuring it would fail the 50 ns ceiling for reasons that
21+
* have nothing to do with the vDSO. Direct call isolates the
22+
* trampoline.
23+
* build/bench-hot-guard-glibc -- dynamic glibc. Compiled with
24+
* -DGUARD_USE_LIBC_CG so `clock_gettime` invokes the libc
25+
* wrapper, which on glibc 2.41 + a correctly-stamped vDSO
26+
* (NT_GNU_ABI_TAG present, LINUX_2.6.39 versioning) routes the
27+
* call through the same trampoline. The guardrail's 50 ns
28+
* ceiling here is exactly the "did glibc accept the vDSO?"
29+
* regression check called out in the TODO baseline: if the
30+
* PT_NOTE or versioning regresses, this measurement jumps to
31+
* SVC time and the guardrail fails. The cross-toolchain sysroot
32+
* must be passed via --sysroot at runtime.
33+
*
34+
* Output format mirrors bench-hot-syscalls.c:
35+
*
36+
* name<padding> XX.X ns/op last=N
37+
*
38+
* so the guardrail's awk extractor reads identical labels across both
39+
* variants.
40+
*/
41+
42+
#include <elf.h>
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/auxv.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>
52+
53+
/* Call signature of a clock_gettime implementation: clock id in,
 * timespec out, int status back. Used for the function pointer that
 * resolve_vdso_clock_gettime() produces.
 */
typedef int (*clock_gettime_fn)(clockid_t clk, struct timespec *out);

/* One benchmark iteration. Receives the case's opaque context and
 * returns a value that run_case() prints as "last=".
 */
typedef long (*bench_fn_t)(void *ctx);

/* A named benchmark case: the label printed in the report, the
 * per-iteration callback, and the context pointer passed to each call.
 */
typedef struct {
    const char *name;
    bench_fn_t fn;
    void *ctx;
} bench_case_t;

/* Context for the clock_gettime case: the resolved clock function and
 * the timespec that every iteration writes into.
 */
typedef struct {
    clock_gettime_fn fn;
    struct timespec ts;
} cg_ctx_t;
67+
68+
/* System V ELF symbol hash, as used by DT_HASH tables.
 *
 * Each byte of the NUL-terminated name is folded into a 28-bit value:
 * shift left by four, add the byte, then fold the top nibble back into
 * bits 4..7 and clear it.
 */
static uint32_t sysv_hash(const char *name)
{
    uint32_t hash = 0;

    for (const unsigned char *p = (const unsigned char *) name; *p != '\0';
         p++) {
        hash = (hash << 4) + *p;
        uint32_t top = hash & 0xf0000000U;
        if (top != 0)
            hash ^= top >> 24;
        hash &= ~top;
    }
    return hash;
}
80+
81+
/* Walk the vDSO ELF at AT_SYSINFO_EHDR and return the absolute address
82+
* of __kernel_clock_gettime, or NULL if anything is missing.
83+
*/
84+
static clock_gettime_fn resolve_vdso_clock_gettime(void)
85+
{
86+
unsigned long base = getauxval(AT_SYSINFO_EHDR);
87+
if (!base)
88+
return NULL;
89+
90+
const Elf64_Ehdr *eh = (const Elf64_Ehdr *) base;
91+
const Elf64_Phdr *ph =
92+
(const Elf64_Phdr *) ((const uint8_t *) eh + eh->e_phoff);
93+
const Elf64_Dyn *dyn = NULL;
94+
for (int i = 0; i < eh->e_phnum; i++) {
95+
if (ph[i].p_type == PT_DYNAMIC) {
96+
dyn = (const Elf64_Dyn *) ((const uint8_t *) eh + ph[i].p_offset);
97+
break;
98+
}
99+
}
100+
if (!dyn)
101+
return NULL;
102+
103+
const Elf64_Sym *st = NULL;
104+
const char *str = NULL;
105+
const uint32_t *hsh = NULL;
106+
for (; dyn->d_tag; dyn++) {
107+
const uint8_t *p = (const uint8_t *) eh + dyn->d_un.d_ptr;
108+
switch (dyn->d_tag) {
109+
case DT_SYMTAB:
110+
st = (const Elf64_Sym *) p;
111+
break;
112+
case DT_STRTAB:
113+
str = (const char *) p;
114+
break;
115+
case DT_HASH:
116+
hsh = (const uint32_t *) p;
117+
break;
118+
default:
119+
break;
120+
}
121+
}
122+
if (!st || !str || !hsh)
123+
return NULL;
124+
125+
uint32_t nbucket = hsh[0];
126+
uint32_t nchain = hsh[1];
127+
const uint32_t *bucket = &hsh[2];
128+
const uint32_t *chain = &bucket[nbucket];
129+
const char *name = "__kernel_clock_gettime";
130+
uint32_t h = sysv_hash(name) % nbucket;
131+
for (uint32_t i = bucket[h]; i && i < nchain; i = chain[i]) {
132+
if (strcmp(&str[st[i].st_name], name) == 0)
133+
return (clock_gettime_fn) (base + st[i].st_value);
134+
}
135+
return NULL;
136+
}
137+
138+
/* Read CLOCK_MONOTONIC through `cg` and return it as nanoseconds.
 *
 * `cg` is the raw vDSO entry point resolved by
 * resolve_vdso_clock_gettime(). Called directly (without the libc
 * wrapper) it reports failure as a negative errno value in its return
 * code and leaves errno untouched, so the diagnostic is derived from
 * the return value rather than from perror().
 *
 * Does not return on failure: prints the error to stderr and exits
 * with status 1.
 */
static uint64_t monotonic_ns(clock_gettime_fn cg)
{
    struct timespec ts;
    int rc = cg(CLOCK_MONOTONIC, &ts);
    if (rc != 0) {
        fprintf(stderr, "clock_gettime: %s (rc=%d)\n", strerror(-rc), rc);
        exit(1);
    }
    return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
147+
148+
/* Benchmark body for the "getpid" label: issues SYS_getpid through
 * syscall() rather than the getpid() wrapper. The context pointer is
 * unused. Returns the value syscall() hands back.
 */
static long bench_getpid(void *ctx)
{
    long pid;

    (void) ctx;
    pid = syscall(SYS_getpid);
    return pid;
}
153+
154+
static long bench_clock_gettime(void *ctx)
155+
{
156+
cg_ctx_t *c = ctx;
157+
#ifdef GUARD_USE_LIBC_CG
158+
/* Dynamic glibc build: exercise the libc wrapper so the
159+
* NT_GNU_ABI_TAG / LINUX_2.6.39 vDSO routing is validated end to
160+
* end. If glibc falls back to SVC (broken note / version regress)
161+
* this measurement jumps to ~2000 ns and the guardrail fails.
162+
*/
163+
(void) c->fn;
164+
return clock_gettime(CLOCK_MONOTONIC, &c->ts);
165+
#else
166+
/* Static build (no dl_sysinfo_dso): call the trampoline directly
167+
* via the resolved function pointer.
168+
*/
169+
return c->fn(CLOCK_MONOTONIC, &c->ts);
170+
#endif
171+
}
172+
173+
/* Benchmark body for the "read-urandom1" label: reads a single byte
 * from the file descriptor that `ctx` points at (an int). The byte is
 * discarded; the return value is whatever read() returned.
 */
static long bench_read_urandom1(void *ctx)
{
    const int *fdp = ctx;
    unsigned char sink;
    ssize_t got = read(*fdp, &sink, sizeof sink);

    return got;
}
179+
180+
static void run_case(clock_gettime_fn cg,
181+
const bench_case_t *bc,
182+
unsigned long iters)
183+
{
184+
uint64_t start = monotonic_ns(cg);
185+
long last = 0;
186+
for (unsigned long i = 0; i < iters; i++)
187+
last = bc->fn(bc->ctx);
188+
uint64_t elapsed = monotonic_ns(cg) - start;
189+
double ns_per_op = (double) elapsed / (double) iters;
190+
printf("%-20s %10.1f ns/op last=%ld\n", bc->name, ns_per_op, last);
191+
}
192+
193+
int main(int argc, char **argv)
194+
{
195+
/* Line-buffered stdout so each completed case is visible
196+
* immediately when stdout is piped or captured.
197+
*/
198+
setvbuf(stdout, NULL, _IOLBF, 0);
199+
200+
unsigned long iters = 50000;
201+
if (argc > 1)
202+
iters = strtoul(argv[1], NULL, 10);
203+
if (iters == 0) {
204+
fprintf(stderr, "iterations must be > 0\n");
205+
return 1;
206+
}
207+
208+
clock_gettime_fn vdso_cg = resolve_vdso_clock_gettime();
209+
if (!vdso_cg) {
210+
fprintf(stderr,
211+
"could not resolve __kernel_clock_gettime via "
212+
"AT_SYSINFO_EHDR\n");
213+
return 1;
214+
}
215+
216+
int urandomfd = open("/dev/urandom", O_RDONLY);
217+
if (urandomfd < 0) {
218+
perror("open /dev/urandom");
219+
return 1;
220+
}
221+
222+
cg_ctx_t cg_ctx = {.fn = vdso_cg};
223+
const bench_case_t cases[] = {
224+
{"getpid", bench_getpid, NULL},
225+
{"clock_gettime", bench_clock_gettime, &cg_ctx},
226+
{"read-urandom1", bench_read_urandom1, &urandomfd},
227+
};
228+
229+
for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
230+
run_case(vdso_cg, &cases[i], iters);
231+
232+
close(urandomfd);
233+
return 0;
234+
}

tests/bench-hot-syscalls.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,13 @@ static void run_case(const bench_case_t *bc, unsigned long iters)
593593

594594
int main(int argc, char **argv)
595595
{
596+
/* Line-buffer stdout so each completed case is visible immediately
597+
* when the bench is piped or redirected. Full buffering hides the
598+
* progress and turns "the bench is slow" into "the bench appears
599+
* stuck" until the buffer flushes at exit.
600+
*/
601+
setvbuf(stdout, NULL, _IOLBF, 0);
602+
596603
unsigned long iters = 1000000;
597604
if (argc > 1)
598605
iters = strtoul(argv[1], NULL, 10);

0 commit comments

Comments
 (0)