Skip to content

Commit bfbc71f

Browse files
committed
setup: dynamically detect default huge page size
Replaces the hardcoded 2MB huge page size with dynamic detection by parsing /proc/meminfo. This fixes no-mmap allocation failures on architectures with a different default huge page size (such as ARM64, which often uses 512MB) and on x86 systems configured for 1GB pages.
- Safely parses /proc/meminfo without allocating memory.
- Uses raw syscalls and manual byte-by-byte matching to maintain strict compatibility with CONFIG_NOLIBC builds (avoiding strstr).
- Drops the MAP_HUGE_2MB mmap flag so that the kernel applies the system's default huge page size.
- Falls back safely to 2MB if /proc/meminfo is unreadable.
Signed-off-by: Prateek <kprateek283@gmail.com>
1 parent 2750b94 commit bfbc71f

1 file changed

Lines changed: 68 additions & 16 deletions

File tree

src/setup.c

Lines changed: 68 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -220,15 +220,67 @@ __cold int io_uring_ring_dontfork(struct io_uring *ring)
220220
return 0;
221221
}
222222

223-
#ifndef MAP_HUGE_SHIFT
224-
#define MAP_HUGE_SHIFT 26
225-
#endif
226-
#ifndef MAP_HUGE_2MB
227-
#define MAP_HUGE_2MB (21U << MAP_HUGE_SHIFT)
228-
#endif
229223

230-
/* FIXME */
231-
static size_t huge_page_size = 2 * 1024 * 1024;
224+
/*
 * Extract the "Hugepagesize:" value from a snapshot of /proc/meminfo.
 *
 * @buf: bytes read from the file (does not need to be NUL-terminated)
 * @len: number of valid bytes in @buf
 *
 * Only the first line that starts with "Hugepagesize:" is considered. The
 * number on that line is taken to be in kB and is converted to bytes.
 *
 * Returns the size in bytes, or 0 if the line is missing, the value is 0,
 * the value would overflow a size_t, or the digits run into the end of the
 * buffer (in which case the number may have been cut short).
 *
 * The key is matched byte-by-byte on purpose: strstr() and memcmp() are not
 * available in CONFIG_NOLIBC builds.
 */
static size_t parse_huge_page_size(const char *buf, size_t len)
{
	static const char key[] = "Hugepagesize:";
	const size_t key_len = sizeof(key) - 1;
	/* largest kB count that can still be multiplied by 1024 */
	const size_t max_kb = (size_t) -1 / 1024;
	const char *p = buf;
	const char *end = buf + len;

	while (p < end) {
		size_t i = 0;
		size_t kb = 0;

		/* Does this line start with the key? */
		while (i < key_len && p + i < end && p[i] == key[i])
			i++;

		if (i != key_len) {
			/* No match, advance to the start of the next line */
			while (p < end && *p != '\n')
				p++;
			if (p < end)
				p++;
			continue;
		}

		p += key_len;
		while (p < end && (*p == ' ' || *p == '\t'))
			p++;

		while (p < end && *p >= '0' && *p <= '9') {
			size_t digit = (size_t) (*p - '0');

			/* Refuse values where kb * 10 + digit exceeds max_kb */
			if (kb > (max_kb - digit) / 10)
				return 0;
			kb = kb * 10 + digit;
			p++;
		}

		/*
		 * If the digits ended because the buffer ended, we can't
		 * tell whether more digits would have followed.
		 */
		if (p == end)
			return 0;

		return kb * 1024; /* kB -> bytes */
	}

	return 0;
}

/*
 * Return the default huge page size in bytes.
 *
 * The value comes from the "Hugepagesize:" line of /proc/meminfo. If the
 * file can't be opened or read, or no usable value is found, 2MB is used
 * instead. The result (including the fallback) is cached in a function-local
 * static, so the file is read at most once per successful computation.
 *
 * Only the raw __sys_*() wrappers and a stack buffer are used, so no memory
 * is allocated here.
 *
 * NOTE(review): the cache is written without any synchronization. Concurrent
 * first calls would each do the lookup and store the result; confirm that
 * is acceptable for all callers.
 */
static size_t get_huge_page_size(void)
{
	static size_t hps;
	char buf[4096];
	size_t used = 0;
	size_t ret = 0;
	ssize_t n;
	int fd;

	if (hps)
		return hps;

	fd = __sys_open("/proc/meminfo", O_RDONLY, 0);
	if (fd >= 0) {
		/*
		 * A single read() may return less than the whole file, so
		 * keep reading until EOF, an error, or a full buffer.
		 */
		do {
			n = __sys_read(fd, buf + used, sizeof(buf) - used);
			if (n > 0)
				used += (size_t) n;
		} while (n > 0 && used < sizeof(buf));
		__sys_close(fd);

		/* A read error means the snapshot can't be trusted */
		if (n >= 0)
			ret = parse_huge_page_size(buf, used);
	}

	/* fallback: 2MB */
	hps = ret ? ret : 2 * 1024 * 1024;
	return hps;
}
283+
232284

233285
#define KRING_SIZE 64
234286

@@ -261,13 +313,13 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
261313
mem_used = (mem_used + page_size - 1) & ~(page_size - 1);
262314

263315
/*
264-
* A maxed-out number of CQ entries with IORING_SETUP_CQE32 fills a 2MB
265-
* huge page by itself, so the SQ entries won't fit in the same huge
266-
* page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
316+
* A maxed-out number of CQ entries with IORING_SETUP_CQE32 can fill a
317+
* single huge page by itself, so the SQ entries won't fit in the same
318+
* huge page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
267319
* but check that too to future-proof (e.g. against different huge page
268320
* sizes). Bail out early so we don't overrun.
269321
*/
270-
if (!buf && (sqes_mem > huge_page_size || ring_mem > huge_page_size))
322+
if (!buf && (sqes_mem > get_huge_page_size() || ring_mem > get_huge_page_size()))
271323
return -ENOMEM;
272324

273325
if (buf) {
@@ -279,8 +331,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
279331
if (sqes_mem <= page_size)
280332
buf_size = page_size;
281333
else {
282-
buf_size = huge_page_size;
283-
map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
334+
buf_size = get_huge_page_size();
335+
map_hugetlb = MAP_HUGETLB;
284336
}
285337
sqes_size = buf_size;
286338
ptr = __sys_mmap(NULL, sqes_size, PROT_READ|PROT_WRITE,
@@ -302,8 +354,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
302354
if (ring_mem <= page_size)
303355
buf_size = page_size;
304356
else {
305-
buf_size = huge_page_size;
306-
map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
357+
buf_size = get_huge_page_size();
358+
map_hugetlb = MAP_HUGETLB;
307359
}
308360
ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE,
309361
MAP_SHARED|MAP_ANONYMOUS|map_hugetlb,

0 commit comments

Comments
 (0)