Skip to content

Commit 69c3265

Browse files
committed
setup: dynamically detect default huge page size
Replaces the hardcoded 2MB huge page size with dynamic detection by parsing /proc/meminfo. This fixes no-mmap allocation failures on architectures with different default huge page sizes (like ARM64, which often uses 512MB) or on x86 systems configured for 1GB pages.

- Safely parses /proc/meminfo without allocating memory.
- Uses raw syscalls and manual byte-by-byte matching to maintain strict compatibility with CONFIG_NOLIBC builds (avoiding strstr).
- Drops the MAP_HUGE_2MB mmap flag to allow the kernel to correctly apply the system's default huge page size.
- Falls back safely to 2MB if /proc/meminfo is unreadable.

Signed-off-by: Prateek <kprateek283@gmail.com>
1 parent 2750b94 commit 69c3265

1 file changed

Lines changed: 67 additions & 16 deletions

File tree

src/setup.c

Lines changed: 67 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -220,15 +220,66 @@ __cold int io_uring_ring_dontfork(struct io_uring *ring)
220220
return 0;
221221
}
222222

223-
#ifndef MAP_HUGE_SHIFT
224-
#define MAP_HUGE_SHIFT 26
225-
#endif
226-
#ifndef MAP_HUGE_2MB
227-
#define MAP_HUGE_2MB (21U << MAP_HUGE_SHIFT)
228-
#endif
229223

230-
/* FIXME */
231-
static size_t huge_page_size = 2 * 1024 * 1024;
224+
/*
 * Find the "Hugepagesize:" line in a /proc/meminfo snapshot and return its
 * numeric value (reported by the kernel in kB).
 *
 * @p:   first byte of the snapshot
 * @end: one past the last valid byte of the snapshot
 *
 * Returns 0 if the key is absent, the value is zero or would overflow an
 * unsigned long, or the digits run up to @end (in which case the number may
 * have been cut off by a short read and cannot be trusted).
 *
 * The key is matched by hand, byte by byte: strstr() and memcmp() are
 * deliberately avoided so this keeps building under CONFIG_NOLIBC.
 */
static unsigned long meminfo_hugepagesize_kb(const char *p, const char *end)
{
	static const char key[] = "Hugepagesize:";
	const size_t key_len = sizeof(key) - 1;
	const unsigned long val_max = ~0UL;
	unsigned long val;
	size_t i;

	while (p < end) {
		/* Only a key at the very start of a line counts as a match */
		i = 0;
		if ((size_t)(end - p) >= key_len) {
			while (i < key_len && p[i] == key[i])
				i++;
		}

		if (i == key_len) {
			p += key_len;
			while (p < end && (*p == ' ' || *p == '\t'))
				p++;

			val = 0;
			while (p < end && *p >= '0' && *p <= '9') {
				unsigned long digit = *p - '0';

				/* Refuse to wrap around on absurd input */
				if (val > (val_max - digit) / 10)
					return 0;
				val = val * 10 + digit;
				p++;
			}

			/*
			 * A number that touches the end of the buffer may be
			 * missing trailing digits; treat it as not found.
			 */
			if (p == end)
				return 0;
			return val;
		}

		/* No match: skip to the character after the next newline */
		while (p < end && *p != '\n')
			p++;
		if (p < end)
			p++;
	}

	return 0;
}

/*
 * Return the system's default huge page size in bytes.
 *
 * The size is taken from the "Hugepagesize:" entry of /proc/meminfo using
 * raw syscalls and a stack buffer (no allocation). If the file cannot be
 * opened or read, the entry is missing or unusable, or the byte count would
 * not fit in a size_t, 2MB is returned instead.
 *
 * The result is cached in a function-local static after the first call, so
 * /proc/meminfo is read at most once per successful lookup path. The cache
 * is a plain static with no locking.
 */
static size_t get_huge_page_size(void)
{
	static size_t hps;
	size_t ret = 2 * 1024 * 1024;	/* fallback: 2MB */
	char buf[4096];
	unsigned long kb;
	int fd, n;

	if (hps)
		return hps;

	fd = __sys_open("/proc/meminfo", O_RDONLY, 0);
	if (fd < 0)
		goto out;

	n = __sys_read(fd, buf, sizeof(buf));
	__sys_close(fd);
	if (n <= 0)
		goto out;

	kb = meminfo_hugepagesize_kb(buf, buf + n);
	/* kB -> bytes, but only if the product is representable */
	if (kb && kb <= (size_t)-1 / 1024)
		ret = (size_t)kb * 1024;
out:
	hps = ret;
	return hps;
}
282+
232283

233284
#define KRING_SIZE 64
234285

@@ -261,13 +312,13 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
261312
mem_used = (mem_used + page_size - 1) & ~(page_size - 1);
262313

263314
/*
264-
* A maxed-out number of CQ entries with IORING_SETUP_CQE32 fills a 2MB
265-
* huge page by itself, so the SQ entries won't fit in the same huge
266-
* page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
315+
* A maxed-out number of CQ entries with IORING_SETUP_CQE32 can fill a
316+
* single huge page by itself, so the SQ entries won't fit in the same
317+
* huge page. For SQEs, that shouldn't be possible given KERN_MAX_ENTRIES,
267318
* but check that too to future-proof (e.g. against different huge page
268319
* sizes). Bail out early so we don't overrun.
269320
*/
270-
if (!buf && (sqes_mem > huge_page_size || ring_mem > huge_page_size))
321+
if (!buf && (sqes_mem > get_huge_page_size() || ring_mem > get_huge_page_size()))
271322
return -ENOMEM;
272323

273324
if (buf) {
@@ -279,8 +330,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
279330
if (sqes_mem <= page_size)
280331
buf_size = page_size;
281332
else {
282-
buf_size = huge_page_size;
283-
map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
333+
buf_size = get_huge_page_size();
334+
map_hugetlb = MAP_HUGETLB;
284335
}
285336
sqes_size = buf_size;
286337
ptr = __sys_mmap(NULL, sqes_size, PROT_READ|PROT_WRITE,
@@ -302,8 +353,8 @@ static int io_uring_alloc_huge(unsigned entries, struct io_uring_params *p,
302353
if (ring_mem <= page_size)
303354
buf_size = page_size;
304355
else {
305-
buf_size = huge_page_size;
306-
map_hugetlb = MAP_HUGETLB | MAP_HUGE_2MB;
356+
buf_size = get_huge_page_size();
357+
map_hugetlb = MAP_HUGETLB;
307358
}
308359
ptr = __sys_mmap(NULL, buf_size, PROT_READ|PROT_WRITE,
309360
MAP_SHARED|MAP_ANONYMOUS|map_hugetlb,

0 commit comments

Comments
 (0)