Skip to content

Commit dc4004f

Browse files
author
Samir
committed
[LTP] [PATCH v5] Migrating the libhugetlbfs/testcases/alloc-instantiate-race.c test
This test is designed to detect a kernel allocation race introduced with hugepage demand-faulting. The problem is that no lock is held between allocating a hugepage and instantiating it in the pagetables or page cache index. In between the two, the (huge) page is cleared, so there's substantial time. Thus two processes can race instantiating the (same) last available hugepage - one will fail on the allocation, and thus cause an OOM fault even though the page it actually wants is being instantiated by the other racing process. Signed-off-by: Samir <samir@linux.ibm.com> v3: https://lore.kernel.org/all/20250928030721.3537869-1-samir@linux.ibm.com/ v4: https://lore.kernel.org/ltp/20260317095559.5766-1-samir@linux.ibm.com/ --- v4: Addressed review comments: - Removed unnecessary [Description] tag from comment block - Added static keyword to global variables (child1, child2, race_type, fd_sync) - Moved totpages and hpage_size to local scope in run_test() - Replaced busy loop with TST_CHECKPOINT_WAIT/WAKE mechanism - Fixed indentation in thread_racer() function - Made check_online_cpus() function static - Declared loop variable 'i' inside for loops using C99 style - Removed unnecessary 'available' variable, use CPU_COUNT() directly - Fixed indentation for tst_res() call - Removed q_sync global variable to avoid uninitialized access - Removed unused SYSFS_CPU_ONLINE_FMT macro - Optimized variable scope throughout the code - Implemented proper checkpoint synchronization pattern - Added cleanup() function for resource cleanup - Updated Makefile, runtest/hugetlb, and .gitignore v5: - Replace empty initializer {} with {NULL, NULL, NULL} to fix -Wmissing-field-initializers warning in the options array terminator. --- Signed-off-by: Samir <samir@linux.ibm.com>
1 parent 1ad07fa commit dc4004f

4 files changed

Lines changed: 283 additions & 0 deletions

File tree

runtest/hugetlb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ hugemmap30 hugemmap30
3636
hugemmap31 hugemmap31
3737
hugemmap32 hugemmap32
3838
hugemmap34 hugemmap34
39+
hugemmap36 hugemmap36
3940
hugemmap05_1 hugemmap05 -m
4041
hugemmap05_2 hugemmap05 -s
4142
hugemmap05_3 hugemmap05 -s -m

testcases/kernel/mem/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
/hugetlb/hugemmap/hugemmap31
3737
/hugetlb/hugemmap/hugemmap32
3838
/hugetlb/hugemmap/hugemmap34
39+
/hugetlb/hugemmap/hugemmap36
3940
/hugetlb/hugeshmat/hugeshmat01
4041
/hugetlb/hugeshmat/hugeshmat02
4142
/hugetlb/hugeshmat/hugeshmat03

testcases/kernel/mem/hugetlb/hugemmap/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,5 @@ CFLAGS_no_stack_prot := $(filter-out -fstack-clash-protection, $(CFLAGS))
1212

1313
hugemmap06: CFLAGS+=-pthread
1414
hugemmap34: CFLAGS=$(CFLAGS_no_stack_prot)
15+
hugemmap36: LDLIBS+=-lpthread
16+
hugemmap36: CFLAGS+=-pthread
Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
// SPDX-License-Identifier: GPL-2.0-or-later
2+
/*
3+
* Copyright (C) 2005-2006 IBM Corporation
4+
* Author: David Gibson & Adam Litke
5+
*/
6+
7+
/*
8+
* This test is designed to detect a kernel allocation race introduced
9+
* with hugepage demand-faulting. The problem is that no lock is held
10+
* between allocating a hugepage and instantiating it in the
11+
* pagetables or page cache index. In between the two, the (huge)
12+
* page is cleared, so there's substantial time. Thus two processes
13+
* can race instantiating the (same) last available hugepage - one
14+
* will fail on the allocation, and thus cause an OOM fault even
15+
* though the page it actually wants is being instantiated by the
16+
* other racing process.
17+
*/
18+
19+
#define _GNU_SOURCE
20+
#include <stdio.h>
21+
#include <pthread.h>
22+
#include "tst_safe_pthread.h"
23+
#include "hugetlb.h"
24+
25+
#define MNTPOINT "hugetlbfs/"
26+
27+
/* Argument of the -m option; stays NULL when the option is not given. */
static char *str_op;

/* PIDs of the two forked racers and the mmap() sharing flag under test. */
static int child1, child2, race_type;

/*
 * Descriptor of the hugetlbfs file backing the pre-mapped pages.
 * It starts at -1 because cleanup() treats any value >= 0 as an open
 * descriptor: left zero-initialised, a failure before run_test() opens
 * the file would make cleanup() close fd 0.
 */
static int fd_sync = -1;
29+
30+
/* Arguments and result slot handed to thread_racer(). */
struct racer_info {
	void *p;	/* address the racer stores to */
	int cpu;	/* CPU number passed on to one_racer() */
	int status;	/* return value of one_racer(), written by the thread */
};
35+
36+
static int one_racer(void *p, int cpu)
37+
{
38+
volatile int *pi = p;
39+
cpu_set_t *cpuset;
40+
size_t mask_size;
41+
int err;
42+
43+
cpuset = CPU_ALLOC(cpu + 1);
44+
if (!cpuset)
45+
tst_brk(TBROK | TERRNO, "CPU_ALLOC() failed");
46+
47+
mask_size = CPU_ALLOC_SIZE(cpu + 1);
48+
49+
/* Split onto different CPUs to encourage the race */
50+
CPU_ZERO_S(mask_size, cpuset);
51+
CPU_SET_S(cpu, mask_size, cpuset);
52+
53+
err = sched_setaffinity(getpid(), mask_size, cpuset);
54+
if (err == -1)
55+
tst_brk(TBROK | TERRNO, "sched_setaffinity() failed");
56+
57+
/* Wait for parent to signal both racers to start */
58+
TST_CHECKPOINT_WAIT(0);
59+
60+
/* Set the shared value */
61+
*pi = 1;
62+
63+
CPU_FREE(cpuset);
64+
return 0;
65+
}
66+
67+
/*
 * Process flavour of the racer: run one_racer() and terminate the calling
 * process with its return value as the exit code. Never returns.
 */
static void proc_racer(void *p, int cpu)
{
	int rc = one_racer(p, cpu);

	exit(rc);
}
71+
72+
static void *thread_racer(void *info)
73+
{
74+
struct racer_info *ri = info;
75+
76+
ri->status = one_racer(ri->p, ri->cpu);
77+
return ri;
78+
}
79+
80+
static void check_online_cpus(int online_cpus[], int nr_cpus_needed)
81+
{
82+
cpu_set_t cpuset;
83+
int total_cpus, cpu_idx;
84+
85+
CPU_ZERO(&cpuset);
86+
87+
for (int i = 0; i < CPU_SETSIZE; i++)
88+
CPU_SET(i, &cpuset);
89+
90+
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) == -1)
91+
tst_brk(TBROK | TERRNO, "sched_setaffinity() reset failed");
92+
93+
total_cpus = get_nprocs_conf();
94+
95+
if (sched_getaffinity(0, sizeof(cpu_set_t), &cpuset) == -1)
96+
tst_brk(TBROK | TERRNO, "sched_getaffinity() failed");
97+
98+
tst_res(TINFO, "Online CPUs needed: %d, available: %d",
99+
nr_cpus_needed, CPU_COUNT(&cpuset));
100+
101+
if (CPU_COUNT(&cpuset) < nr_cpus_needed)
102+
tst_brk(TCONF, "At least %d online CPUs are required", nr_cpus_needed);
103+
104+
cpu_idx = 0;
105+
for (int i = 0; i < total_cpus && cpu_idx < nr_cpus_needed; i++) {
106+
if (CPU_ISSET(i, &cpuset))
107+
online_cpus[cpu_idx++] = i;
108+
}
109+
110+
if (cpu_idx < nr_cpus_needed)
111+
tst_brk(TBROK, "Unable to find enough online CPUs");
112+
}
113+
114+
static void run_race(int race_type)
115+
{
116+
int fd = -1;
117+
void *p = MAP_FAILED;
118+
void *tret1, *tret2;
119+
int status1 = 0, status2 = 0;
120+
int online_cpus[2];
121+
long hpage_size;
122+
pthread_t thread1, thread2;
123+
124+
check_online_cpus(online_cpus, 2);
125+
126+
hpage_size = tst_get_hugepage_size();
127+
128+
/* Get a new file for the final page */
129+
fd = tst_creat_unlinked(MNTPOINT, 0, 0600);
130+
tst_res(TINFO, "Mapping final page..");
131+
132+
p = SAFE_MMAP(NULL, hpage_size, PROT_READ|PROT_WRITE, race_type, fd, 0);
133+
134+
if (race_type == MAP_SHARED) {
135+
child1 = SAFE_FORK();
136+
if (child1 == 0)
137+
proc_racer(p, online_cpus[0]);
138+
139+
child2 = SAFE_FORK();
140+
if (child2 == 0)
141+
proc_racer(p, online_cpus[1]);
142+
143+
/* Wake both children to start the race simultaneously */
144+
TST_CHECKPOINT_WAKE2(0, 2);
145+
146+
SAFE_WAITPID(child1, &status1, 0);
147+
tst_res(TINFO, "Child 1 status: %x", status1);
148+
149+
SAFE_WAITPID(child2, &status2, 0);
150+
tst_res(TINFO, "Child 2 status: %x", status2);
151+
152+
if (WIFSIGNALED(status1))
153+
tst_res(TFAIL, "Child 1 killed by signal %s",
154+
strsignal(WTERMSIG(status1)));
155+
if (WIFSIGNALED(status2))
156+
tst_res(TFAIL, "Child 2 killed by signal %s",
157+
strsignal(WTERMSIG(status2)));
158+
} else {
159+
struct racer_info ri1 = {
160+
.p = p,
161+
.cpu = online_cpus[0],
162+
.status = -1,
163+
};
164+
struct racer_info ri2 = {
165+
.p = p,
166+
.cpu = online_cpus[1],
167+
.status = -1,
168+
};
169+
170+
SAFE_PTHREAD_CREATE(&thread1, NULL, thread_racer, &ri1);
171+
SAFE_PTHREAD_CREATE(&thread2, NULL, thread_racer, &ri2);
172+
173+
/* Wake both threads to start the race simultaneously */
174+
TST_CHECKPOINT_WAKE2(0, 2);
175+
176+
SAFE_PTHREAD_JOIN(thread1, &tret1);
177+
if (tret1 != &ri1)
178+
tst_res(TFAIL, "Thread 1 returned %p not %p, killed?",
179+
tret1, &ri1);
180+
181+
SAFE_PTHREAD_JOIN(thread2, &tret2);
182+
if (tret2 != &ri2)
183+
tst_res(TFAIL, "Thread 2 returned %p not %p, killed?",
184+
tret2, &ri2);
185+
186+
status1 = ri1.status;
187+
status2 = ri2.status;
188+
}
189+
190+
if (status1 != 0)
191+
tst_res(TFAIL, "Racer 1 terminated with code %d", status1);
192+
193+
if (status2 != 0)
194+
tst_res(TFAIL, "Racer 2 terminated with code %d", status2);
195+
196+
if (status1 == 0 && status2 == 0)
197+
tst_res(TPASS, "Test completed successfully");
198+
199+
if (fd >= 0)
200+
SAFE_CLOSE(fd);
201+
202+
if (p != MAP_FAILED)
203+
SAFE_MUNMAP(p, hpage_size);
204+
}
205+
206+
static void run_test(void)
207+
{
208+
unsigned long totpages;
209+
long hpage_size;
210+
void *p_sync = MAP_FAILED;
211+
212+
totpages = SAFE_READ_MEMINFO(MEMINFO_HPAGE_FREE);
213+
hpage_size = tst_get_hugepage_size();
214+
215+
tst_res(TINFO, "Instantiating..");
216+
217+
fd_sync = tst_creat_unlinked(MNTPOINT, 0, 0600);
218+
219+
tst_res(TINFO, "Mapping %ld/%ld pages..", totpages - 1, totpages);
220+
p_sync = SAFE_MMAP(NULL, (totpages - 1) * hpage_size, PROT_READ|PROT_WRITE,
221+
MAP_SHARED, fd_sync, 0);
222+
223+
run_race(race_type);
224+
225+
if (fd_sync >= 0)
226+
SAFE_CLOSE(fd_sync);
227+
228+
if (p_sync != MAP_FAILED)
229+
SAFE_MUNMAP(p_sync, (totpages - 1) * hpage_size);
230+
}
231+
232+
static void setup(void)
233+
{
234+
if (str_op) {
235+
if (strcmp(str_op, "shared") == 0)
236+
race_type = MAP_SHARED;
237+
else if (strcmp(str_op, "private") == 0)
238+
race_type = MAP_PRIVATE;
239+
else
240+
tst_brk(TBROK, "Invalid parameter: use -m <private|shared>");
241+
} else {
242+
/* Default to shared if no option is passed */
243+
race_type = MAP_SHARED;
244+
}
245+
}
246+
247+
static void cleanup(void)
248+
{
249+
if (fd_sync >= 0)
250+
SAFE_CLOSE(fd_sync);
251+
252+
if (child1 > 0) {
253+
if (kill(child1, 0) == 0)
254+
SAFE_KILL(child1, SIGKILL);
255+
}
256+
257+
if (child2 > 0) {
258+
if (kill(child2, 0) == 0)
259+
SAFE_KILL(child2, SIGKILL);
260+
}
261+
}
262+
263+
static struct tst_test test = {
264+
.options = (struct tst_option[]) {
265+
{"m:", &str_op, "Type of mmap() mapping <private|shared>"},
266+
{NULL, NULL, NULL}
267+
},
268+
.needs_root = 1,
269+
.mntpoint = MNTPOINT,
270+
.needs_hugetlbfs = 1,
271+
.needs_tmpdir = 1,
272+
.setup = setup,
273+
.cleanup = cleanup,
274+
.test_all = run_test,
275+
.hugepages = {2, TST_NEEDS},
276+
.forks_child = 1,
277+
.needs_checkpoints = 1,
278+
.min_cpus = 2
279+
};

0 commit comments

Comments
 (0)