diff --git a/runtest/hugetlb b/runtest/hugetlb
index 0896d3c941f..bd40a7a30f5 100644
--- a/runtest/hugetlb
+++ b/runtest/hugetlb
@@ -36,6 +36,7 @@ hugemmap30 hugemmap30
 hugemmap31 hugemmap31
 hugemmap32 hugemmap32
 hugemmap34 hugemmap34
+hugemmap36 hugemmap36
 hugemmap05_1 hugemmap05 -m
 hugemmap05_2 hugemmap05 -s
 hugemmap05_3 hugemmap05 -s -m
diff --git a/testcases/kernel/mem/.gitignore b/testcases/kernel/mem/.gitignore
index b4455de51d6..2ddef6bf17f 100644
--- a/testcases/kernel/mem/.gitignore
+++ b/testcases/kernel/mem/.gitignore
@@ -36,6 +36,7 @@
 /hugetlb/hugemmap/hugemmap31
 /hugetlb/hugemmap/hugemmap32
 /hugetlb/hugemmap/hugemmap34
+/hugetlb/hugemmap/hugemmap36
 /hugetlb/hugeshmat/hugeshmat01
 /hugetlb/hugeshmat/hugeshmat02
 /hugetlb/hugeshmat/hugeshmat03
diff --git a/testcases/kernel/mem/hugetlb/hugemmap/Makefile b/testcases/kernel/mem/hugetlb/hugemmap/Makefile
index 6e72e700984..0147929e8ac 100644
--- a/testcases/kernel/mem/hugetlb/hugemmap/Makefile
+++ b/testcases/kernel/mem/hugetlb/hugemmap/Makefile
@@ -12,3 +12,5 @@ CFLAGS_no_stack_prot := $(filter-out -fstack-clash-protection, $(CFLAGS))
 
 hugemmap06: CFLAGS+=-pthread
 hugemmap34: CFLAGS=$(CFLAGS_no_stack_prot)
+hugemmap36: LDLIBS+=-lpthread
+hugemmap36: CFLAGS+=-pthread
diff --git a/testcases/kernel/mem/hugetlb/hugemmap/hugemmap36.c b/testcases/kernel/mem/hugetlb/hugemmap/hugemmap36.c
new file mode 100644
index 00000000000..3bc858546f7
--- /dev/null
+++ b/testcases/kernel/mem/hugetlb/hugemmap/hugemmap36.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2005-2006 IBM Corporation
+ * Author: David Gibson & Adam Litke
+ */
+
+/*
+ * This test is designed to detect a kernel allocation race introduced
+ * with hugepage demand-faulting. The problem is that no lock is held
+ * between allocating a hugepage and instantiating it in the
+ * pagetables or page cache index. In between the two, the (huge)
+ * page is cleared, so there's substantial time. Thus two processes
+ * can race instantiating the (same) last available hugepage - one
+ * will fail on the allocation, and thus cause an OOM fault even
+ * though the page it actually wants is being instantiated by the
+ * other racing process.
+ *
+ * All but one of the free hugepages are first mapped through a shared
+ * mapping. Two racers (processes for -m shared, threads for -m private),
+ * each pinned to a different CPU, then write to the final page at the
+ * same time. The test fails if a racer is killed or exits abnormally.
+ */
+
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/sysinfo.h>
+#include "tst_safe_pthread.h"
+#include "hugetlb.h"
+
+#define MNTPOINT "hugetlbfs/"
+#define NR_RACERS 2
+
+static char *str_op;
+static int child1, child2, race_type;
+/* Starts at -1 so that cleanup() cannot close fd 0 after an early failure */
+static int fd_sync = -1;
+
+struct racer_info {
+	void *p;
+	int cpu;
+	int status;
+};
+
+/*
+ * Pin the caller to @cpu, wait for the parent's start signal and then
+ * write to the page at @p. Always returns 0; setup failures end the
+ * racer through tst_brk().
+ */
+static int one_racer(void *p, int cpu)
+{
+	volatile int *pi = p;
+	cpu_set_t *cpuset;
+	size_t mask_size;
+
+	cpuset = CPU_ALLOC(cpu + 1);
+	if (!cpuset)
+		tst_brk(TBROK | TERRNO, "CPU_ALLOC() failed");
+
+	mask_size = CPU_ALLOC_SIZE(cpu + 1);
+
+	/* Split onto different CPUs to encourage the race */
+	CPU_ZERO_S(mask_size, cpuset);
+	CPU_SET_S(cpu, mask_size, cpuset);
+
+	/*
+	 * pid 0 selects the calling thread. getpid() would pin the main
+	 * thread instead of the racer when the racers are threads.
+	 */
+	if (sched_setaffinity(0, mask_size, cpuset) == -1)
+		tst_brk(TBROK | TERRNO, "sched_setaffinity() failed");
+
+	CPU_FREE(cpuset);
+
+	/* Wait for parent to signal both racers to start */
+	TST_CHECKPOINT_WAIT(0);
+
+	/* Set the shared value */
+	*pi = 1;
+
+	return 0;
+}
+
+/* Process racer: the exit code carries the result back to the parent */
+static void proc_racer(void *p, int cpu)
+{
+	exit(one_racer(p, cpu));
+}
+
+/* Thread racer: stores the result in @info and returns @info itself */
+static void *thread_racer(void *info)
+{
+	struct racer_info *ri = info;
+
+	ri->status = one_racer(ri->p, ri->cpu);
+	return ri;
+}
+
+/*
+ * Store the ids of the first @nr_cpus_needed CPUs this process may run
+ * on in @online_cpus. Ends the test with TCONF if there are too few.
+ */
+static void check_online_cpus(int online_cpus[], int nr_cpus_needed)
+{
+	cpu_set_t cpuset;
+	int nr_allowed, cpu_idx = 0;
+
+	/* Request every CPU so that an earlier restriction is lifted */
+	CPU_ZERO(&cpuset);
+	for (int i = 0; i < CPU_SETSIZE; i++)
+		CPU_SET(i, &cpuset);
+
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) == -1)
+		tst_brk(TBROK | TERRNO, "sched_setaffinity() reset failed");
+
+	if (sched_getaffinity(0, sizeof(cpuset), &cpuset) == -1)
+		tst_brk(TBROK | TERRNO, "sched_getaffinity() failed");
+
+	nr_allowed = CPU_COUNT(&cpuset);
+	tst_res(TINFO, "Online CPUs needed: %d, available: %d",
+		nr_cpus_needed, nr_allowed);
+
+	if (nr_allowed < nr_cpus_needed)
+		tst_brk(TCONF, "At least %d online CPUs are required", nr_cpus_needed);
+
+	/* CPU ids can be sparse, so scan the whole set, not just 0..nprocs */
+	for (int i = 0; i < CPU_SETSIZE && cpu_idx < nr_cpus_needed; i++) {
+		if (CPU_ISSET(i, &cpuset))
+			online_cpus[cpu_idx++] = i;
+	}
+}
+
+/* Report how child racer @id ended; returns nonzero unless it exited with 0 */
+static int child_failed(int id, int status)
+{
+	tst_res(TINFO, "Child %d status: %x", id, status);
+
+	if (WIFSIGNALED(status)) {
+		tst_res(TFAIL, "Child %d killed by signal %s", id,
+			strsignal(WTERMSIG(status)));
+		return 1;
+	}
+
+	if (WEXITSTATUS(status) != 0) {
+		tst_res(TFAIL, "Racer %d terminated with code %d", id,
+			WEXITSTATUS(status));
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Report how thread racer @id ended; returns nonzero on failure */
+static int thread_failed(int id, void *tret, struct racer_info *ri)
+{
+	if (tret != ri) {
+		tst_res(TFAIL, "Thread %d returned %p not %p, killed?",
+			id, tret, (void *)ri);
+		return 1;
+	}
+
+	if (ri->status != 0) {
+		tst_res(TFAIL, "Racer %d terminated with code %d", id, ri->status);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Map the last hugepage with @mmap_flags and let two racers write to it
+ * at the same time: processes for MAP_SHARED, threads otherwise.
+ */
+static void run_race(int mmap_flags)
+{
+	int online_cpus[NR_RACERS];
+	int status1 = 0, status2 = 0;
+	long hpage_size;
+	int failed;
+	void *p;
+	int fd;
+
+	check_online_cpus(online_cpus, NR_RACERS);
+
+	hpage_size = tst_get_hugepage_size();
+
+	/* Get a new file for the final page */
+	fd = tst_creat_unlinked(MNTPOINT, 0, 0600);
+	tst_res(TINFO, "Mapping final page..");
+
+	p = SAFE_MMAP(NULL, hpage_size, PROT_READ|PROT_WRITE, mmap_flags, fd, 0);
+
+	if (mmap_flags == MAP_SHARED) {
+		child1 = SAFE_FORK();
+		if (child1 == 0)
+			proc_racer(p, online_cpus[0]);
+
+		child2 = SAFE_FORK();
+		if (child2 == 0)
+			proc_racer(p, online_cpus[1]);
+
+		/* Wake both children to start the race simultaneously */
+		TST_CHECKPOINT_WAKE2(0, NR_RACERS);
+
+		/*
+		 * Forget each pid once it is reaped, otherwise cleanup()
+		 * could SIGKILL an unrelated process that reused it.
+		 */
+		SAFE_WAITPID(child1, &status1, 0);
+		child1 = 0;
+		SAFE_WAITPID(child2, &status2, 0);
+		child2 = 0;
+
+		failed = child_failed(1, status1);
+		failed |= child_failed(2, status2);
+	} else {
+		struct racer_info ri1 = {
+			.p = p,
+			.cpu = online_cpus[0],
+			.status = -1,
+		};
+		struct racer_info ri2 = {
+			.p = p,
+			.cpu = online_cpus[1],
+			.status = -1,
+		};
+		pthread_t thread1, thread2;
+		void *tret1, *tret2;
+
+		SAFE_PTHREAD_CREATE(&thread1, NULL, thread_racer, &ri1);
+		SAFE_PTHREAD_CREATE(&thread2, NULL, thread_racer, &ri2);
+
+		/* Wake both threads to start the race simultaneously */
+		TST_CHECKPOINT_WAKE2(0, NR_RACERS);
+
+		SAFE_PTHREAD_JOIN(thread1, &tret1);
+		SAFE_PTHREAD_JOIN(thread2, &tret2);
+
+		failed = thread_failed(1, tret1, &ri1);
+		failed |= thread_failed(2, tret2, &ri2);
+	}
+
+	if (!failed)
+		tst_res(TPASS, "Test completed successfully");
+
+	SAFE_CLOSE(fd);
+	SAFE_MUNMAP(p, hpage_size);
+}
+
+/*
+ * Map all free hugepages but one through a shared mapping, then race
+ * on the remaining page.
+ */
+static void run_test(void)
+{
+	unsigned long totpages;
+	long hpage_size;
+	size_t sync_len;
+	void *p_sync;
+
+	totpages = SAFE_READ_MEMINFO(MEMINFO_HPAGE_FREE);
+	hpage_size = tst_get_hugepage_size();
+
+	/* One page for the race plus at least one to map up front */
+	if (totpages < 2)
+		tst_brk(TCONF, "At least 2 free hugepages are required");
+
+	tst_res(TINFO, "Instantiating..");
+
+	fd_sync = tst_creat_unlinked(MNTPOINT, 0, 0600);
+
+	tst_res(TINFO, "Mapping %lu/%lu pages..", totpages - 1, totpages);
+	sync_len = (totpages - 1) * hpage_size;
+	p_sync = SAFE_MMAP(NULL, sync_len, PROT_READ|PROT_WRITE,
+			   MAP_SHARED, fd_sync, 0);
+
+	run_race(race_type);
+
+	SAFE_CLOSE(fd_sync);
+	SAFE_MUNMAP(p_sync, sync_len);
+}
+
+/* Translate the -m option into the mmap() flag used for the raced page */
+static void setup(void)
+{
+	/* Default to shared if no option is passed */
+	race_type = MAP_SHARED;
+
+	if (!str_op || strcmp(str_op, "shared") == 0)
+		return;
+
+	if (strcmp(str_op, "private") != 0)
+		tst_brk(TBROK, "Invalid parameter: use -m <private|shared>");
+
+	race_type = MAP_PRIVATE;
+}
+
+/* Close the up-front mapping's file and kill racers that are still running */
+static void cleanup(void)
+{
+	if (fd_sync >= 0)
+		SAFE_CLOSE(fd_sync);
+
+	/* A pid is only still recorded here if that child was not reaped */
+	if (child1 > 0 && kill(child1, 0) == 0)
+		SAFE_KILL(child1, SIGKILL);
+
+	if (child2 > 0 && kill(child2, 0) == 0)
+		SAFE_KILL(child2, SIGKILL);
+}
+
+static struct tst_test test = {
+	.options = (struct tst_option[]) {
+		{"m:", &str_op, "Type of mmap() mapping <private|shared>"},
+		{NULL, NULL, NULL}
+	},
+	.needs_root = 1,
+	.mntpoint = MNTPOINT,
+	.needs_hugetlbfs = 1,
+	.needs_tmpdir = 1,
+	.setup = setup,
+	.cleanup = cleanup,
+	.test_all = run_test,
+	.hugepages = {2, TST_NEEDS},
+	.forks_child = 1,
+	.needs_checkpoints = 1,
+	.min_cpus = 2
+};