From 2912b6c082c725046cb6236c2e87168a5159c862 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Tue, 1 Oct 2024 22:29:37 +0800 Subject: [PATCH 1/4] subprocess-posix: use clone when available --- meson.build | 1 + osdep/compiler.h | 2 + osdep/subprocess-posix.c | 102 ++++++++++++++++++++++++++++++--------- 3 files changed, 81 insertions(+), 24 deletions(-) diff --git a/meson.build b/meson.build index d7bfd5f3a3311..67070898e4c5e 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,7 @@ features += {'ppoll': cc.has_function('ppoll', args: '-D_GNU_SOURCE', prefix: '#include ')} features += {'memrchr': cc.has_function('memrchr', args: '-D_GNU_SOURCE', prefix: '#include ')} +features += {'clone': cc.has_header_symbol('sched.h', 'clone', args: '-D_GNU_SOURCE')} optical_devices = { 'windows': 'D:', diff --git a/osdep/compiler.h b/osdep/compiler.h index d3edecdb200c5..fead5dc7ecaf3 100644 --- a/osdep/compiler.h +++ b/osdep/compiler.h @@ -16,11 +16,13 @@ #define MP_FALLTHROUGH __attribute__((fallthrough)) #define MP_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) #define MP_UNUSED __attribute__((unused)) +#define MP_NO_ASAN __attribute__((no_sanitize("address"))) #else #define MP_NORETURN #define MP_FALLTHROUGH do {} while (0) #define MP_WARN_UNUSED_RESULT #define MP_UNUSED +#define MP_NO_ASAN #endif #if defined(__STDC_VERSION__) && (__STDC_VERSION__ < 202311L) && !defined(thread_local) diff --git a/osdep/subprocess-posix.c b/osdep/subprocess-posix.c index d583ab07d0b86..7160f3cafd7ad 100644 --- a/osdep/subprocess-posix.c +++ b/osdep/subprocess-posix.c @@ -32,6 +32,11 @@ extern char **environ; +#if HAVE_CLONE +#include +#include +#endif + #ifdef SIGRTMAX #define SIGNAL_MAX SIGRTMAX #else @@ -84,17 +89,78 @@ static void reset_signals_child(void) sigprocmask(SIG_SETMASK, &sigmask, NULL); } +struct child_args { + const char *path; + struct mp_subprocess_opts *opts; + int *src_fds; + int pipe_end; + bool failed; +}; + +// This function is called from a 
clone(CLONE_VM) context where the child +// shares the parent's address space. Use MP_NO_ASAN to avoid false positives +// from ASan when the child writes to shared memory. +MP_NO_ASAN static int child_main(void* args) +{ + struct child_args *child_args = args; + const char *path = child_args->path; + struct mp_subprocess_opts *opts = child_args->opts; + int *src_fds = child_args->src_fds; + int pipe_end = child_args->pipe_end; + + reset_signals_child(); + + for (int n = 0; n < opts->num_fds; n++) { + if (src_fds[n] == opts->fds[n].fd) { + int flags = fcntl(opts->fds[n].fd, F_GETFD); + if (flags == -1) + goto child_failed; + flags &= ~(unsigned)FD_CLOEXEC; + if (fcntl(opts->fds[n].fd, F_SETFD, flags) == -1) + goto child_failed; + } else if (dup2(src_fds[n], opts->fds[n].fd) < 0) { + goto child_failed; + } + } + + as_execvpe(path, opts->exe, opts->args, opts->env ? opts->env : environ); + +child_failed: + child_args->failed = true; + if (pipe_end >= 0) + (void)write(pipe_end, &(char){1}, 1); // shouldn't be able to fail + return 1; +} + // Returns 0 on any error, valid PID on success. // This function must be async-signal-safe, as it may be called from a fork(). static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, int src_fds[]) { - int p[2] = {-1, -1}; pid_t fres = 0; sigset_t sigmask, oldmask; + int r = 0; + sigfillset(&sigmask); pthread_sigmask(SIG_BLOCK, &sigmask, &oldmask); + struct child_args child_args = { + .path = path, + .opts = opts, + .src_fds = src_fds, + .pipe_end = -1, + .failed = false, + }; + + int p[2] = {-1, -1}; + +#if HAVE_CLONE + const size_t stack_size = 0x8000; + void* stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (stack == MAP_FAILED) + goto done; + fres = clone(child_main, (int8_t*)stack + stack_size, CLONE_VM | CLONE_VFORK | SIGCHLD, &child_args); +#else // We setup a communication pipe to signal failure. 
Since the child calls // exec() and becomes the calling process, we don't know if or when the // child process successfully ran exec() just from the PID. @@ -107,51 +173,39 @@ static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, int p_flags = fcntl(p[0], F_GETFD); if (p_flags == -1 || !FD_CLOEXEC || !(p_flags & FD_CLOEXEC)) goto done; // require CLOEXEC; unknown if fallback would be worth it + child_args.pipe_end = p[1]; fres = fork(); +#endif + if (fres < 0) { fres = 0; goto done; } - if (fres == 0) { - // child - reset_signals_child(); - - for (int n = 0; n < opts->num_fds; n++) { - if (src_fds[n] == opts->fds[n].fd) { - int flags = fcntl(opts->fds[n].fd, F_GETFD); - if (flags == -1) - goto child_failed; - flags &= ~(unsigned)FD_CLOEXEC; - if (fcntl(opts->fds[n].fd, F_SETFD, flags) == -1) - goto child_failed; - } else if (dup2(src_fds[n], opts->fds[n].fd) < 0) { - goto child_failed; - } - } - as_execvpe(path, opts->exe, opts->args, opts->env ? opts->env : environ); - - child_failed: - (void)write(p[1], &(char){1}, 1); // shouldn't be able to fail - _exit(1); +#if !HAVE_CLONE + if (fres == 0) { + _exit(child_main(&child_args)); } SAFE_CLOSE(p[1]); - int r; do { r = read(p[0], &(char){0}, 1); } while (r < 0 && errno == EINTR); +#endif // If exec()ing child failed, collect it immediately. 
- if (r != 0) { + if (child_args.failed || r != 0) { while (waitpid(fres, &(int){0}, 0) < 0 && errno == EINTR) {} fres = 0; } done: pthread_sigmask(SIG_SETMASK, &oldmask, NULL); +#if HAVE_CLONE + munmap(stack, stack_size); +#endif SAFE_CLOSE(p[0]); SAFE_CLOSE(p[1]); From 43208de3dd65e0ce216bc2e0f514230e824aff6c Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 2 Oct 2024 00:26:28 +0800 Subject: [PATCH 2/4] subprocess-posix: handle detach inside spawn_process --- osdep/subprocess-posix.c | 80 ++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/osdep/subprocess-posix.c b/osdep/subprocess-posix.c index 7160f3cafd7ad..c85e6a2369b55 100644 --- a/osdep/subprocess-posix.c +++ b/osdep/subprocess-posix.c @@ -95,8 +95,12 @@ struct child_args { int *src_fds; int pipe_end; bool failed; + bool detach; }; +static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *opts, + int src_fds[], bool detach); + // This function is called from a clone(CLONE_VM) context where the child // shares the parent's address space. Use MP_NO_ASAN to avoid false positives // from ASan when the child writes to shared memory. @@ -107,6 +111,14 @@ MP_NO_ASAN static int child_main(void* args) struct mp_subprocess_opts *opts = child_args->opts; int *src_fds = child_args->src_fds; int pipe_end = child_args->pipe_end; + bool detach = child_args->detach; + + if (detach) { + setsid(); + if (!spawn_process_inner(path, opts, src_fds, false)) + goto child_failed; + return 0; + } reset_signals_child(); @@ -132,24 +144,19 @@ MP_NO_ASAN static int child_main(void* args) return 1; } -// Returns 0 on any error, valid PID on success. -// This function must be async-signal-safe, as it may be called from a fork(). 
-static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, - int src_fds[]) +static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *opts, + int src_fds[], bool detach) { pid_t fres = 0; - sigset_t sigmask, oldmask; int r = 0; - sigfillset(&sigmask); - pthread_sigmask(SIG_BLOCK, &sigmask, &oldmask); - struct child_args child_args = { .path = path, .opts = opts, .src_fds = src_fds, .pipe_end = -1, .failed = false, + .detach = detach, }; int p[2] = {-1, -1}; @@ -196,13 +203,14 @@ static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, #endif // If exec()ing child failed, collect it immediately. - if (child_args.failed || r != 0) { - while (waitpid(fres, &(int){0}, 0) < 0 && errno == EINTR) {} - fres = 0; + if (detach || child_args.failed || r != 0) { + int child_status = 0; + while (waitpid(fres, &child_status, 0) < 0 && errno == EINTR) {} + if (r != 0 || !WIFEXITED(child_status) || WEXITSTATUS(child_status) != 0) + fres = 0; } done: - pthread_sigmask(SIG_SETMASK, &oldmask, NULL); #if HAVE_CLONE munmap(stack, stack_size); #endif @@ -212,6 +220,22 @@ static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, return fres; } +// Returns 0 on any error, valid PID on success. +static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, + int src_fds[]) +{ + sigset_t sigmask, oldmask; + + sigfillset(&sigmask); + pthread_sigmask(SIG_BLOCK, &sigmask, &oldmask); + + pid_t fres = spawn_process_inner(path, opts, src_fds, opts->detach); + + pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + + return fres; +} + void mp_subprocess2(struct mp_subprocess_opts *opts, struct mp_subprocess_result *res) { @@ -270,33 +294,11 @@ void mp_subprocess2(struct mp_subprocess_opts *opts, src_fds[n] = src_fd; } - if (opts->detach) { - // If we run it detached, we fork a child to start the process; then - // it exits immediately, letting PID 1 inherit it. 
So we don't need - // anything else to collect these child PIDs. - sigset_t sigmask, oldmask; - sigfillset(&sigmask); - pthread_sigmask(SIG_BLOCK, &sigmask, &oldmask); - pid_t fres = fork(); - if (fres < 0) - goto done; - if (fres == 0) { - // child - setsid(); - if (!spawn_process(path, opts, src_fds)) - _exit(1); - _exit(0); - } - pthread_sigmask(SIG_SETMASK, &oldmask, NULL); - int child_status = 0; - while (waitpid(fres, &child_status, 0) < 0 && errno == EINTR) {} - if (!WIFEXITED(child_status) || WEXITSTATUS(child_status) != 0) - goto done; - } else { - pid = spawn_process(path, opts, src_fds); - if (!pid) - goto done; - } + pid = spawn_process(path, opts, src_fds); + if (!pid) + goto done; + if (opts->detach) + pid = 0; spawned = true; From f7ff685bb7e007310a5ae3cb51a77fc3f8e2801c Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 2 Oct 2024 01:21:56 +0800 Subject: [PATCH 3/4] subprocess-posix: use rfork_thread on FreeBSD --- meson.build | 1 + osdep/subprocess-posix.c | 24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/meson.build b/meson.build index 67070898e4c5e..b8ee5eca4e808 100644 --- a/meson.build +++ b/meson.build @@ -464,6 +464,7 @@ features += {'ppoll': cc.has_function('ppoll', args: '-D_GNU_SOURCE', features += {'memrchr': cc.has_function('memrchr', args: '-D_GNU_SOURCE', prefix: '#include ')} features += {'clone': cc.has_header_symbol('sched.h', 'clone', args: '-D_GNU_SOURCE')} +features += {'rfork': cc.has_header_symbol('unistd.h', 'rfork_thread')} optical_devices = { 'windows': 'D:', diff --git a/osdep/subprocess-posix.c b/osdep/subprocess-posix.c index c85e6a2369b55..ae8a4be475b6b 100644 --- a/osdep/subprocess-posix.c +++ b/osdep/subprocess-posix.c @@ -79,6 +79,8 @@ static int as_execvpe(const char *path, const char *file, char *const argv[], // signal handlers first so nothing funny happens. 
static void reset_signals_child(void) { +#if !HAVE_RFORK + // RFSPAWN has reset all signal actions in the child to default already struct sigaction sa = { 0 }; sigset_t sigmask; sa.sa_handler = SIG_DFL; @@ -87,6 +89,7 @@ static void reset_signals_child(void) for (int nr = 1; nr <= SIGNAL_MAX; nr++) sigaction(nr, &sa, NULL); sigprocmask(SIG_SETMASK, &sigmask, NULL); +#endif } struct child_args { @@ -101,9 +104,9 @@ struct child_args { static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *opts, int src_fds[], bool detach); -// This function is called from a clone(CLONE_VM) context where the child -// shares the parent's address space. Use MP_NO_ASAN to avoid false positives -// from ASan when the child writes to shared memory. +// This function is called from a clone(CLONE_VM)/rfork_thread context where +// the child shares the parent's address space. Use MP_NO_ASAN to avoid false +// positives from ASan when the child writes to shared memory. MP_NO_ASAN static int child_main(void* args) { struct child_args *child_args = args; @@ -161,11 +164,16 @@ static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *op int p[2] = {-1, -1}; -#if HAVE_CLONE +#if HAVE_CLONE || HAVE_RFORK const size_t stack_size = 0x8000; void* stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (stack == MAP_FAILED) goto done; +#endif + +#if HAVE_RFORK + fres = rfork_thread(RFSPAWN, (int8_t*)stack + stack_size, child_main, &child_args); +#elif HAVE_CLONE fres = clone(child_main, (int8_t*)stack + stack_size, CLONE_VM | CLONE_VFORK | SIGCHLD, &child_args); #else // We setup a communication pipe to signal failure. 
Since the child calls @@ -190,7 +198,7 @@ static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *op goto done; } -#if !HAVE_CLONE +#if !HAVE_CLONE && !HAVE_RFORK if (fres == 0) { _exit(child_main(&child_args)); } @@ -211,7 +219,7 @@ static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *op } done: -#if HAVE_CLONE +#if HAVE_CLONE || HAVE_RFORK munmap(stack, stack_size); #endif SAFE_CLOSE(p[0]); @@ -224,14 +232,18 @@ static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *op static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, int src_fds[]) { +#if !HAVE_RFORK sigset_t sigmask, oldmask; sigfillset(&sigmask); pthread_sigmask(SIG_BLOCK, &sigmask, &oldmask); +#endif pid_t fres = spawn_process_inner(path, opts, src_fds, opts->detach); +#if !HAVE_RFORK pthread_sigmask(SIG_SETMASK, &oldmask, NULL); +#endif return fres; } From ed8bc7cec4cad651b1e098a82c7db111c7469d01 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 2 Oct 2024 10:51:53 +0000 Subject: [PATCH 4/4] subprocess-posix: avoid using mmap --- osdep/subprocess-posix.c | 46 +++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/osdep/subprocess-posix.c b/osdep/subprocess-posix.c index ae8a4be475b6b..5762058b8eae3 100644 --- a/osdep/subprocess-posix.c +++ b/osdep/subprocess-posix.c @@ -34,7 +34,6 @@ extern char **environ; #if HAVE_CLONE #include -#include #endif #ifdef SIGRTMAX @@ -96,13 +95,14 @@ struct child_args { const char *path; struct mp_subprocess_opts *opts; int *src_fds; + void *child_stack; int pipe_end; bool failed; bool detach; }; static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *opts, - int src_fds[], bool detach); + int src_fds[], bool detach, void *stacks[]); // This function is called from a clone(CLONE_VM)/rfork_thread context where // the child shares the parent's address space. 
Use MP_NO_ASAN to avoid false @@ -113,12 +113,13 @@ MP_NO_ASAN static int child_main(void* args) const char *path = child_args->path; struct mp_subprocess_opts *opts = child_args->opts; int *src_fds = child_args->src_fds; + void *child_stack = child_args->child_stack; int pipe_end = child_args->pipe_end; bool detach = child_args->detach; if (detach) { setsid(); - if (!spawn_process_inner(path, opts, src_fds, false)) + if (!spawn_process_inner(path, opts, src_fds, false, &child_stack)) goto child_failed; return 0; } @@ -148,7 +149,7 @@ MP_NO_ASAN static int child_main(void* args) } static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *opts, - int src_fds[], bool detach) + int src_fds[], bool detach, void *stacks[]) { pid_t fres = 0; int r = 0; @@ -157,6 +158,7 @@ static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *op .path = path, .opts = opts, .src_fds = src_fds, + .child_stack = stacks[1], .pipe_end = -1, .failed = false, .detach = detach, @@ -164,17 +166,10 @@ static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *op int p[2] = {-1, -1}; -#if HAVE_CLONE || HAVE_RFORK - const size_t stack_size = 0x8000; - void* stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); - if (stack == MAP_FAILED) - goto done; -#endif - #if HAVE_RFORK - fres = rfork_thread(RFSPAWN, (int8_t*)stack + stack_size, child_main, &child_args); + fres = rfork_thread(RFSPAWN, stacks[0], child_main, &child_args); #elif HAVE_CLONE - fres = clone(child_main, (int8_t*)stack + stack_size, CLONE_VM | CLONE_VFORK | SIGCHLD, &child_args); + fres = clone(child_main, stacks[0], CLONE_VM | CLONE_VFORK | SIGCHLD, &child_args); #else // We setup a communication pipe to signal failure. 
Since the child calls // exec() and becomes the calling process, we don't know if or when the @@ -219,9 +214,6 @@ static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *op } done: -#if HAVE_CLONE || HAVE_RFORK - munmap(stack, stack_size); -#endif SAFE_CLOSE(p[0]); SAFE_CLOSE(p[1]); @@ -232,6 +224,24 @@ static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *op static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, int src_fds[]) { + bool detach = opts->detach; + void *stacks[2]; + void *ctx = NULL; + +#if HAVE_CLONE || HAVE_RFORK + // Pre-allocate stacks so spawn_process_inner() remains async-signal-safe. + // The child only runs a few async-signal-safe calls before execve(), so a + // fixed 32 KiB stack is sufficient; we forgo mmap(MAP_STACK) (and thus the + // auto-growth / guard-page behavior it enables on FreeBSD) in favor of + // plain talloc; talloc is not async-signal-safe, hence allocating up front. + const size_t stack_size = 0x8000; + ctx = talloc_new(NULL); + // stack should be aligned to 16 bytes, which is guaranteed by malloc + stacks[0] = (char *)talloc_size(ctx, stack_size) + stack_size; + if (detach) + stacks[1] = (char *)talloc_size(ctx, stack_size) + stack_size; +#endif + #if !HAVE_RFORK sigset_t sigmask, oldmask; @@ -239,12 +249,14 @@ static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, pthread_sigmask(SIG_BLOCK, &sigmask, &oldmask); #endif - pid_t fres = spawn_process_inner(path, opts, src_fds, opts->detach); + pid_t fres = spawn_process_inner(path, opts, src_fds, detach, stacks); #if !HAVE_RFORK pthread_sigmask(SIG_SETMASK, &oldmask, NULL); #endif + talloc_free(ctx); + return fres; }