diff --git a/meson.build b/meson.build index d7bfd5f3a3311..b8ee5eca4e808 100644 --- a/meson.build +++ b/meson.build @@ -463,6 +463,8 @@ features += {'ppoll': cc.has_function('ppoll', args: '-D_GNU_SOURCE', prefix: '#include ')} features += {'memrchr': cc.has_function('memrchr', args: '-D_GNU_SOURCE', prefix: '#include ')} +features += {'clone': cc.has_header_symbol('sched.h', 'clone', args: '-D_GNU_SOURCE')} +features += {'rfork': cc.has_header_symbol('unistd.h', 'rfork_thread')} optical_devices = { 'windows': 'D:', diff --git a/osdep/compiler.h b/osdep/compiler.h index d3edecdb200c5..fead5dc7ecaf3 100644 --- a/osdep/compiler.h +++ b/osdep/compiler.h @@ -16,11 +16,13 @@ #define MP_FALLTHROUGH __attribute__((fallthrough)) #define MP_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) #define MP_UNUSED __attribute__((unused)) +#define MP_NO_ASAN __attribute__((no_sanitize("address"))) #else #define MP_NORETURN #define MP_FALLTHROUGH do {} while (0) #define MP_WARN_UNUSED_RESULT #define MP_UNUSED +#define MP_NO_ASAN #endif #if defined(__STDC_VERSION__) && (__STDC_VERSION__ < 202311L) && !defined(thread_local) diff --git a/osdep/subprocess-posix.c b/osdep/subprocess-posix.c index d583ab07d0b86..5762058b8eae3 100644 --- a/osdep/subprocess-posix.c +++ b/osdep/subprocess-posix.c @@ -32,6 +32,10 @@ extern char **environ; +#if HAVE_CLONE +#include +#endif + #ifdef SIGRTMAX #define SIGNAL_MAX SIGRTMAX #else @@ -74,6 +78,8 @@ static int as_execvpe(const char *path, const char *file, char *const argv[], // signal handlers first so nothing funny happens. static void reset_signals_child(void) { +#if !HAVE_RFORK + // RFSPAWN has reset all signal actions in the child to default already struct sigaction sa = { 0 }; sigset_t sigmask; sa.sa_handler = SIG_DFL; @@ -82,19 +88,89 @@ static void reset_signals_child(void) for (int nr = 1; nr <= SIGNAL_MAX; nr++) sigaction(nr, &sa, NULL); sigprocmask(SIG_SETMASK, &sigmask, NULL); +#endif } -// Returns 0 on any error, valid PID on success. -// This function must be async-signal-safe, as it may be called from a fork(). -static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, - int src_fds[]) +struct child_args { + const char *path; + struct mp_subprocess_opts *opts; + int *src_fds; + void *child_stack; + int pipe_end; + bool failed; + bool detach; +}; + +static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *opts, + int src_fds[], bool detach, void *stacks[]); + +// This function is called from a clone(CLONE_VM)/rfork_thread context where +// the child shares the parent's address space. Use MP_NO_ASAN to avoid false +// positives from ASan when the child writes to shared memory. +MP_NO_ASAN static int child_main(void* args) +{ + struct child_args *child_args = args; + const char *path = child_args->path; + struct mp_subprocess_opts *opts = child_args->opts; + int *src_fds = child_args->src_fds; + void *child_stack = child_args->child_stack; + int pipe_end = child_args->pipe_end; + bool detach = child_args->detach; + + if (detach) { + setsid(); + if (!spawn_process_inner(path, opts, src_fds, false, &child_stack)) + goto child_failed; + return 0; + } + + reset_signals_child(); + + for (int n = 0; n < opts->num_fds; n++) { + if (src_fds[n] == opts->fds[n].fd) { + int flags = fcntl(opts->fds[n].fd, F_GETFD); + if (flags == -1) + goto child_failed; + flags &= ~(unsigned)FD_CLOEXEC; + if (fcntl(opts->fds[n].fd, F_SETFD, flags) == -1) + goto child_failed; + } else if (dup2(src_fds[n], opts->fds[n].fd) < 0) { + goto child_failed; + } + } + + as_execvpe(path, opts->exe, opts->args, opts->env ? opts->env : environ); + +child_failed: + child_args->failed = true; + if (pipe_end >= 0) + (void)write(pipe_end, &(char){1}, 1); // shouldn't be able to fail + return 1; +} + +static pid_t spawn_process_inner(const char *path, struct mp_subprocess_opts *opts, + int src_fds[], bool detach, void *stacks[]) { - int p[2] = {-1, -1}; pid_t fres = 0; - sigset_t sigmask, oldmask; - sigfillset(&sigmask); - pthread_sigmask(SIG_BLOCK, &sigmask, &oldmask); + int r = 0; + + struct child_args child_args = { + .path = path, + .opts = opts, + .src_fds = src_fds, + .child_stack = stacks[1], + .pipe_end = -1, + .failed = false, + .detach = detach, + }; + int p[2] = {-1, -1}; + +#if HAVE_RFORK + fres = rfork_thread(RFSPAWN, stacks[0], child_main, &child_args); +#elif HAVE_CLONE + fres = clone(child_main, stacks[0], CLONE_VM | CLONE_VFORK | SIGCHLD, &child_args); +#else // We setup a communication pipe to signal failure. Since the child calls // exec() and becomes the calling process, we don't know if or when the // child process successfully ran exec() just from the PID. @@ -107,57 +183,83 @@ static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, int p_flags = fcntl(p[0], F_GETFD); if (p_flags == -1 || !FD_CLOEXEC || !(p_flags & FD_CLOEXEC)) goto done; // require CLOEXEC; unknown if fallback would be worth it + child_args.pipe_end = p[1]; fres = fork(); +#endif + if (fres < 0) { fres = 0; goto done; } - if (fres == 0) { - // child - reset_signals_child(); - - for (int n = 0; n < opts->num_fds; n++) { - if (src_fds[n] == opts->fds[n].fd) { - int flags = fcntl(opts->fds[n].fd, F_GETFD); - if (flags == -1) - goto child_failed; - flags &= ~(unsigned)FD_CLOEXEC; - if (fcntl(opts->fds[n].fd, F_SETFD, flags) == -1) - goto child_failed; - } else if (dup2(src_fds[n], opts->fds[n].fd) < 0) { - goto child_failed; - } - } - - as_execvpe(path, opts->exe, opts->args, opts->env ? opts->env : environ); - child_failed: - (void)write(p[1], &(char){1}, 1); // shouldn't be able to fail - _exit(1); +#if !HAVE_CLONE && !HAVE_RFORK + if (fres == 0) { + _exit(child_main(&child_args)); } SAFE_CLOSE(p[1]); - int r; do { r = read(p[0], &(char){0}, 1); } while (r < 0 && errno == EINTR); +#endif // If exec()ing child failed, collect it immediately. - if (r != 0) { - while (waitpid(fres, &(int){0}, 0) < 0 && errno == EINTR) {} - fres = 0; + if (detach || child_args.failed || r != 0) { + int child_status = 0; + while (waitpid(fres, &child_status, 0) < 0 && errno == EINTR) {} + if (r != 0 || !WIFEXITED(child_status) || WEXITSTATUS(child_status) != 0) + fres = 0; } done: - pthread_sigmask(SIG_SETMASK, &oldmask, NULL); SAFE_CLOSE(p[0]); SAFE_CLOSE(p[1]); return fres; } +// Returns 0 on any error, valid PID on success. +static pid_t spawn_process(const char *path, struct mp_subprocess_opts *opts, + int src_fds[]) +{ + bool detach = opts->detach; + void *stacks[2]; + void *ctx = NULL; + +#if HAVE_CLONE || HAVE_RFORK + // Pre-allocate stacks so spawn_process_inner() remains async-signal-safe. + // The child only runs a few async-signal-safe calls before execve(), so a + // fixed 32 KiB stack is sufficient; we forgo mmap(MAP_STACK) (and thus the + // auto-growth / guard-page behavior it enables on FreeBSD) in favor of + // plain talloc, which is simpler and async-signal-safe at this call site. + const size_t stack_size = 0x8000; + ctx = talloc_new(NULL); + // stack should be aligned to 16 bytes, which is guaranteed by malloc + stacks[0] = (char *)talloc_size(ctx, stack_size) + stack_size; + if (detach) + stacks[1] = (char *)talloc_size(ctx, stack_size) + stack_size; +#endif + +#if !HAVE_RFORK + sigset_t sigmask, oldmask; + + sigfillset(&sigmask); + pthread_sigmask(SIG_BLOCK, &sigmask, &oldmask); +#endif + + pid_t fres = spawn_process_inner(path, opts, src_fds, detach, stacks); + +#if !HAVE_RFORK + pthread_sigmask(SIG_SETMASK, &oldmask, NULL); +#endif + + talloc_free(ctx); + + return fres; +} + void mp_subprocess2(struct mp_subprocess_opts *opts, struct mp_subprocess_result *res) { @@ -216,33 +318,11 @@ void mp_subprocess2(struct mp_subprocess_opts *opts, src_fds[n] = src_fd; } - if (opts->detach) { - // If we run it detached, we fork a child to start the process; then - // it exits immediately, letting PID 1 inherit it. So we don't need - // anything else to collect these child PIDs. - sigset_t sigmask, oldmask; - sigfillset(&sigmask); - pthread_sigmask(SIG_BLOCK, &sigmask, &oldmask); - pid_t fres = fork(); - if (fres < 0) - goto done; - if (fres == 0) { - // child - setsid(); - if (!spawn_process(path, opts, src_fds)) - _exit(1); - _exit(0); - } - pthread_sigmask(SIG_SETMASK, &oldmask, NULL); - int child_status = 0; - while (waitpid(fres, &child_status, 0) < 0 && errno == EINTR) {} - if (!WIFEXITED(child_status) || WEXITSTATUS(child_status) != 0) - goto done; - } else { - pid = spawn_process(path, opts, src_fds); - if (!pid) - goto done; - } + pid = spawn_process(path, opts, src_fds); + if (!pid) + goto done; + if (opts->detach) + pid = 0; spawned = true;