From 67a7818605f84c463865d510f763f389270cc1a7 Mon Sep 17 00:00:00 2001 From: xwin <57826089+xwinwin@users.noreply.github.com> Date: Tue, 28 Apr 2026 08:48:55 +0800 Subject: [PATCH] server: fix stopping_thread hang on child process exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In router mode, stopping_thread uses subprocess_alive() as the predicate for cv_stop.wait(). subprocess_alive() calls waitpid(WNOHANG) which reaps the child process, setting process->child to 0. This destructive check means the function can only detect the alive→dead transition once — after the first call, subsequent calls return the same value with no state change to trigger a CV wake-up. This commonly manifests when a child process is OOM-killed (e.g., during model loading or inference). The main thread notifies cv_stop once after log_thread.join(). If stopping_thread wakes before waitpid has detected the exit, the predicate evaluates to false and the thread goes back to waiting with no further notifications, causing a permanent hang that blocks model reload. Fix by introducing std::atomic child_finished to bypass the subprocess_alive() check as a CV predicate. The main thread sets this flag after log_thread.join() (child stdout EOF) and notifies cv_stop. stopping_thread now wakes on either an explicit stop request or this atomic flag, completely decoupling thread synchronization from the destructive subprocess_alive() behavior. Also guard subprocess_terminate() with process->child <= 0 to prevent kill(0, 9) which would send SIGKILL to the entire process group. --- tools/server/server-models.cpp | 45 ++++++++++++++++++++++------------ vendor/sheredom/subprocess.h | 5 ++++ 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index db6cbce8f9d8..6ead608a4ffb 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -626,56 +626,71 @@ void server_models::load(const std::string & name) { } }); + std::atomic child_finished(false); + std::thread stopping_thread([&]() { - // thread to monitor stopping signal OR child crash + // thread to monitor explicit stop requests; child exit is signalled via child_finished auto is_stopping = [this, &name]() { return this->stopping_models.find(name) != this->stopping_models.end(); }; - auto should_wake = [&]() { - return is_stopping() || !subprocess_alive(child_proc.get()); - }; + { std::unique_lock lk(this->mutex); - this->cv_stop.wait(lk, should_wake); + this->cv_stop.wait(lk, [&]() { + return is_stopping() || child_finished.load(std::memory_order_acquire); + }); + if (!is_stopping() || child_finished.load(std::memory_order_acquire)) { + return; + } } - // child may have already exited (e.g. crashed) — skip shutdown sequence - if (!subprocess_alive(child_proc.get())) { + + // child may have already exited between wake-up and shutdown handling + if (subprocess_alive(child_proc.get()) <= 0) { return; } + SRV_INF("stopping model instance name=%s\n", name.c_str()); - // send interrupt to child process fprintf(stdin_file, "%s\n", CMD_ROUTER_TO_CHILD_EXIT); fflush(stdin_file); - // wait to stop gracefully or timeout + int64_t start_time = ggml_time_ms(); while (true) { + if (subprocess_alive(child_proc.get()) <= 0) { + return; + } + std::unique_lock lk(this->mutex); - if (!is_stopping()) { - return; // already stopped + if (!is_stopping() || child_finished.load(std::memory_order_acquire)) { + return; } + int64_t elapsed = ggml_time_ms() - start_time; if (elapsed >= stop_timeout * 1000) { - // timeout, force kill + lk.unlock(); SRV_WRN("force-killing model instance name=%s after %d seconds timeout\n", name.c_str(), stop_timeout); subprocess_terminate(child_proc.get()); return; } - this->cv_stop.wait_for(lk, std::chrono::seconds(1)); + + this->cv_stop.wait_for(lk, std::chrono::seconds(1), [&]() { + return !is_stopping() || child_finished.load(std::memory_order_acquire); + }); } }); - // we reach here when the child process exits + // note: we cannot join() prior to this point because it will close stdin_file if (log_thread.joinable()) { log_thread.join(); } - // stop the timeout monitoring thread + child_finished.store(true, std::memory_order_release); { std::lock_guard lk(this->mutex); stopping_models.erase(name); cv_stop.notify_all(); } + if (stopping_thread.joinable()) { stopping_thread.join(); } diff --git a/vendor/sheredom/subprocess.h b/vendor/sheredom/subprocess.h index 3e40bae046a2..f6f93dfec760 100644 --- a/vendor/sheredom/subprocess.h +++ b/vendor/sheredom/subprocess.h @@ -1051,6 +1051,11 @@ int subprocess_terminate(struct subprocess_s *const process) { return success_terminate; #else int result; + + if (process->child <= 0) { + return -1; + } + result = kill(process->child, 9); return result; #endif