@@ -3,7 +3,7 @@ use crate::common::constants::{CoroutineState, MONITOR_BEAN};
33use crate :: common:: { get_timeout_time, now, CondvarBlocker } ;
44use crate :: coroutine:: listener:: Listener ;
55use crate :: coroutine:: local:: CoroutineLocal ;
6- use crate :: scheduler:: SchedulableSuspender ;
6+ use crate :: scheduler:: { SchedulableCoroutine , SchedulableSuspender } ;
77use crate :: { catch, error, impl_current_for, impl_display_by_debug, info} ;
88#[ cfg( unix) ]
99use nix:: sys:: pthread:: { pthread_kill, pthread_self, Pthread } ;
@@ -78,6 +78,24 @@ impl Monitor {
7878 set. remove ( Signal :: SIGURG ) ;
7979 set. thread_set_mask ( )
8080 . expect ( "Failed to remove SIGURG signal mask!" ) ;
81+ //不抢占处于Syscall状态的协程。
82+ //MonitorListener的设计理念是不对Syscall状态的协程发送信号。
83+ //但由于NOTIFY_NODE移除和monitor线程遍历之间存在竞态条件,
84+ //SIGURG可能在协程刚进入Syscall状态时到达。
85+ //如果此时抢占,协程会被放入syscall_map但无人唤醒(因为没有io_uring/epoll/定时器注册),
86+ //导致死锁。
87+ // Skip preemption for coroutines in Syscall state.
88+ // MonitorListener's design is to NOT send signals to Syscall-state
89+ // coroutines. However, a race between NOTIFY_NODE removal and the
90+ // monitor's queue iteration can cause SIGURG to arrive just after
91+ // the coroutine entered Syscall state. If preempted here, the
92+ // coroutine lands in the syscall map with no io_uring/epoll/timer
93+ // registration to wake it, causing a deadlock.
94+ if let Some ( co) = SchedulableCoroutine :: current ( ) {
95+ if matches ! ( co. state( ) , CoroutineState :: Syscall ( ( ) , _, _) ) {
96+ return ;
97+ }
98+ }
8199 if let Some ( suspender) = SchedulableSuspender :: current ( ) {
82100 suspender. suspend ( ) ;
83101 }
@@ -89,7 +107,7 @@ impl Monitor {
89107 // install panic hook
90108 std:: panic:: set_hook ( Box :: new ( |panic_hook_info| {
91109 let syscall = crate :: common:: constants:: SyscallName :: panicking;
92- if let Some ( co) = crate :: scheduler :: SchedulableCoroutine :: current ( ) {
110+ if let Some ( co) = SchedulableCoroutine :: current ( ) {
93111 let new_state = crate :: common:: constants:: SyscallState :: Executing ;
94112 if co. syscall ( ( ) , syscall, new_state) . is_err ( ) {
95113 error ! (
@@ -109,7 +127,7 @@ impl Monitor {
109127 "stack backtrace:\n {}" ,
110128 std:: backtrace:: Backtrace :: force_capture( )
111129 ) ;
112- if let Some ( co) = crate :: scheduler :: SchedulableCoroutine :: current ( ) {
130+ if let Some ( co) = SchedulableCoroutine :: current ( ) {
113131 if co. running ( ) . is_err ( ) {
114132 error ! ( "{} change to running state failed !" , co. name( ) ) ;
115133 }
@@ -523,6 +541,24 @@ extern "C" fn do_preempt() {
523541 // coroutine never yielded (no hooked syscalls) — it is truly CPU-bound.
524542 // Force immediate suspension.
525543 flag. set ( false ) ;
544+ //不抢占处于Syscall状态的协程。
545+ //MonitorListener的设计理念是不对Syscall状态的协程发送信号。
546+ //但由于NOTIFY_NODE移除和monitor线程遍历之间存在竞态条件,
547+ //SIGURG可能在协程刚进入Syscall状态时到达。
548+ //如果此时抢占,协程会被放入syscall_map但无人唤醒(因为没有io_uring/epoll/定时器注册),
549+ //导致死锁。
550+ // Skip preemption for coroutines in Syscall state.
551+ // MonitorListener's design is to NOT send signals to Syscall-state
552+ // coroutines. However, a race between NOTIFY_NODE removal and the
553+ // monitor's queue iteration can cause SIGURG to arrive just after
554+ // the coroutine entered Syscall state. If preempted here, the
555+ // coroutine lands in the syscall map with no io_uring/epoll/timer
556+ // registration to wake it, causing a deadlock.
557+ if let Some ( co) = SchedulableCoroutine :: current ( ) {
558+ if matches ! ( co. state( ) , CoroutineState :: Syscall ( ( ) , _, _) ) {
559+ return ;
560+ }
561+ }
526562 if let Some ( suspender) = SchedulableSuspender :: current ( ) {
527563 suspender. suspend ( ) ;
528564 }
0 commit comments