@@ -50,10 +50,12 @@ class RedisKeys:
5050 LOOP_INITIAL_EVENT = f"{ KEY_PREFIX } :{{app_name}}:initial_event:{{loop_id}}"
5151 LOOP_STATE = f"{ KEY_PREFIX } :{{app_name}}:state:{{loop_id}}"
5252 LOOP_CLAIM = f"{ KEY_PREFIX } :{{app_name}}:claim:{{loop_id}}"
53+ LOOP_WAKE_CLAIM = f"{ KEY_PREFIX } :{{app_name}}:wake_claim:{{loop_id}}"
5354 LOOP_CONTEXT = f"{ KEY_PREFIX } :{{app_name}}:context:{{loop_id}}:{{key}}"
5455 LOOP_NONCE = f"{ KEY_PREFIX } :{{app_name}}:nonce:{{loop_id}}"
5556 LOOP_EVENT_CHANNEL = f"{ KEY_PREFIX } :{{app_name}}:events:{{loop_id}}:notify"
5657 LOOP_WAKE_SCHEDULE = f"{ KEY_PREFIX } :{{app_name}}:wake_schedule"
58+ WAKE_QUEUE = f"{ KEY_PREFIX } :{{app_name}}:wake_queue"
5759 LOOP_MAPPING = f"{ KEY_PREFIX } :{{app_name}}:mapping:{{external_ref_id}}"
5860 LOOP_CONNECTION_INDEX = f"{ KEY_PREFIX } :{{app_name}}:connection_index:{{loop_id}}"
5961 LOOP_CONNECTION_KEY = (
@@ -140,6 +142,7 @@ def __init__(
140142 )
141143
142144 self .wake_queue : Queue [str ] = wake_queue
145+ self ._wake_queue_key = RedisKeys .WAKE_QUEUE .format (app_name = self .app_name )
143146 self ._stop_wake_monitor = threading .Event ()
144147 self .wake_thread : threading .Thread | None = None
145148
@@ -216,34 +219,54 @@ def _process_due_wakes(self, rdb) -> int:
216219 # Process loop wakes
217220 loop_key = RedisKeys .LOOP_WAKE_SCHEDULE .format (app_name = self .app_name )
218221
219- # Debug: check what's pending in the ZSET
220- all_pending = rdb .zrange (loop_key , 0 , - 1 , withscores = True )
221- if all_pending :
222- first_id , first_score = all_pending [0 ]
223- logger .info (
224- f"Pending wakes: key={ loop_key } , count={ len (all_pending )} , "
225- f"first=({ first_id .decode ()} , { first_score } ), now={ now } , "
226- f"due_in={ first_score - now :.1f} s"
227- )
222+ # Keep the wake thread quiet; it runs continuously and can be noisy.
228223
229224 for loop_id_bytes in rdb .zrangebyscore (loop_key , "-inf" , now ):
230225 loop_id = loop_id_bytes .decode ("utf-8" )
231226 if rdb .zrem (loop_key , loop_id ):
232- self .wake_queue .put (loop_id )
233227 processed += 1
234- logger .info (f"Queued wake: { loop_id } " )
228+ logger .info (f"Queued wake: { loop_id } -> redis:{ self ._wake_queue_key } " )
229+ # Redis-backed wake queue so any process/replica can consume.
230+ rdb .rpush (self ._wake_queue_key , loop_id )
235231
236232 # Process workflow wakes
237233 wf_key = RedisKeys .WORKFLOW_WAKE_SCHEDULE .format (app_name = self .app_name )
238234 for wf_id_bytes in rdb .zrangebyscore (wf_key , "-inf" , now ):
239235 wf_id = wf_id_bytes .decode ("utf-8" )
240236 if rdb .zrem (wf_key , wf_id ):
241- self .wake_queue .put (f"{ WORKFLOW_WAKE_PREFIX } { wf_id } " )
242237 processed += 1
243- logger .info (f"Queued workflow wake: { wf_id } " )
238+ payload = f"{ WORKFLOW_WAKE_PREFIX } { wf_id } "
239+ logger .info (
240+ f"Queued workflow wake: { wf_id } -> redis:{ self ._wake_queue_key } "
241+ )
242+ rdb .rpush (self ._wake_queue_key , payload )
244243
245244 return processed
246245
async def drain_wake_queue(
    self, *, timeout_s: float, max_items: int = 100
) -> list[str]:
    """Pop a batch of wake payloads from the Redis-backed wake queue.

    One ``BLPOP`` on ``self._wake_queue_key`` waits for the first payload;
    if it yields nothing, an empty list is returned. Otherwise up to
    ``max_items - 1`` further payloads are taken with non-blocking ``LPOP``
    calls, stopping as soon as the list is reported empty.

    Args:
        timeout_s: Forwarded unchanged as the ``BLPOP`` timeout.
            NOTE(review): Redis treats a BLPOP timeout of 0 as "block
            indefinitely" - confirm callers never pass 0 by accident.
        max_items: Upper bound on the batch size. The initial ``BLPOP``
            result is always returned when present, so values below 1
            still yield at most one payload.

    Returns:
        The popped payloads decoded as UTF-8, in queue order.
    """
    first = await self.rdb.blpop(self._wake_queue_key, timeout=timeout_s)  # type: ignore
    if not first:
        return []

    # BLPOP replies with a (key, value) pair; only the value is the payload.
    # Assumes the client returns bytes (no decode_responses) - TODO confirm.
    _key, value = first
    wakes: list[str] = [value.decode("utf-8")]

    for _ in range(max_items - 1):
        value = await self.rdb.lpop(self._wake_queue_key)  # type: ignore
        # Only None means "list is empty". A falsy-but-present payload such
        # as b"" has already been removed from Redis, so it must be returned
        # rather than dropped, and it must not end the drain early.
        if value is None:
            break
        wakes.append(value.decode("utf-8"))

    return wakes
269+
247270 async def set_loop_mapping (self , external_ref_id : str , loop_id : str ):
248271 await self .rdb .set (
249272 RedisKeys .LOOP_MAPPING .format (
@@ -491,6 +514,14 @@ async def try_claim_loop_recovery(self, loop_id: str) -> bool:
491514 acquired = await self .rdb .set (claim_key , "1" , nx = True , ex = 60 )
492515 return acquired is not None
493516
async def try_claim_loop_wake(self, loop_id: str) -> bool:
    """Attempt to take the wake claim for ``loop_id``.

    Issues a single ``SET`` with ``nx=True`` and a 30-second expiry on the
    loop's wake-claim key. A ``None`` reply is reported as False (claim not
    acquired); any other reply is reported as True (this caller won).
    """
    wake_claim_key = RedisKeys.LOOP_WAKE_CLAIM.format(
        loop_id=loop_id, app_name=self.app_name
    )
    reply = await self.rdb.set(wake_claim_key, "1", ex=30, nx=True)
    return reply is not None
524+
494525 async def get_all_loop_ids (self ) -> set [str ]:
495526 members = await self .rdb .smembers (
496527 RedisKeys .LOOP_INDEX .format (app_name = self .app_name )
@@ -632,7 +663,8 @@ async def push_event(self, loop_id: str, event: "LoopEvent"):
632663 await pipe .execute ()
633664
634665 if event .sender == LoopEventSender .CLIENT :
635- self .wake_queue .put_nowait (loop_id )
666+ # Wake via Redis-backed wake queue so any process/replica can consume it.
667+ await self .rdb .rpush (self ._wake_queue_key , loop_id ) # type: ignore
636668
637669 async def get_context_value (self , loop_id : str , key : str ) -> Any :
638670 value_str = await self .rdb .get (
0 commit comments