@@ -398,8 +398,6 @@ async def send_cdp(self, method: str, params: Optional[dict] = None) -> dict:
398398 self ._stagehand .logger .debug (
399399 f"CDP command '{ method } ' failed: { e } . Attempting to reconnect..."
400400 )
401- # Try to reconnect
402- await self ._ensure_cdp_session ()
403401 # Handle specific errors if needed (e.g., session closed)
404402 if "Target closed" in str (e ) or "Session closed" in str (e ):
405403 # Attempt to reset the client if the session closed unexpectedly
@@ -441,71 +439,213 @@ async def detach_cdp_client(self):
async def _wait_for_settled_dom(self, timeout_ms: Optional[int] = None):
    """
    Wait for the page's network activity to go quiet before proceeding.

    **Definition of "settled"**
      • No tracked in-flight network requests (WebSocket and EventSource
        requests are never tracked).
      • That idle state lasts for at least 500 ms (the "quiet window").

    **How it works**
      1. Enables the CDP ``Network`` and ``Page`` domains and sends
         ``Target.setAutoAttach`` with ``flatten: True``.
      2. Every ``Network.requestWillBeSent`` adds the request ID to the
         ``inflight`` set and cancels any running quiet-window timer.
      3. ``Network.loadingFinished``, ``Network.loadingFailed``,
         ``Network.requestServedFromCache`` and a ``Network.responseReceived``
         whose URL starts with ``data:`` remove the request ID again.
      4. *Document* requests are also mapped frame ID → request ID; when
         ``Page.frameStoppedLoading`` fires for that frame, the mapped
         request is removed immediately.
      5. A sweeper task runs every 500 ms and forcibly drops any tracked
         request that has been open for more than 2 s, so a request that
         never completes cannot block the wait until the global timeout.
      6. Whenever ``inflight`` is empty a 500 ms timer is started; if no new
         request arrives before it fires, the wait completes.
      7. A global timeout guard always completes the wait; if requests are
         still outstanding at that point their count is logged.

    Args:
        timeout_ms (int, optional): Maximum time to wait in milliseconds.
            If None (or 0), uses the stagehand client's
            ``dom_settle_timeout_ms``, falling back to 30000.

    Returns:
        None. The coroutine returns once the network has been quiet for the
        quiet window or the timeout has elapsed.
    """
    # Function-scope imports are kept from the original so the method does
    # not depend on the module's top-level import block.
    import asyncio
    import time

    quiet_window_s = 0.5    # how long the network must stay idle
    sweep_interval_s = 0.5  # how often stalled requests are checked
    stalled_after_s = 2.0   # age at which a tracked request is dropped

    # `or` chaining keeps the original "falsy means default" behaviour and
    # additionally guards against the attribute existing but being None.
    timeout = (
        timeout_ms
        or getattr(self._stagehand, "dom_settle_timeout_ms", None)
        or 30000
    )
    client = await self.get_cdp_client()

    # If the page cannot report a title yet, wait for the document to load.
    try:
        await self._page.title()
    except Exception:
        await self._page.wait_for_load_state("domcontentloaded")

    # Enable the CDP domains whose events are consumed below.
    await client.send("Network.enable")
    await client.send("Page.enable")
    await client.send(
        "Target.setAutoAttach",
        {
            "autoAttach": True,
            "waitForDebuggerOnStart": False,
            "flatten": True,
        },
    )

    inflight = set()    # request IDs currently considered in flight
    meta = {}           # request ID -> {"url": str, "start": float}
    doc_by_frame = {}   # frame ID -> request ID of its Document request

    loop = asyncio.get_running_loop()
    done_event = asyncio.Event()
    quiet_timer = None  # TimerHandle of the pending quiet-window timer

    def clear_quiet():
        """Cancel the pending quiet-window timer, if any."""
        nonlocal quiet_timer
        if quiet_timer is not None:
            quiet_timer.cancel()
            quiet_timer = None

    def resolve_done():
        """Stop the quiet timer and wake the waiter."""
        clear_quiet()
        done_event.set()

    def maybe_quiet():
        """Start the quiet-window timer if nothing is in flight."""
        nonlocal quiet_timer
        if not inflight and quiet_timer is None:
            quiet_timer = loop.call_later(quiet_window_s, resolve_done)

    def forget(request_id):
        """Drop every trace of a request from the tracking structures."""
        inflight.discard(request_id)
        meta.pop(request_id, None)
        stale_frames = [
            frame_id
            for frame_id, mapped_id in doc_by_frame.items()
            if mapped_id == request_id
        ]
        for frame_id in stale_frames:
            del doc_by_frame[frame_id]

    def finish_req(request_id):
        """Mark a tracked request as finished and restart the quiet window."""
        if request_id not in inflight:
            return
        forget(request_id)
        clear_quiet()
        maybe_quiet()

    def on_request(params):
        """Handle Network.requestWillBeSent."""
        # Long-lived streams would never "finish", so they are not tracked.
        if params.get("type") in ("WebSocket", "EventSource"):
            return

        request_id = params["requestId"]
        inflight.add(request_id)
        meta[request_id] = {
            "url": params.get("request", {}).get("url", ""),
            # Monotonic clock: request age must not jump when the wall
            # clock is adjusted.
            "start": time.monotonic(),
        }

        if params.get("type") == "Document" and params.get("frameId"):
            doc_by_frame[params["frameId"]] = request_id

        clear_quiet()

    def on_request_done(params):
        """Handle loadingFinished / loadingFailed / requestServedFromCache."""
        finish_req(params["requestId"])

    def on_data_url(params):
        """Handle Network.responseReceived for data: URLs."""
        if params.get("response", {}).get("url", "").startswith("data:"):
            finish_req(params["requestId"])

    def on_frame_stop(params):
        """Handle Page.frameStoppedLoading."""
        request_id = doc_by_frame.get(params.get("frameId"))
        if request_id is not None:
            finish_req(request_id)

    handlers = (
        ("Network.requestWillBeSent", on_request),
        ("Network.loadingFinished", on_request_done),
        ("Network.loadingFailed", on_request_done),
        ("Network.requestServedFromCache", on_request_done),
        ("Network.responseReceived", on_data_url),
        ("Page.frameStoppedLoading", on_frame_stop),
    )

    async def sweep_stalled_requests():
        """Periodically drop tracked requests older than the stall limit."""
        while not done_event.is_set():
            await asyncio.sleep(sweep_interval_s)
            now = time.monotonic()
            for request_id, request_meta in list(meta.items()):
                if now - request_meta["start"] > stalled_after_s:
                    forget(request_id)
                    self._stagehand.logger.debug(
                        "⏳ forcing completion of stalled iframe document",
                        extra={"url": request_meta["url"][:120]},
                    )
            maybe_quiet()

    def on_timeout():
        """Global guard: complete the wait even if requests are pending."""
        if done_event.is_set():
            return
        if inflight:
            self._stagehand.logger.debug(
                "⚠️ DOM-settle timeout reached – network requests still pending",
                extra={"count": len(inflight)},
            )
        resolve_done()

    registered = []      # (event, handler) pairs actually attached
    sweep_task = None
    timeout_handle = None
    try:
        for event_name, handler in handlers:
            client.on(event_name, handler)
            registered.append((event_name, handler))

        sweep_task = asyncio.create_task(sweep_stalled_requests())
        timeout_handle = loop.call_later(timeout / 1000, on_timeout)

        # Nothing may be in flight at all; start the quiet window right away.
        maybe_quiet()

        await done_event.wait()
    finally:
        # Runs on normal completion, on error and on cancellation, so no
        # listener, timer or task outlives this call.
        for event_name, handler in registered:
            client.remove_listener(event_name, handler)
        clear_quiet()
        if timeout_handle is not None:
            timeout_handle.cancel()
        if sweep_task is not None:
            sweep_task.cancel()
            # gather(return_exceptions=True) retrieves the sweeper's outcome
            # without swallowing a cancellation aimed at this coroutine.
            (outcome,) = await asyncio.gather(sweep_task, return_exceptions=True)
            if isinstance(outcome, Exception):
                self._stagehand.logger.debug(
                    f"Stalled-request sweeper failed: {outcome}"
                )
509649
510650 # Forward other Page methods to underlying Playwright page
511651 def __getattr__ (self , name ):
0 commit comments