Skip to content

Commit f0bc1ae

Browse files
committed
Route debug-mode activation via call_soon to fix Python 3.14 task nesting
The prior approach called workflow.activate(act) synchronously from inside _handle_activation's coroutine. Python 3.14 raised:

    RuntimeError: Cannot enter into task <workflow run task> while another task <_handle_activation> is being executed.

3.14 tightened asyncio's task-step validation: a task cannot be entered while another task on the same thread is mid-step. Our dispatch task was mid-step when workflow.activate's internal _run_once tried to step the workflow's own run task.

Fix: schedule workflow.activate as a `loop.call_soon` callback and await a future the callback completes. The await suspends the dispatch task (no longer mid-step), the callback runs as part of the loop's ready-handle iteration (not inside any task), and the workflow's task stepping then proceeds with no task currently being stepped. The main loop is still blocked while the activation runs synchronously, which is the intended single-stepping debug behavior.

Also drops the temporary 2-minute task_timeout from the test - that was a wrong-direction guess at a symptom; the real issue was the structural nested-task error.

Verified locally:
- Python 3.14: all 3 tests pass (previously failed deterministically)
- Python 3.13: all 3 tests pass (regression check)
- tests/worker/test_worker.py: 18 passed, 11 skipped, no regressions
1 parent b6efad5 commit f0bc1ae

2 files changed

Lines changed: 51 additions & 26 deletions

File tree

temporalio/worker/_workflow.py

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,37 @@ async def drain_poll_queue(self) -> None:
271271
except PollShutdownError:
272272
return
273273

274+
async def _activate_inline_for_debug(
    self,
    loop: asyncio.AbstractEventLoop,
    workflow: _RunningWorkflow,
    act: temporalio.bridge.proto.workflow_activation.WorkflowActivation,
) -> temporalio.bridge.proto.workflow_completion.WorkflowActivationCompletion:
    """Run ``workflow.activate(act)`` synchronously via a loop callback.

    The activation is scheduled with ``loop.call_soon`` and this coroutine
    awaits a future that the callback completes. Awaiting suspends the
    calling task, so the activation executes as a plain loop callback
    rather than inside this task's step. Python 3.14's asyncio refuses to
    enter a task while another task on the same thread is mid-step; this
    indirection keeps the workflow's own task stepping from colliding
    with the calling task.

    Args:
        loop: Event loop on which the callback is scheduled and the
            result future is created.
        workflow: Running workflow whose ``activate`` is invoked.
        act: Activation passed to ``workflow.activate``.

    Returns:
        Whatever ``workflow.activate(act)`` returns.

    Raises:
        BaseException: Any exception raised by ``workflow.activate`` is
            re-raised to the awaiting caller through the future.
    """
    future: asyncio.Future = loop.create_future()

    def run_inline() -> None:
        # If the awaiting task was cancelled before this callback got to
        # run, the future is already cancelled and nobody can receive the
        # completion. Skip the activation instead of running it and then
        # failing with InvalidStateError while delivering the result.
        if future.cancelled():
            return
        # The workflow's _run_once clears the running-loop registration to
        # None when it exits. Save the main loop here and restore it after
        # the activation so any code that runs subsequently still sees the
        # right loop.
        main_loop = asyncio._get_running_loop()
        try:
            completion = workflow.activate(act)
        except BaseException as e:
            # Guard against the future having been completed/cancelled in
            # the meantime so the callback never raises InvalidStateError
            # into the loop's exception handler.
            if not future.done():
                future.set_exception(e)
        else:
            # Delivered outside the `try` so a failure to set the result
            # is never mistaken for an activation failure.
            if not future.done():
                future.set_result(completion)
        finally:
            asyncio._set_running_loop(main_loop)

    loop.call_soon(run_inline)
    return await future
304+
274305
async def _handle_activation(
275306
self, act: temporalio.bridge.proto.workflow_activation.WorkflowActivation
276307
) -> None:
@@ -362,16 +393,20 @@ async def _handle_activation(
362393

363394
if self._debug_mode:
364395
# Run activation inline on the asyncio main thread so
365-
# interactive debuggers can read stdin. The loop blocks for
366-
# the duration of the activation; this is the intended
367-
# behavior when debugging.
368-
# Save/restore the main loop registration because the
369-
# workflow's custom event loop clears it inside _run_once.
370-
main_loop = asyncio.get_running_loop()
371-
try:
372-
completion = workflow.activate(act)
373-
finally:
374-
asyncio._set_running_loop(main_loop)
396+
# interactive debuggers (pdb, breakpoint(), IDE debuggers)
397+
# can read stdin. We schedule the synchronous activation as
398+
# a `call_soon` callback rather than calling it directly,
399+
# so the dispatch task suspends at the `await` below and is
400+
# no longer mid-`__step()` when the workflow's own task
401+
# gets stepped inside `workflow.activate`. Python 3.14's
402+
# asyncio refuses to enter a task while another task on
403+
# the same thread is currently being executed; the
404+
# call_soon detour avoids that nesting. The main loop is
405+
# still blocked while the activation runs, which is the
406+
# intended behavior when debugging.
407+
completion = await self._activate_inline_for_debug(
408+
asyncio.get_running_loop(), workflow, act
409+
)
375410
else:
376411
# Run activation in separate thread so we can check if it's
377412
# deadlocked
@@ -620,15 +655,12 @@ async def _handle_cache_eviction(
620655
while True:
621656
try:
622657
if self._debug_mode:
623-
# Inline eviction activation for debug mode.
624-
# Save/restore the main loop registration because
625-
# the workflow's custom event loop clears it inside
626-
# _run_once.
627-
main_loop = asyncio.get_running_loop()
628-
try:
629-
workflow.activate(act)
630-
finally:
631-
asyncio._set_running_loop(main_loop)
658+
# Eviction activation runs inline on the main thread
659+
# too. See `_activate_inline_for_debug` for why the
660+
# `call_soon` detour is needed on Python 3.14+.
661+
await self._activate_inline_for_debug(
662+
asyncio.get_running_loop(), workflow, act
663+
)
632664
else:
633665
# We only create the eviction task if we haven't already or
634666
# it is done. This is because if it already is running and

tests/worker/test_breakpoint_hang.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import sys
1818
import threading
1919
import uuid
20-
from datetime import timedelta
2120

2221
from temporalio import workflow
2322
from temporalio.client import Client
@@ -79,12 +78,6 @@ async def test_workflow_runs_on_main_thread_in_debug_mode(client: Client):
7978
ThreadCaptureWorkflow.run,
8079
id=f"wf-{uuid.uuid4()}",
8180
task_queue=task_queue,
82-
# Conservative task_timeout to tolerate CI contention. The inline
83-
# activation blocks the asyncio loop briefly; under heavy parallel
84-
# CI load this can race the server's default workflow task timeout
85-
# and cause task reassignment. Temporary mitigation while we
86-
# diagnose the CI-specific hang against the bundled dev server.
87-
task_timeout=timedelta(minutes=2),
8881
)
8982

9083
main_name = threading.main_thread().name

0 commit comments

Comments
 (0)