Skip to content

Commit e8dfcc9

Browse files
alliscode and Copilot committed
Pivot: preserve workflow state across run() calls
Replace the prior 'combined message + checkpoint_id in one run()' approach with a cleaner default: Workflow.run no longer wipes shared state or runner- context messages between calls. Iteration counting and per-run kwargs still reset on a fresh-message run; checkpoint and responses runs are continuations that preserve everything. This lets a WorkflowAgent be invoked repeatedly on the same instance and maintain multi-turn context (e.g. accumulated Conversation.messages) without asking developers to opt in. Hosted-agent multi-turn pattern becomes two explicit calls: restore-from-checkpoint (drive to idle), then run-with-message. Key changes: - _workflow.py: drop _state.clear() and reset_for_new_run() from run(). Reset iteration count and run kwargs on fresh-message runs only. Restore 'Cannot provide both message and checkpoint_id' validation. Add async guard: fresh-message run with un-drained pending executor messages from a prior run is invalid. - _runner.py: clear _state before import_state in restore_from_checkpoint so restore is authoritative (import_state merges, not replaces). - _agent.py: revert checkpoint branch to restore-only (no message forward). - _responses.py (foundry_hosting): two-call host pattern - restore checkpoint silently, then run with new user input. - tests: state-preservation is the new default; rebuild Workflow for clean slate. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent baff7e3 commit e8dfcc9

5 files changed

Lines changed: 137 additions & 72 deletions

File tree

python/packages/core/agent_framework/_workflows/_agent.py

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -437,17 +437,15 @@ async def _run_core(
437437
yield event
438438

439439
elif checkpoint_id is not None:
440-
# Restore the prior workflow state from the checkpoint and, if
441-
# there's a new user message in this run, deliver it to the
442-
# start executor in the same call. This is the multi-turn
443-
# continuation path: shared state (e.g. accumulated conversation
444-
# history maintained by the workflow's executors) survives across
445-
# turns because Workflow.run sets reset_context=False whenever
446-
# checkpoint_id is provided.
447-
message_arg: Any | None = list(input_messages) if input_messages else None
440+
# Restore the prior workflow state from the checkpoint. Shared
441+
# state (e.g. accumulated conversation history maintained by the
442+
# workflow's executors) survives across turns because Workflow.run
443+
# no longer wipes state per call. Callers who want to deliver a
444+
# new user message after restore should make a second
445+
# `workflow.run(message=...)` call - they are NOT mutually
446+
# exclusive on the same instance, but each must be its own call.
448447
if streaming:
449448
async for event in self.workflow.run(
450-
message=message_arg,
451449
stream=True,
452450
checkpoint_id=checkpoint_id,
453451
checkpoint_storage=checkpoint_storage,
@@ -457,7 +455,6 @@ async def _run_core(
457455
yield event
458456
else:
459457
for event in await self.workflow.run(
460-
message=message_arg,
461458
checkpoint_id=checkpoint_id,
462459
checkpoint_storage=checkpoint_storage,
463460
function_invocation_kwargs=function_invocation_kwargs,

python/packages/core/agent_framework/_workflows/_runner.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,12 @@ async def restore_from_checkpoint(
278278
"Please rebuild the original workflow before resuming."
279279
)
280280

281-
# Restore state
281+
# Restore state. Clear first so import_state (which merges) does
282+
# not leak stale keys from a prior run on this Workflow instance.
283+
# This matters more now that Workflow.run() no longer wipes state
284+
# per call - the only reset point for shared state on a reused
285+
# instance is at restore time.
286+
self._state.clear()
282287
self._state.import_state(checkpoint.state)
283288
# Restore executor states using the restored state
284289
await self._restore_executor_states()

python/packages/core/agent_framework/_workflows/_workflow.py

Lines changed: 57 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -299,7 +299,7 @@ def get_executors_list(self) -> list[Executor]:
299299
async def _run_workflow_with_tracing(
300300
self,
301301
initial_executor_fn: Callable[[], Awaitable[None]] | None = None,
302-
reset_context: bool = True,
302+
is_fresh_message_run: bool = True,
303303
streaming: bool = False,
304304
function_invocation_kwargs: Mapping[str, Mapping[str, Any]] | Mapping[str, Any] | None = None,
305305
client_kwargs: Mapping[str, Mapping[str, Any]] | Mapping[str, Any] | None = None,
@@ -310,13 +310,18 @@ async def _run_workflow_with_tracing(
310310
of external callers to maintain context across different workflow runs.
311311
312312
Args:
313-
initial_executor_fn: Optional function to execute initial executor
314-
reset_context: Whether to reset the context for a new run
315-
streaming: Whether to enable streaming mode for agents
313+
initial_executor_fn: Optional function to execute initial executor.
314+
is_fresh_message_run: True when this run is a fresh new turn delivered
315+
via the start executor (i.e. ``message`` is provided without a
316+
``checkpoint_id`` or ``responses``). Resets per-run accounting
317+
(iteration counter and run kwargs) without touching the shared
318+
workflow state. False for checkpoint restores and responses-only
319+
runs, which are continuations of prior work.
320+
streaming: Whether to enable streaming mode for agents.
316321
function_invocation_kwargs: Optional kwargs to store in State for function
317-
invocations in subagents
322+
invocations in subagents.
318323
client_kwargs: Optional kwargs to store in State for chat client
319-
invocations in subagents
324+
invocations in subagents.
320325
321326
Yields:
322327
WorkflowEvent: The events generated during the workflow execution.
@@ -345,16 +350,26 @@ async def _run_workflow_with_tracing(
345350
in_progress = WorkflowEvent.status(WorkflowRunState.IN_PROGRESS)
346351
yield in_progress # noqa: RUF070
347352

348-
# Reset context for a new run if supported
349-
if reset_context:
353+
# Per-run reset for fresh-message runs only. We deliberately
354+
# do NOT clear shared workflow state (`_state.clear()`) or the
355+
# runner context's in-flight messages (`reset_for_new_run()`)
356+
# here - state and pending work persist across `run()` calls
357+
# so that a `WorkflowAgent` can deliver multi-turn input on
358+
# the same instance and have prior turns' context survive.
359+
# Iteration counting and per-run kwargs ARE per-run though,
360+
# so they're reset here.
361+
if is_fresh_message_run:
350362
self._runner.reset_iteration_count()
351-
self._runner.context.reset_for_new_run()
352-
self._state.clear()
353363

354364
# Store run kwargs in State so executors can access them.
355-
# Only overwrite when new kwargs are explicitly provided or state was
356-
# just cleared (fresh run). On continuation (reset_context=False) with
357-
# no new kwargs, preserve the kwargs from the original run.
365+
# Per-run kwargs semantics:
366+
# - On a fresh message run, prior kwargs go away (set to {}
367+
# by default, or to the new kwargs if provided). This
368+
# prevents stale kwargs from a prior turn leaking into the
369+
# current turn.
370+
# - On a continuation (checkpoint restore or responses), the
371+
# prior run's kwargs are preserved unless the caller
372+
# explicitly provides new kwargs.
358373
if function_invocation_kwargs is not None or client_kwargs is not None:
359374
combined_kwargs: dict[str, Any] = {}
360375
if function_invocation_kwargs is not None:
@@ -366,11 +381,12 @@ async def _run_workflow_with_tracing(
366381
client_kwargs, "client_kwargs"
367382
)
368383
self._state.set(WORKFLOW_RUN_KWARGS_KEY, combined_kwargs)
369-
elif reset_context:
384+
elif is_fresh_message_run:
370385
self._state.set(WORKFLOW_RUN_KWARGS_KEY, {})
371386
self._state.commit() # Commit immediately so kwargs are available
372387

373-
# Set streaming mode after reset
388+
# Set streaming mode (always set explicitly per run since
389+
# reset_for_new_run() no longer runs to clear it).
374390
self._runner_context.set_streaming(streaming)
375391

376392
# Execute initial setup if provided
@@ -443,7 +459,7 @@ async def _execute_with_message_or_checkpoint(
443459
if message is None and checkpoint_id is None:
444460
raise ValueError("Must provide either 'message' or 'checkpoint_id'")
445461

446-
# Handle checkpoint restoration (may be combined with message below)
462+
# Handle checkpoint restoration
447463
if checkpoint_id is not None:
448464
has_checkpointing = self._runner.context.has_checkpointing()
449465

@@ -455,10 +471,8 @@ async def _execute_with_message_or_checkpoint(
455471

456472
await self._runner.restore_from_checkpoint(checkpoint_id, checkpoint_storage)
457473

458-
# Handle initial message - if combined with a checkpoint_id, this
459-
# delivers a continuation message to the workflow's start executor
460-
# without clearing prior shared state (reset_context=False).
461-
if message is not None:
474+
# Handle initial message
475+
elif message is not None:
462476
executor = self.get_start_executor()
463477
await executor.execute(
464478
message,
@@ -587,13 +601,29 @@ async def _run_core(
587601
if checkpoint_storage is not None:
588602
self._runner.context.set_runtime_checkpoint_storage(checkpoint_storage)
589603

590-
initial_executor_fn, reset_context = self._resolve_execution_mode(
604+
# Async validation: a fresh-message run (no checkpoint, no responses)
605+
# is only allowed when the runner context has fully drained from any
606+
# prior run. If it still has in-flight executor messages, the prior
607+
# run didn't complete - the caller must either resume from a
608+
# checkpoint or wait for the prior run to drain. (Pending request_info
609+
# events are intentionally NOT blocked here: a follow-up run with
610+
# message=... is the normal way to deliver a response to those
611+
# pending requests, e.g. via WorkflowAgent._process_pending_requests.)
612+
if message is not None and checkpoint_id is None and responses is None:
613+
if await self._runner.context.has_messages():
614+
raise RuntimeError(
615+
"Cannot start a new run with 'message' while in-flight executor "
616+
"messages remain from a prior run. Either resume from a checkpoint "
617+
"(checkpoint_id=...) or wait for the prior run to complete."
618+
)
619+
620+
initial_executor_fn = self._resolve_execution_mode(
591621
message, responses, checkpoint_id, checkpoint_storage
592622
)
593623

594624
async for event in self._run_workflow_with_tracing(
595625
initial_executor_fn=initial_executor_fn,
596-
reset_context=reset_context,
626+
is_fresh_message_run=(message is not None and checkpoint_id is None and responses is None),
597627
streaming=streaming,
598628
function_invocation_kwargs=function_invocation_kwargs,
599629
client_kwargs=client_kwargs,
@@ -662,13 +692,7 @@ def _validate_run_params(
662692
raise ValueError("Cannot provide both 'message' and 'responses'. Use one or the other.")
663693

664694
if message is not None and checkpoint_id is not None:
665-
# Combined message + checkpoint_id is supported: restore prior
666-
# workflow state from the checkpoint, then execute the start
667-
# executor with the new message. The workflow's shared state
668-
# (e.g. accumulated conversation history kept in custom shared
669-
# state) is preserved across the boundary because reset_context
670-
# is set to False for this combination (see _resolve_execution_mode).
671-
pass
695+
raise ValueError("Cannot provide both 'message' and 'checkpoint_id'. Use one or the other.")
672696

673697
if message is None and responses is None and checkpoint_id is None:
674698
raise ValueError(
@@ -682,12 +706,8 @@ def _resolve_execution_mode(
682706
responses: Mapping[str, Any] | None,
683707
checkpoint_id: str | None,
684708
checkpoint_storage: CheckpointStorage | None,
685-
) -> tuple[Callable[[], Awaitable[None]], bool]:
686-
"""Determine the initial executor function and reset_context flag based on parameters.
687-
688-
Returns:
689-
A tuple of (initial_executor_fn, reset_context).
690-
"""
709+
) -> Callable[[], Awaitable[None]]:
710+
"""Determine the initial executor function based on parameters."""
691711
if responses is not None:
692712
if checkpoint_id is not None:
693713
# Combined: restore checkpoint then send responses
@@ -697,13 +717,12 @@ def _resolve_execution_mode(
697717
else:
698718
# Send responses only (requires pending requests in workflow state)
699719
initial_executor_fn = functools.partial(self._send_responses_internal, responses)
700-
return initial_executor_fn, False
720+
return initial_executor_fn
701721
# Regular run or checkpoint restoration
702722
initial_executor_fn = functools.partial(
703723
self._execute_with_message_or_checkpoint, message, checkpoint_id, checkpoint_storage
704724
)
705-
reset_context = message is not None and checkpoint_id is None
706-
return initial_executor_fn, reset_context
725+
return initial_executor_fn
707726

708727
async def _restore_and_send_responses(
709728
self,

python/packages/core/tests/workflow/test_workflow.py

Lines changed: 43 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -488,8 +488,13 @@ async def handle_message(
488488
await ctx.yield_output(existing_messages.copy()) # type: ignore
489489

490490

491-
async def test_workflow_multiple_runs_no_state_collision():
492-
"""Test that running the same workflow instance multiple times doesn't have state collision."""
491+
async def test_workflow_multiple_runs_preserve_state():
492+
"""Test that running the same workflow instance multiple times preserves shared state.
493+
494+
State preservation is the new default - calling ``Workflow.run`` repeatedly
495+
on the same instance behaves like a chat agent maintaining memory across
496+
turns. Callers that want fresh state should rebuild the Workflow.
497+
"""
493498
with tempfile.TemporaryDirectory() as temp_dir:
494499
storage = FileCheckpointStorage(temp_dir)
495500

@@ -503,29 +508,45 @@ async def test_workflow_multiple_runs_no_state_collision():
503508
.build()
504509
)
505510

506-
# Run 1: Should only see messages from run 1
511+
# Run 1: Single record from run 1
507512
result1 = await workflow.run(StateTrackingMessage(data="message1", run_id="run1"))
508513
assert result1.get_final_state() == WorkflowRunState.IDLE
509514
outputs1 = result1.get_outputs()
510515
assert outputs1[0] == ["run1:message1"]
511516

512-
# Run 2: Should only see messages from run 2, not run 1
517+
# Run 2: State from run 1 persists; run 2's record appends.
513518
result2 = await workflow.run(StateTrackingMessage(data="message2", run_id="run2"))
514519
assert result2.get_final_state() == WorkflowRunState.IDLE
515520
outputs2 = result2.get_outputs()
516-
assert outputs2[0] == ["run2:message2"] # Should NOT contain run1 data
521+
assert outputs2[0] == ["run1:message1", "run2:message2"]
517522

518-
# Run 3: Should only see messages from run 3
523+
# Run 3: Same - all three accumulate.
519524
result3 = await workflow.run(StateTrackingMessage(data="message3", run_id="run3"))
520525
assert result3.get_final_state() == WorkflowRunState.IDLE
521526
outputs3 = result3.get_outputs()
522-
assert outputs3[0] == ["run3:message3"] # Should NOT contain run1 or run2 data
527+
assert outputs3[0] == ["run1:message1", "run2:message2", "run3:message3"]
528+
529+
530+
async def test_workflow_multiple_runs_no_state_collision_after_rebuild():
531+
"""Rebuilding the Workflow gives a fresh shared-state slate."""
532+
with tempfile.TemporaryDirectory() as temp_dir:
533+
storage = FileCheckpointStorage(temp_dir)
523534

524-
# Verify that each run only processed its own message
525-
# This confirms that the checkpointable context properly resets between runs
526-
assert outputs1[0] != outputs2[0]
527-
assert outputs2[0] != outputs3[0]
528-
assert outputs1[0] != outputs3[0]
535+
def _build():
536+
executor = StateTrackingExecutor(id="state_executor")
537+
return (
538+
WorkflowBuilder(start_executor=executor, checkpoint_storage=storage)
539+
.add_edge(executor, executor)
540+
.build()
541+
)
542+
543+
wf1 = _build()
544+
result1 = await wf1.run(StateTrackingMessage(data="message1", run_id="run1"))
545+
assert result1.get_outputs()[0] == ["run1:message1"]
546+
547+
wf2 = _build()
548+
result2 = await wf2.run(StateTrackingMessage(data="message2", run_id="run2"))
549+
assert result2.get_outputs()[0] == ["run2:message2"]
529550

530551

531552
async def test_workflow_checkpoint_runtime_only_configuration(
@@ -942,13 +963,16 @@ async def test_workflow_run_parameter_validation(simple_executor: Executor) -> N
942963
result = await workflow.run(test_message)
943964
assert result.get_final_state() == WorkflowRunState.IDLE
944965

945-
# Valid: message + checkpoint_id (combined restore + new input)
946-
# is supported as of the multi-turn checkpoint continuation work
947-
# (restore prior state, then deliver message to start executor with
948-
# reset_context=False). Use a fake id - we just need to confirm the
949-
# call no longer raises at the validation layer.
950-
# Note: passing a non-existent checkpoint_id will fail at restore time,
951-
# which is a different code path than the validation we're checking.
966+
# Invalid: message + checkpoint_id (mutually exclusive). Multi-turn
967+
# state preservation is handled by Workflow.run preserving state across
968+
# calls, so the host pattern is two separate calls (restore-then-run),
969+
# not a single combined call.
970+
with pytest.raises(ValueError, match="Cannot provide both 'message' and 'checkpoint_id'"):
971+
await workflow.run(test_message, checkpoint_id="some-checkpoint")
972+
973+
with pytest.raises(ValueError, match="Cannot provide both 'message' and 'checkpoint_id'"):
974+
async for _ in workflow.run(test_message, checkpoint_id="some-checkpoint", stream=True):
975+
pass
952976

953977
# Invalid: none of message or checkpoint_id
954978
with pytest.raises(ValueError, match="Must provide at least one of"):

python/packages/foundry_hosting/agent_framework_foundry_hosting/_responses.py

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -298,12 +298,33 @@ async def _handle_inner_workflow(
298298
yield response_event_stream.emit_created()
299299
yield response_event_stream.emit_in_progress()
300300

301+
# Multi-turn pattern: when we have a prior checkpoint, restore it
302+
# first (drive the workflow back to idle with prior state intact),
303+
# then make a separate call that delivers the new user input. This
304+
# depends on Workflow.run preserving shared state across calls. The
305+
# restore-only call may yield events from any pending in-flight
306+
# work in the checkpoint; we consume those internally here so they
307+
# don't surface to the response stream as duplicates.
308+
if latest_checkpoint_id is not None:
309+
if is_streaming_request:
310+
async for _ in self._agent.run(
311+
stream=True,
312+
checkpoint_id=latest_checkpoint_id,
313+
checkpoint_storage=checkpoint_storage,
314+
):
315+
pass
316+
else:
317+
await self._agent.run(
318+
stream=False,
319+
checkpoint_id=latest_checkpoint_id,
320+
checkpoint_storage=checkpoint_storage,
321+
)
322+
301323
if not is_streaming_request:
302-
# Run the agent in non-streaming mode
324+
# Run the agent in non-streaming mode with the new user input.
303325
response = await self._agent.run(
304326
input_messages,
305327
stream=False,
306-
checkpoint_id=latest_checkpoint_id,
307328
checkpoint_storage=checkpoint_storage,
308329
)
309330

@@ -320,11 +341,10 @@ async def _handle_inner_workflow(
320341
# lazily created on matching content, closed when a different type arrives.
321342
tracker = _OutputItemTracker(response_event_stream)
322343

323-
# Run the workflow agent in streaming mode
344+
# Run the workflow agent in streaming mode with the new user input.
324345
async for update in self._agent.run(
325346
input_messages,
326347
stream=True,
327-
checkpoint_id=latest_checkpoint_id,
328348
checkpoint_storage=checkpoint_storage,
329349
):
330350
for content in update.contents:

0 commit comments

Comments (0)