Store CodeAgent code outputs in ActionStep (TransformerOptimus#1463)

aymeric-roucher · albertvillanova · web-flow · commit 12fa56cd354b · 2025-06-24T09:52:54.000+02:00
Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
@@ -1668,6 +1668,7 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[ChatMessageStreamDe
             else:
                 code_action = parse_code_blobs(output_text)
             code_action = fix_final_answer_code(code_action)
+            memory_step.code_action = code_action
         except Exception as e:
             error_msg = f"Error in code parsing:\n{e}\nMake sure to provide correct code blobs."
             raise AgentParsingError(error_msg, self.logger)
diff --git a/src/smolagents/memory.py b/src/smolagents/memory.py
@@ -52,6 +52,7 @@ class ActionStep(MemoryStep):
     error: AgentError | None = None
     model_output_message: ChatMessage | None = None
     model_output: str | None = None
+    code_action: str | None = None
     observations: str | None = None
     observations_images: list["PIL.Image.Image"] | None = None
     action_output: Any = None
@@ -68,6 +69,7 @@ def dict(self):
             "error": self.error.dict() if self.error else None,
             "model_output_message": self.model_output_message.dict() if self.model_output_message else None,
             "model_output": self.model_output,
+            "code_action": self.code_action,
             "observations": self.observations,
             "observations_images": [image.tobytes() for image in self.observations_images]
             if self.observations_images
@@ -245,5 +247,11 @@ def replay(self, logger: AgentLogger, detailed: bool = False):
                     logger.log_messages(step.model_input_messages, level=LogLevel.ERROR)
                 logger.log_markdown(title="Agent output:", content=step.plan, level=LogLevel.ERROR)
 
+    def return_full_code(self) -> str:
+        """Returns all code actions from the agent's steps, concatenated as a single script."""
+        return "\n\n".join(
+            [step.code_action for step in self.steps if isinstance(step, ActionStep) and step.code_action is not None]
+        )
+
 
 __all__ = ["AgentMemory"]
diff --git a/tests/test_agents.py b/tests/test_agents.py
@@ -22,6 +22,7 @@
 from contextlib import nullcontext as does_not_raise
 from dataclasses import dataclass
 from pathlib import Path
+from textwrap import dedent
 from typing import Optional
 from unittest.mock import MagicMock, patch
 
@@ -1580,6 +1581,11 @@ def test_syntax_error_show_offending_lines(self):
         assert isinstance(output, AgentText)
         assert output == "got an error"
         assert '    print("Failing due to unexpected indent")' in str(agent.memory.steps)
+        assert isinstance(agent.memory.steps[-2], ActionStep)
+        assert agent.memory.steps[-2].code_action == dedent("""a = 2
+b = a * 2
+    print("Failing due to unexpected indent")
+print("Ok, calculation done!")""")
 
     def test_end_code_appending(self):
         # Checking original output message
diff --git a/tests/test_memory.py b/tests/test_memory.py
@@ -22,6 +22,15 @@ def test_initialization(self):
         assert memory.system_prompt.system_prompt == system_prompt
         assert memory.steps == []
 
+    def test_return_all_code_actions(self):
+        memory = AgentMemory(system_prompt="This is a system prompt.")
+        memory.steps = [
+            ActionStep(step_number=1, timing=Timing(start_time=0.0, end_time=1.0), code_action="print('Hello')"),
+            ActionStep(step_number=2, timing=Timing(start_time=0.0, end_time=1.0), code_action=None),
+            ActionStep(step_number=3, timing=Timing(start_time=0.0, end_time=1.0), code_action="print('World')"),
+        ]  # type: ignore
+        assert memory.return_full_code() == "print('Hello')\n\nprint('World')"
+
 
 class TestMemoryStep:
     def test_initialization(self):