feat: add callback system comparable to pytorch lightning

philipph-askui · philipph-askui · commit d135838f207a · 2026-02-27T15:15:58.000+01:00
diff --git a/README.md b/README.md
@@ -123,6 +123,7 @@ Ready to build your first agent? Check out our documentation:
 9. **[Reporting](docs/08_reporting.md)** - Obtain agent logs as execution reports and summaries as test reports
 10. **[Observability](docs/09_observability_telemetry_tracing.md)** - Monitor and debug agents
 11. **[Extracting Data](docs/10_extracting_data.md)** - Extracting structured data from screenshots and files
+12. **[Callbacks](docs/11_callbacks.md)** - Inject custom logic into the control loop
 
 **Official documentation:** [docs.askui.com](https://docs.askui.com)
 
diff --git a/docs/00_overview.md b/docs/00_overview.md
@@ -87,6 +87,9 @@ Understand what data is collected and how to opt out.
 ### 10 - Extracting Data
 **Topics**: Using `get()`, file support (PDF. Excel, Word, CSV), structured data extraction, response schemas
 
 Extract information from screens and files using the `get()` method with Pydantic models.
 
+### 11 - Callbacks
+**Topics**: Inject custom logic at different positions of the control loop through callbacks
+
 ## Additional Resources
diff --git a/docs/11_callbacks.md b/docs/11_callbacks.md
@@ -0,0 +1,82 @@
+# Callbacks
+
+Callbacks provide hooks into the agent's conversation lifecycle, similar to PyTorch Lightning's callback system. Use them for logging, monitoring, custom metrics, or extending agent behavior.
+
+## Usage
+
+Subclass `ConversationCallback` and override the hooks you need:
+
+```python
+from askui import ComputerAgent, ConversationCallback
+
+class MetricsCallback(ConversationCallback):
+    def on_step_start(self, conversation, step_index):
+        print(f"Step {step_index} starting...")
+
+    def on_step_end(self, conversation, step_index, result):
+        print(f"Step {step_index} finished: {result.status}")
+
+with ComputerAgent(callbacks=[MetricsCallback()]) as agent:
+    agent.act("Open the settings menu")
+```
+
+## Available Hooks
+
+| Hook | When Called | Parameters |
+|------|-------------|------------|
+| `on_conversation_start` | After setup, before control loop | `conversation` |
+| `on_conversation_end` | After control loop, before cleanup | `conversation` |
+| `on_control_loop_start` | Before the iteration loop begins | `conversation` |
+| `on_control_loop_end` | After the iteration loop ends | `conversation` |
+| `on_step_start` | Before each step execution | `conversation`, `step_index` |
+| `on_step_end` | After each step execution | `conversation`, `step_index`, `result` |
+| `on_tool_execution_start` | Before a batch of tool calls is executed (once per batch, not once per tool) | `conversation`, `tool_names` |
+| `on_tool_execution_end` | After the batch of tool calls has been executed | `conversation`, `tool_names` |
+
+### Parameters
+
+- **`conversation`**: The `Conversation` instance with access to messages, settings, and state
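+- **`step_index`**: Zero-based index of the current step; it restarts at 0 each time the control loop begins
+- **`result`**: `SpeakerResult` containing `status`, `messages_to_add`, and `usage`
+- **`tool_names`**: Names of the tools in the batch being executed, in the order the tool calls appear in the message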
+
+## Example: Timing Callback
+
+```python
+import time
+from askui import ComputerAgent, ConversationCallback
+
+class TimingCallback(ConversationCallback):
+    def __init__(self):
+        self.start_time = None
+        self.step_times = []
+
+    def on_conversation_start(self, conversation):
+        self.start_time = time.time()
+
+    def on_step_start(self, conversation, step_index):
+        self._step_start = time.time()
+
+    def on_step_end(self, conversation, step_index, result):
+        elapsed = time.time() - self._step_start
+        self.step_times.append(elapsed)
+        print(f"Step {step_index}: {elapsed:.2f}s")
+
+    def on_conversation_end(self, conversation):
+        total = time.time() - self.start_time
+        print(f"Total: {total:.2f}s across {len(self.step_times)} steps")
+
+with ComputerAgent(callbacks=[TimingCallback()]) as agent:
+    agent.act("Search for documents")
+```
+
+## Multiple Callbacks
+
+Pass multiple callbacks to combine behaviors:
+
+```python
+with ComputerAgent(callbacks=[TimingCallback(), MetricsCallback()]) as agent:
+    agent.act("Complete the form")
+```
+
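+Callbacks are called in the order they are provided. Exceptions raised inside a hook are not caught, so a failing callback prevents the callbacks after it from running for that hook.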
diff --git a/src/askui/__init__.py b/src/askui/__init__.py
@@ -30,6 +30,7 @@
     ToolUseBlockParam,
     UrlImageSourceParam,
 )
+from .models.shared.conversation_callback import ConversationCallback
 from .models.shared.settings import (
     DEFAULT_GET_RESOLUTION,
     DEFAULT_LOCATE_RESOLUTION,
@@ -76,6 +77,7 @@
     "CitationPageLocationParam",
     "ConfigurableRetry",
     "ContentBlockParam",
+    "ConversationCallback",
     "DEFAULT_GET_RESOLUTION",
     "DEFAULT_LOCATE_RESOLUTION",
     "GetSettings",
diff --git a/src/askui/agent.py b/src/askui/agent.py
@@ -9,6 +9,7 @@
 from askui.container import telemetry
 from askui.locators.locators import Locator
 from askui.models.models import Point
+from askui.models.shared.conversation_callback import ConversationCallback
 from askui.models.shared.settings import ActSettings, LocateSettings, MessageSettings
 from askui.models.shared.tools import Tool
 from askui.prompts.act_prompts import (
@@ -67,7 +68,7 @@ class ComputerAgent(Agent):
         ```
     """
 
-    @telemetry.record_call(exclude={"reporters", "tools", "act_tools"})
+    @telemetry.record_call(exclude={"reporters", "tools", "act_tools", "callbacks"})
     @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
     def __init__(
         self,
@@ -77,6 +78,7 @@ def __init__(
         settings: AgentSettings | None = None,
         retry: Retry | None = None,
         act_tools: list[Tool] | None = None,
+        callbacks: list[ConversationCallback] | None = None,
     ) -> None:
         reporter = CompositeReporter(reporters=reporters)
         self.tools = tools or AgentToolbox(
@@ -109,6 +111,7 @@ def __init__(
             + (act_tools or []),
             agent_os=self.tools.os,
             settings=settings,
+            callbacks=callbacks,
         )
         self.act_agent_os_facade: ComputerAgentOsFacade = ComputerAgentOsFacade(
             self.tools.os
diff --git a/src/askui/agent_base.py b/src/askui/agent_base.py
@@ -14,6 +14,7 @@
 from askui.locators.locators import Locator
 from askui.models.shared.agent_message_param import MessageParam
 from askui.models.shared.conversation import Conversation, Speakers
+from askui.models.shared.conversation_callback import ConversationCallback
 from askui.models.shared.settings import (
     ActSettings,
     CacheWritingSettings,
@@ -58,6 +59,7 @@ def __init__(
         tools: list[Tool] | None = None,
         agent_os: AgentOs | AndroidAgentOs | None = None,
         settings: AgentSettings | None = None,
+        callbacks: list[ConversationCallback] | None = None,
     ) -> None:
         load_dotenv()
         self._reporter: Reporter = reporter or CompositeReporter(reporters=None)
@@ -79,6 +81,7 @@ def __init__(
             image_qa_provider=self._image_qa_provider,
             detection_provider=self._detection_provider,
             reporter=self._reporter,
+            callbacks=callbacks,
         )
 
         # Provider-based tools
diff --git a/src/askui/android_agent.py b/src/askui/android_agent.py
@@ -9,6 +9,7 @@
 from askui.container import telemetry
 from askui.locators.locators import Locator
 from askui.models.models import Point
+from askui.models.shared.conversation_callback import ConversationCallback
 from askui.models.shared.settings import ActSettings, MessageSettings
 from askui.models.shared.tools import Tool
 from askui.prompts.act_prompts import create_android_agent_prompt
@@ -63,7 +64,7 @@ class AndroidAgent(Agent):
         ```
     """
 
-    @telemetry.record_call(exclude={"reporters", "tools", "act_tools"})
+    @telemetry.record_call(exclude={"reporters", "tools", "act_tools", "callbacks"})
     @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
     def __init__(
         self,
@@ -72,6 +73,7 @@ def __init__(
         settings: AgentSettings | None = None,
         retry: Retry | None = None,
         act_tools: list[Tool] | None = None,
+        callbacks: list[ConversationCallback] | None = None,
     ) -> None:
         reporter = CompositeReporter(reporters=reporters)
         self.os = PpadbAgentOs(device_identifier=device, reporter=reporter)
@@ -98,6 +100,7 @@ def __init__(
             + (act_tools or []),
             agent_os=self.os,
             settings=settings,
+            callbacks=callbacks,
         )
         self.act_tool_collection.add_agent_os(self.act_agent_os_facade)
         # Override default act settings with Android-specific settings
diff --git a/src/askui/models/shared/conversation.py b/src/askui/models/shared/conversation.py
@@ -25,6 +25,7 @@
 from askui.speaker.speaker import SpeakerResult, Speakers
 
 if TYPE_CHECKING:
+    from askui.models.shared.conversation_callback import ConversationCallback
     from askui.utils.caching.cache_manager import CacheManager
 
 logger = logging.getLogger(__name__)
@@ -58,6 +59,7 @@ class Conversation:
         reporter: Reporter for logging messages and actions
         cache_manager: Cache manager for recording/playback (optional)
         truncation_strategy_factory: Factory for creating truncation strategies
+        callbacks: List of callbacks for conversation lifecycle hooks (optional)
     """
 
     def __init__(
@@ -69,6 +71,7 @@ def __init__(
         reporter: Reporter = NULL_REPORTER,
         cache_manager: "CacheManager | None" = None,
         truncation_strategy_factory: TruncationStrategyFactory | None = None,
+        callbacks: "list[ConversationCallback] | None" = None,
     ) -> None:
         """Initialize conversation with speakers and model providers."""
         if not speakers:
@@ -92,18 +95,33 @@ def __init__(
             truncation_strategy_factory or SimpleTruncationStrategyFactory()
         )
         self._truncation_strategy: TruncationStrategy | None = None
+        self._callbacks: "list[ConversationCallback]" = callbacks or []
 
         # State for current execution (set in start())
         self.settings: ActSettings = ActSettings()
         self.tools: ToolCollection = ToolCollection()
         self._reporters: list[Reporter] = []
+        self._step_index: int = 0
 
         # Cache execution context (for communication between tools and CacheExecutor)
         self.cache_execution_context: dict[str, Any] = {}
 
         # Track if cache execution was used (to prevent recording during playback)
         self._executed_from_cache: bool = False
 
+    def _call_callbacks(self, method_name: str, *args: Any, **kwargs: Any) -> None:
+        """Call a method on all registered callbacks.
+
+        Args:
+            method_name: Name of the callback method to call
+            *args: Positional arguments to pass to the callback
+            **kwargs: Keyword arguments to pass to the callback
+        """
+        for callback in self._callbacks:  # in order; a raising hook stops the rest
+            method = getattr(callback, method_name, None)  # None when hook is absent
+            if method and callable(method):  # skip absent or non-callable attributes
+                method(self, *args, **kwargs)  # hook gets the conversation first
+
     @tracer.start_as_current_span("conversation")
     def execute_conversation(
         self,
@@ -119,7 +137,6 @@ def execute_conversation(
 
         Args:
             messages: Initial message history
-            on_message: Optional callback for each message
             tools: Available tools
             settings: Agent settings
             reporters: Optional list of additional reporters for this conversation
@@ -128,7 +145,11 @@ def execute_conversation(
         logger.info(msg)
 
         self._setup_control_loop(messages, tools, settings, reporters)
+
+        self._call_callbacks("on_conversation_start")
         self._execute_control_loop()
+        self._call_callbacks("on_conversation_end")
+
         self._conclude_control_loop()
 
     @tracer.start_as_current_span("setup_control_loop")
@@ -162,9 +183,12 @@ def _setup_control_loop(
 
     @tracer.start_as_current_span("control_loop")
     def _execute_control_loop(self) -> None:
+        self._call_callbacks("on_control_loop_start")  # runs before the index reset
+        self._step_index = 0  # restart step numbering for this run
         continue_execution = True
         while continue_execution:
             continue_execution = self._execute_step()
+        self._call_callbacks("on_control_loop_end")  # not reached if a step raises
 
     @tracer.start_as_current_span("finish_control_loop")
     def _conclude_control_loop(self) -> None:
@@ -189,6 +213,7 @@ def _execute_step(self) -> bool:
         Returns:
             True if loop should continue, False if done
         """
+        self._call_callbacks("on_step_start", self._step_index)
 
         # 1. Infer next speaker
         speaker = self.current_speaker
@@ -226,6 +251,9 @@ def _execute_step(self) -> bool:
         if result.usage:
             self._accumulate_usage(result.usage)
 
+        self._call_callbacks("on_step_end", self._step_index, result)
+        self._step_index += 1
+
         return continue_loop
 
     @tracer.start_as_current_span("execute_tool_call")
@@ -255,8 +283,11 @@ def _execute_tools_if_present(self, message: MessageParam) -> MessageParam | Non
             return None
 
         # Execute tools
+        tool_names = [block.name for block in tool_use_blocks]
         logger.debug("Executing %d tool(s)", len(tool_use_blocks))
+        self._call_callbacks("on_tool_execution_start", tool_names)
         tool_results = self.tools.run(tool_use_blocks)
+        self._call_callbacks("on_tool_execution_end", tool_names)
 
         if not tool_results:
             return None
diff --git a/src/askui/models/shared/conversation_callback.py b/src/askui/models/shared/conversation_callback.py