1- """Runtime usage instrumentation for playbook entries."""
1+ """Runtime usage instrumentation for playbook entries.
2+
3+ This module provides learning tracking for Atlas agents, enabling measurement
4+ of which playbook entries are referenced during execution and which actions
5+ are adopted based on learning recommendations.
6+
7+ Core Concepts:
8+ - Cue Detection: Identifying when user input matches learning cue patterns
9+ - Action Adoption: Tracking when tools are executed based on playbook entries
10+ - Impact Metrics: Computing effectiveness of learning entries across sessions
11+
12+ Typical Usage (BYOA Adapters):
13+ ```python
14+ from atlas.learning.playbook import resolve_playbook
15+ from atlas.learning.usage import get_tracker
16+
17+ # 1. Retrieve playbook (auto-registers entries)
18+ playbook, digest, metadata = resolve_playbook("student", apply=True)
19+
20+ # 2. Get tracker
21+ tracker = get_tracker()
22+
23+ # 3. Detect cue hits
24+ tracker.detect_and_record("student", user_input, step_id=1)
25+
26+ # 4. Record action adoptions
27+ tracker.record_action_adoption("student", "tool_name", success=True, step_id=1)
28+
29+ # 5. Record final outcome
30+ tracker.record_session_outcome(reward_score=0.85)
31+ ```
32+
33+ See docs/sdk/learning_tracking.md for the complete guide.
34+ """
235
336from __future__ import annotations
437
@@ -17,7 +50,35 @@ class _TrackerConfig:
1750
1851
1952class LearningUsageTracker :
20- """Lightweight helper that records cue hits and action adoption statistics."""
53+ """Lightweight helper that records cue hits and action adoption statistics.
54+
55+ This tracker instruments learning usage during agent execution, enabling
56+ measurement of:
57+ - Which playbook entries have their cues detected in user input
58+ - Which tools/actions are adopted based on playbook recommendations
59+ - Whether adopted actions succeed or fail
60+ - Session-level metrics (reward, tokens, retries, failures)
61+
62+ The tracker is automatically created and attached to ExecutionContext.
63+ Access it via `get_tracker()` or `get_tracker(context)`.
64+
65+ Usage Pattern:
66+ 1. Register entries: Called automatically by resolve_playbook()
67+ 2. Detect cues: Call detect_and_record() on user input
68+ 3. Track adoptions: Call record_action_adoption() after tool execution
69+ 4. Record outcome: Call record_session_outcome() at session end
70+
71+ Attributes:
72+ enabled (bool): Whether tracking is active (from config)
73+
74+ Methods:
75+ register_entries(): Register playbook entries for tracking
76+ detect_and_record(): Detect and record cue hits from text
77+ record_cue_hit(): Record a specific cue hit manually
78+ record_action_adoption(): Record when a tool/action is adopted
79+ record_session_outcome(): Record final session metrics
80+ snapshot(): Export current tracking state
81+ """
2182
2283 def __init__ (self , context : ExecutionContext ) -> None :
2384 self ._context = context
@@ -53,6 +114,27 @@ def enabled(self) -> bool:
53114 return self ._config .enabled
54115
55116 def register_entries (self , role : str , entries : Iterable [Dict [str , Any ]]) -> None :
117+ """Register playbook entries for tracking.
118+
119+ This method is called automatically by resolve_playbook() so BYOA adapters
120+ typically don't need to call it manually.
121+
122+ For each entry, this method:
123+ - Creates tracking storage (cue_hits, action_adoptions, step_ids)
124+ - Registers cue detection patterns
125+ - Stores runtime_handle for action adoption matching
126+
127+ Args:
128+ role: "student" or "teacher"
129+ entries: List of playbook entry dicts, each containing:
130+ - id: Unique entry identifier
131+ - cue: Dict with type and pattern for detection
132+ - action: Dict with runtime_handle for adoption tracking
133+ - scope: Category and other metadata
134+
135+ Note:
136+ Called automatically by resolve_playbook() in atlas/learning/playbook.py
137+ """
56138 if not self .enabled :
57139 return
58140 role_store = self ._usage_store ["roles" ].setdefault (role , {})
@@ -96,6 +178,38 @@ def detect_and_record(
96178 step_id : int | None = None ,
97179 context_hint : str | None = None ,
98180 ) -> List [Dict [str , Any ]]:
181+ """Detect and record cue hits from text.
182+
183+ Scans the provided text for patterns matching registered playbook entry cues.
184+ For each match, increments cue hit counters and optionally saves examples.
185+
186+ Cue Types:
187+ - keyword: Simple substring match (case-insensitive)
188+ - regex: Regular expression pattern match
189+ - predicate: Condition-based match (treated as keyword)
190+
191+ When to Call:
192+ - On user input/task description (planning phase)
193+ - On step descriptions (execution phase)
194+ - On any text that might contain learning cues
195+
196+ Args:
197+ role: "student" or "teacher"
198+ text: Text to scan for cue patterns
199+ step_id: Optional step identifier for tracking which steps have cue hits
200+ context_hint: Optional text snippet to save as example (truncated to 200 chars)
201+
202+ Returns:
203+ List of matched cue dicts, each containing:
204+ - entry_id: Which playbook entry was matched
205+ - pattern: The pattern that matched
206+ - type: Cue type (keyword, regex, predicate)
207+
208+ Example:
209+ >>> tracker = get_tracker()
210+ >>> matches = tracker.detect_and_record("student", "create a new contact", step_id=1)
211+ >>> print(f"Detected {len(matches)} cue hits")
212+ """
99213 if not self .enabled or not text :
100214 return []
101215 detectors = self ._usage_store .get ("detectors" , {}).get (role ) or []
@@ -147,6 +261,46 @@ def record_action_adoption(
147261 step_id : int | None = None ,
148262 metadata : Optional [Dict [str , Any ]] = None ,
149263 ) -> None :
264+ """Record when a tool/action is adopted based on playbook recommendation.
265+
266+ This method tracks whether learning entries actually change agent behavior.
267+ It matches the runtime_handle parameter against playbook entries and
268+ increments adoption counters when matches are found.
269+
270+ CRITICAL: The runtime_handle must EXACTLY match the action.runtime_handle
271+ stored in playbook entries. This is how Atlas links tool execution to
272+ learning recommendations.
273+
274+ When to Call:
275+ - After executing any tool that might be in playbook entries
276+ - After taking any action recommended by learning
277+ - Typically called once per tool execution
278+
279+ Args:
280+ role: "student" or "teacher"
281+ runtime_handle: Tool name or action identifier (must match playbook entries)
282+ success: Whether the action succeeded (True) or failed (False)
283+ step_id: Optional step identifier for tracking adoption per step
284+ metadata: Optional dict with additional context (saved as example if
285+ capture_examples is enabled)
286+
287+ Example:
288+ >>> tracker = get_tracker()
289+ >>> # After executing a tool
290+ >>> tracker.record_action_adoption(
291+ ... "student",
292+ ... runtime_handle="create_contact", # Must match entry's runtime_handle
293+ ... success=True,
294+ ... step_id=1,
295+ ... metadata={"tool_name": "create_contact", "args": {...}}
296+ ... )
297+
298+ Troubleshooting:
299+ If adoptions remain 0 despite tool execution:
300+ 1. Verify runtime_handle exactly matches playbook entry's action.runtime_handle
301+ 2. Ensure playbook entries are registered (call resolve_playbook first)
302+ 3. Check that learning synthesis has generated entries
303+ """
150304 if not self .enabled :
151305 return
152306 if not runtime_handle :
@@ -187,6 +341,45 @@ def record_session_outcome(
187341 failure_flag : bool | None = None ,
188342 failure_events : Sequence [Dict [str , Any ]] | None = None ,
189343 ) -> None :
344+ """Record final session metrics and outcome.
345+
346+ Call this method once at the end of session execution to record overall
347+ metrics used for computing learning entry impact.
348+
349+ When to Call:
350+ - At the end of session execution, before returning results
351+ - After all steps have been executed
352+ - Typically called once per session
353+
354+ Args:
355+ reward_score: Session reward score (0.0-1.0)
356+ token_usage: Dict with token counts:
357+ - total_tokens: Total tokens used
358+ - prompt_tokens: Input tokens
359+ - completion_tokens: Output tokens
360+ - calls: Number of LLM calls
361+ incident_id: Optional incident/task identifier for grouping
362+ task_identifier: Optional task type/category identifier
363+ incident_tags: Optional list of tags for categorization
364+ retry_count: Number of retries during session
365+ failure_flag: Whether session failed overall
366+ failure_events: List of failure event dicts
367+
368+ Example:
369+ >>> tracker = get_tracker()
370+ >>> tracker.record_session_outcome(
371+ ... reward_score=0.85,
372+ ... token_usage={
373+ ... "total_tokens": 2000,
374+ ... "prompt_tokens": 1500,
375+ ... "completion_tokens": 500,
376+ ... "calls": 3
377+ ... },
378+ ... task_identifier="security-review",
379+ ... retry_count=1,
380+ ... failure_flag=False
381+ ... )
382+ """
190383 if not self .enabled :
191384 return
192385 session_block = self ._usage_store ["session" ]
@@ -239,13 +432,75 @@ def record_session_outcome(
239432 session_block ["failure_events" ] = cleaned
240433
def snapshot(self) -> Dict[str, Any]:
    """Return a shallow copy of the tracker's usage store.

    A new top-level dict is built from ``self._usage_store``. Only the
    outermost mapping is copied: the nested values are the very same
    objects the tracker holds, so mutating them through the returned dict
    also changes the tracker's own state.

    Returns:
        A dict with the same keys and values as ``self._usage_store``.
        Other methods of this class read or write the ``"roles"``,
        ``"session"`` and ``"detectors"`` keys of that store.

    Note:
        NOTE(review): the store is described elsewhere as JSON-serializable
        and as being persisted for learning impact reports; this method
        neither validates nor performs either of those -- confirm against
        the code that consumes the snapshot.
    """
    # dict(...) copies one level only; nested blocks stay shared by reference.
    usage_copy: Dict[str, Any] = dict(self._usage_store)
    return usage_copy
245473
246474
def get_tracker(context: ExecutionContext | None = None) -> LearningUsageTracker:
    """Build a learning usage tracker for the given or the active context.

    Args:
        context: Execution context to bind the tracker to. When it is
            omitted (or otherwise falsy), the result of
            ``ExecutionContext.get()`` is used instead.

    Returns:
        A ``LearningUsageTracker`` constructed around the resolved context.
        This helper constructs a tracker object on every call.

    Note:
        Any error raised by ``ExecutionContext.get()`` (for example when no
        context is active) propagates to the caller unchanged.
        NOTE(review): tracking data is described elsewhere as living in
        ``context.metadata["learning_usage"]`` and therefore shared between
        trackers built on the same context -- confirm against
        ``LearningUsageTracker.__init__``.

    Example:
        tracker = get_tracker()           # resolves ExecutionContext.get()
        tracker = get_tracker(context)    # binds to an explicit context
    """
    # `or` (not an `is None` test) is deliberate here: any falsy argument
    # falls back to the ambient context.
    resolved_context = context or ExecutionContext.get()
    return LearningUsageTracker(resolved_context)
0 commit comments