Skip to content

Commit 994cc4f

Browse files
authored
Fix learning synthesis issues: session_reward context and Gemini structured outputs
* Fix learning synthesis not triggered when session_reward is set via the orchestrator
  - Use ExecutionContext.get() after orchestrator.arun() completes to ensure we access the same context instance that the orchestrator used
  - Ensure ExecutionContext.metadata property always returns a dict, with defensive initialization checks
  - Maintains backward compatibility with all adapters (LangGraph, BYOA, etc.)
  - Fixes issue #138

* Implement Gemini structured outputs for learning synthesis
  - Add a JSON Schema builder for the playbook_entry.v1 structure (atlas/learning/schema.py)
  - Update LLMClient to detect Gemini models and use structured outputs via extra_body
  - Update LearningSynthesizer to pass the JSON schema for Gemini models
  - Improve error handling with clearer error messages that include the model info
  - Update the mock response handler to support the playbook_entry.v1 structure
  - Add comprehensive unit tests for schema generation and structured outputs
  - Maintain backward compatibility for non-Gemini models (OpenAI, etc.)

  This enforces schema validation at the API level for Gemini models, prevents malformed responses, and reduces silent failures in learning synthesis. Non-Gemini models continue to use the OpenAI-style response_format for backward compatibility. Fixes issue #139

* Address Copilot feedback: add defensive type checks
  - Add an isinstance check for response_format before calling .get()
  - Simplify a test assertion for better readability
  - Improves defensive programming and code clarity
1 parent c96f585 commit 994cc4f

6 files changed

Lines changed: 346 additions & 12 deletions

File tree

atlas/core/__init__.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -216,17 +216,20 @@ async def arun(
216216
capability_probe=capability_probe_client,
217217
)
218218
result = await orchestrator.arun(task)
219+
# Get current context after orchestrator completes to ensure we access the same
220+
# context instance that the orchestrator used (they share the same ExecutionContextState)
221+
current_context = ExecutionContext.get()
219222
if (
220223
database
221224
and learning_synthesizer
222225
and learning_synthesizer.enabled
223226
and learning_cfg.update_enabled
224-
and execution_context.metadata.get("session_reward") is not None
227+
and current_context.metadata.get("session_reward") is not None
225228
):
226-
reward_payload = execution_context.metadata.get("session_reward")
227-
trajectory_payload = execution_context.metadata.get("session_trajectory")
228-
history_payload = execution_context.metadata.get("learning_history")
229-
current_learning_state = execution_context.metadata.get("learning_state") or {}
229+
reward_payload = current_context.metadata.get("session_reward")
230+
trajectory_payload = current_context.metadata.get("session_trajectory")
231+
history_payload = current_context.metadata.get("learning_history")
232+
current_learning_state = current_context.metadata.get("learning_state") or {}
230233
synthesis = await learning_synthesizer.asynthesize(
231234
learning_key=learning_key,
232235
task=task,

atlas/learning/schema.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
"""JSON Schema builder for playbook entry learning synthesis."""
2+
3+
from __future__ import annotations
4+
5+
from typing import Any, Dict
6+
7+
8+
def build_playbook_entry_schema() -> Dict[str, Any]:
    """Return the JSON Schema describing the ``playbook_entry.v1`` payload.

    The shape mirrors the structure defined in atlas/learning/prompts.py and
    is handed to Gemini's structured-output feature so that type safety and
    schema validation are enforced at the API level.

    Returns:
        A JSON Schema dictionary compatible with Gemini's
        ``response_json_schema`` format.
    """

    def _nullable(kind: str, description: str) -> Dict[str, Any]:
        # Shorthand for an optional field whose value may also be null.
        return {"type": [kind, "null"], "description": description}

    cue_schema: Dict[str, Any] = {
        "type": "object",
        "description": "Machine-detectable trigger pattern",
        "properties": {
            "type": {
                "type": "string",
                "enum": ["regex", "keyword", "predicate"],
                "description": "Type of cue pattern",
            },
            "pattern": {
                "type": "string",
                "description": "Machine-detectable trigger pattern (max 150 chars)",
            },
            "description": _nullable("string", "Optional human-readable explanation"),
        },
        "required": ["type", "pattern"],
    }

    action_schema: Dict[str, Any] = {
        "type": "object",
        "description": "Action to take when cue is detected",
        "properties": {
            "imperative": {
                "type": "string",
                "description": "Imperative verb phrasing describing the action (max 120 chars)",
            },
            "runtime_handle": _nullable(
                "string",
                "Runtime handle/tool name from available_runtime_handles, or null if no tools",
            ),
            "tool_name": _nullable("string", "Optional tool name"),
            "arguments": _nullable("object", "Optional tool arguments"),
        },
        "required": ["imperative"],
    }

    scope_schema: Dict[str, Any] = {
        "type": "object",
        "description": "Scope and constraints for when this entry applies",
        "properties": {
            "category": {
                "type": "string",
                "enum": ["reinforcement", "differentiation"],
                "description": "Whether this reinforces existing behavior or introduces new strategy",
            },
            "constraints": {
                "type": "string",
                "description": "Boundaries and applicability constraints (max 250 chars)",
            },
            "applies_when": _nullable("string", "Optional condition for when this entry applies"),
        },
        "required": ["category", "constraints"],
    }

    entry_schema: Dict[str, Any] = {
        "type": "object",
        "properties": {
            "id": _nullable("string", "Unique identifier for the entry, or null for new entries"),
            "audience": {
                "type": "string",
                "enum": ["student", "teacher"],
                "description": "Target audience for this playbook entry",
            },
            "cue": cue_schema,
            "action": action_schema,
            "expected_effect": {
                "type": "string",
                "description": "Explanation of why this action improves outcomes (max 200 chars)",
            },
            "scope": scope_schema,
            "metadata": _nullable("object", "Optional free-form metadata"),
        },
        "required": ["audience", "cue", "action", "expected_effect", "scope"],
    }

    return {
        "type": "object",
        "properties": {
            "version": {
                "type": "string",
                "const": "playbook_entry.v1",
                "description": "Schema version identifier",
            },
            "student_pamphlet": _nullable(
                "string", "Updated student learning pamphlet text or null if unchanged"
            ),
            "teacher_pamphlet": _nullable(
                "string", "Updated teacher learning pamphlet text or null if unchanged"
            ),
            "playbook_entries": {
                "type": "array",
                "description": "List of playbook entries to add or update",
                "items": entry_schema,
            },
            "session_student_learning": _nullable(
                "string", "Brief takeaway from this session for student (optional)"
            ),
            "session_teacher_learning": _nullable(
                "string", "Teacher intervention note from this session (optional)"
            ),
            "metadata": _nullable(
                "object", "Optional metadata including synthesis reasoning and validation notes"
            ),
        },
        "required": ["version"],
    }
139+
140+
141+
__all__ = ["build_playbook_entry_schema"]
142+

atlas/learning/synthesizer.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
stabilise_playbook_entry_id,
2525
)
2626
from atlas.learning.prompts import LEARNING_SYNTHESIS_PROMPT
27+
from atlas.learning.schema import build_playbook_entry_schema
2728
from atlas.runtime.orchestration.execution_context import ExecutionContext
2829
from atlas.utils.llm_client import LLMClient
2930

@@ -105,25 +106,43 @@ async def asynthesize(
105106
if client is None:
106107
logger.debug("Learning synthesizer client unavailable; skipping update for %s", learning_key)
107108
return None
109+
110+
# Build JSON schema for structured outputs (Gemini models)
111+
json_schema = build_playbook_entry_schema()
112+
overrides: Dict[str, Any] = {}
113+
if self._is_gemini_model(client.model):
114+
# Pass JSON schema via extra_body for Gemini structured outputs
115+
overrides["extra_body"] = {
116+
"response_json_schema": json_schema
117+
}
118+
108119
try:
109120
response = await client.acomplete(
110121
messages,
111122
response_format={"type": "json_object"},
123+
overrides=overrides,
112124
)
113125
audit_entry = {
114126
"model": client.model,
115127
"messages": messages,
116128
"response": response.content,
117129
"reasoning": response.reasoning or {},
118130
"raw_response": response.raw,
131+
"structured_output": self._is_gemini_model(client.model),
119132
}
120133
except Exception as exc:
121134
logger.warning("Learning synthesis call failed for %s: %s", learning_key, exc)
122135
return None
123136

124137
parsed = self._try_parse_json(response.content)
125138
if parsed is None:
126-
logger.warning("Learning synthesis returned non-JSON payload for %s", learning_key)
139+
logger.error(
140+
"Learning synthesis returned non-JSON payload for %s (model: %s). "
141+
"Response preview: %s",
142+
learning_key,
143+
client.model,
144+
response.content[:200] if response.content else "empty",
145+
)
127146
return None
128147

129148
result = self._build_result(parsed, learning_state or {})
@@ -679,6 +698,17 @@ def _teacher_guidance_digest(self, context: ExecutionContext) -> str | None:
679698
serialized = "\n".join(sorted(set(notes)))
680699
return hashlib.sha256(serialized.encode("utf-8")).hexdigest()[:16]
681700

701+
def _is_gemini_model(self, model: str) -> bool:
702+
"""Check if model is a Gemini model.
703+
704+
Args:
705+
model: Model identifier string
706+
707+
Returns:
708+
True if model is a Gemini model, False otherwise
709+
"""
710+
return model.startswith("gemini/") or model.startswith("google/")
711+
682712
@staticmethod
683713
def _clean_str(value: Any) -> str | None:
684714
if value is None:

atlas/runtime/orchestration/execution_context.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,15 @@ def __init__(self, state: ExecutionContextState) -> None:
9595

9696
@property
def metadata(self) -> dict[str, typing.Any]:
    """Return the per-context metadata dict, initialising it on first use.

    Always hands back the exact dict object stored in the underlying
    ExecutionContextState, so mutations by the caller (e.g. setting
    ``session_reward``) remain visible to every holder of this context.

    Returns:
        The shared metadata dictionary (never ``None``).
    """
    # Touch the state attribute first: per the original author's note this
    # triggers ExecutionContextState's lazy initialisation — TODO confirm.
    _ = self._state.metadata
    result = self._state.metadata.get()
    if result is None:
        # Bug fix: store and return the SAME dict. Previously a fresh {} was
        # returned after set({}), so writes to the returned dict were lost
        # from the context and session_reward was never observed downstream.
        result = {}
        self._state.metadata.set(result)
    return result
99107

100108
@property
101109
def active_function(self) -> InvocationNode:

atlas/utils/llm_client.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,17 @@ def complete(
6262
content, reasoning = self._extract_content(result)
6363
return LLMResponse(content=content, reasoning=reasoning, raw=result)
6464

65+
def _is_gemini_model(self, model: str) -> bool:
66+
"""Check if model is a Gemini model.
67+
68+
Args:
69+
model: Model identifier string
70+
71+
Returns:
72+
True if model is a Gemini model, False otherwise
73+
"""
74+
return model.startswith("gemini/") or model.startswith("google/")
75+
6576
def _prepare_kwargs(
6677
self,
6778
messages: Sequence[dict[str, Any]],
@@ -87,8 +98,6 @@ def _prepare_kwargs(
8798
if params.max_output_tokens is not None:
8899
kwargs["max_tokens"] = params.max_output_tokens
89100
kwargs["timeout"] = params.timeout_seconds
90-
if response_format:
91-
kwargs["response_format"] = response_format
92101

93102
extra_headers = dict(params.additional_headers)
94103
override_headers = overrides.pop("extra_headers", None)
@@ -105,6 +114,17 @@ def _prepare_kwargs(
105114
if supports_reasoning and params.reasoning_effort:
106115
extra_body.setdefault("reasoning_effort", params.reasoning_effort)
107116

117+
# Handle Gemini structured outputs
118+
if response_format and isinstance(response_format, dict) and response_format.get("type") == "json_object":
119+
if self._is_gemini_model(params.model):
120+
# Use Gemini structured outputs via extra_body
121+
# response_json_schema should be provided via overrides["extra_body"]
122+
extra_body.setdefault("response_mime_type", "application/json")
123+
# Don't set response_format for Gemini - it's not supported
124+
else:
125+
# Use OpenAI-style response_format for non-Gemini models
126+
kwargs["response_format"] = response_format
127+
108128
if extra_headers:
109129
kwargs["extra_headers"] = extra_headers
110130
if extra_body:
@@ -179,7 +199,19 @@ def _mock_response(
179199
if isinstance(last_message, dict):
180200
user_content = str(last_message.get("content", ""))
181201
if response_format and response_format.get("type") == "json_object":
182-
if "plan" in user_content:
202+
# Check if this looks like a learning synthesis request
203+
if "playbook_entry" in user_content or "learning" in user_content.lower():
204+
# Return mock playbook_entry.v1 structure
205+
payload = {
206+
"version": "playbook_entry.v1",
207+
"student_pamphlet": None,
208+
"teacher_pamphlet": None,
209+
"playbook_entries": [],
210+
"session_student_learning": None,
211+
"session_teacher_learning": None,
212+
"metadata": None
213+
}
214+
elif "plan" in user_content:
183215
payload = {
184216
"steps": [
185217
{

0 commit comments

Comments
 (0)