feat(ci): show full orchestration for each E2E scenario

Michspirit99 · Michspirit99 · commit b6776f194d2a · 2026-02-08T11:35:14.000-08:00
- Display complete workflow execution for every sample
- Show prompts, AI responses, tool calls, and intermediate steps
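- Preserve full context of what each agent actually does by returning the complete cleaned output instead of only the last meaningful line truncated to 120 characters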
- Map emoji markers to ASCII-safe labels (e.g. [AI], [INPUT], [TOOL]) for inputs, outputs, and processing stages instead of stripping them
diff --git a/scripts/run_agent_scenarios.py b/scripts/run_agent_scenarios.py
@@ -105,41 +105,33 @@ def clean_text(text: str) -> str:
                         '❌': '[FAIL]',
                         '✗': '[FAIL]',
                         '⚠️': '[WARN]',
-                        '🤖': '',
-                        '📝': '',
-                        '📋': '',
-                        '🧪': '',
-                        '🔄': '',
-                        '🔧': '',
-                        '📄': '',
-                        '📊': '',
-                        '⚙️': '',
-                        '🌐': '',
-                        '🎯': '',
-                        '🔐': '',
-                        '📍': '',
-                        '🎲': '',
-                        '🔢': '',
+                        '🤖': '[AI]',
+                        '📝': '[INPUT]',
+                        '📋': '[INFO]',
+                        '🧪': '[TEST]',
+                        '🔄': '[PROCESS]',
+                        '🔧': '[TOOL]',
+                        '📄': '[FILE]',
+                        '📊': '[DATA]',
+                        '⚙️': '[CONFIG]',
+                        '🌐': '[WEB]',
+                        '🎯': '[TARGET]',
+                        '🔐': '[AUTH]',
+                        '📍': '[LOCATION]',
+                        '🎲': '[GEN]',
+                        '🔢': '[NUM]',
                     }
                     for old, new in replacements.items():
                         text = text.replace(old, new)
                     # Remove any remaining non-ASCII
                     return ''.join(c if ord(c) < 128 else '' for c in text)
                 
-                # Return first meaningful line or summary
+                # Return full output (cleaned)
                 if output:
-                    output = clean_text(output)
-                    lines = [l.strip() for l in output.split('\n') if l.strip()]
-                    # Get the last substantial line (often the result)
-                    meaningful = [l for l in lines if not l.startswith(('Running', 'Loading', 'Connecting'))]
-                    if meaningful:
-                        detail = meaningful[-1][:120]
-                    else:
-                        detail = lines[-1][:120] if lines else "Completed"
+                    full_output = clean_text(output.strip())
+                    return ScenarioResult(name, True, full_output)
                 else:
-                    detail = "Completed successfully"
-                
-                return ScenarioResult(name, True, detail)
+                    return ScenarioResult(name, True, "Completed successfully (no output)")
             else:
                 return ScenarioResult(name, False, "No main() function found")
         finally:
@@ -158,7 +150,7 @@ def clean_text(text: str) -> str:
             return ScenarioResult(name, True, "Completed")
         return ScenarioResult(name, False, f"Exit code {e.code}")
     except Exception as e:
-        error_msg = str(e)[:80]
+        error_msg = str(e)[:200]
         return ScenarioResult(name, False, error_msg)
 
 
@@ -237,7 +229,7 @@ async def run(provider: str, model: str) -> int:
     # Print summary
     print()
     print("=" * 80)
-    print("SCENARIO RESULTS")
+    print("SCENARIO EXECUTION RESULTS")
     print("=" * 80)
     print()
     
@@ -247,27 +239,21 @@ async def run(provider: str, model: str) -> int:
         status = "PASS" if r.ok else "FAIL"
         marker = "+" if r.ok else "!"
         
-        # Print scenario with details
-        print(f"{marker} {r.name}")
+        # Print scenario header
+        print(f"{marker} {r.name.upper()}")
         print(f"  Status: {status}")
+        print()
+        
         if r.details and r.details != r.name:
-            # Wrap long details
-            detail_lines = []
-            current_line = ""
-            words = r.details.split()
-            for word in words:
-                if len(current_line) + len(word) + 1 <= 74:
-                    current_line += (" " if current_line else "") + word
-                else:
-                    if current_line:
-                        detail_lines.append(current_line)
-                    current_line = word
-            if current_line:
-                detail_lines.append(current_line)
-            
-            print(f"  Result: {detail_lines[0]}")
-            for line in detail_lines[1:]:
-                print(f"          {line}")
+            # Print full execution details with indentation
+            lines = r.details.split('\n')
+            for line in lines:
+                # Indent each line for readability
+                if line.strip():
+                    print(f"  {line}")
+            print()
+        
+        print("-" * 80)
         print()
         
         if r.ok: