From e95d33ea5d258559888bf8c0ccc2038a455b3ce9 Mon Sep 17 00:00:00 2001
From: Nikhil Jaai <nikhilraina95@gmail.com>
Date: Sun, 22 Mar 2026 18:35:15 +0530
Subject: [PATCH 1/4] Add: beginner-friendly file analyzer example

- New example demonstrating core HUD SDK concepts
- Shows environment creation with custom tools
- Demonstrates scenario-based evaluation
- Includes clear documentation and usage examples
- Fills gap between basic (00) and advanced (01/02) examples
- Perfect starting point for new users learning the SDK
---
 examples/05_file_analyzer_agent.py | 242 +++++++++++++++++++++++++++++
 examples/README.md                 |  22 +++
 2 files changed, 264 insertions(+)
 create mode 100644 examples/05_file_analyzer_agent.py

diff --git a/examples/05_file_analyzer_agent.py b/examples/05_file_analyzer_agent.py
new file mode 100644
index 000000000..2e13c79fb
--- /dev/null
+++ b/examples/05_file_analyzer_agent.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+"""
+File Analyzer Agent - A Beginner-Friendly Example
+
+This example demonstrates the core concepts of the HUD SDK:
+- Creating an environment with custom tools
+- Defining evaluation scenarios
+- Running agents with different models
+- Comparing model performance by re-running with a different --model
+
+The agent can list files, read their contents, and analyze text statistics.
+
+Usage:
+  # Set your API key
+  export HUD_API_KEY="sk-hud-..."
+
+  # Run the example
+  uv run python examples/05_file_analyzer_agent.py
+
+  # Or with a specific model
+  uv run python examples/05_file_analyzer_agent.py --model gpt-4o
+
+Requirements:
+  - HUD_API_KEY environment variable
+  - uv sync (to install dependencies)
+"""
+
+import argparse
+import asyncio
+from pathlib import Path
+
+from openai import AsyncOpenAI
+
+import hud
+from hud.agents import OpenAIAgent
+from hud.settings import settings
+
+
+# =============================================================================
+# Environment Setup
+# =============================================================================
+
+env = hud.Environment("file-analyzer")
+
+
+@env.tool()
+def list_files(directory: str = ".") -> str:
+    """List all files in a directory.
+
+    Args:
+        directory: Path to directory (default: current directory)
+
+    Returns:
+        List of files as a string
+    """
+    try:
+        path = Path(directory)
+        files = [f.name for f in path.iterdir() if f.is_file()]
+        return f"Files in {directory}:\n" + "\n".join(f"- {f}" for f in files)
+    except Exception as e:
+        return f"Error: {e}"
+
+
+@env.tool()
+def read_file(filepath: str) -> str:
+    """Read contents of a file.
+
+    Args:
+        filepath: Path to the file
+
+    Returns:
+        File contents (limited to first 1000 characters)
+    """
+    try:
+        with open(filepath, "r", encoding="utf-8") as f:
+            content = f.read()
+        return content[:1000]  # Limit to prevent token overflow
+    except Exception as e:
+        return f"Error reading file: {e}"
+
+
+@env.tool()
+def count_words(text: str) -> str:
+    """Count words in text and provide statistics.
+
+    Args:
+        text: Text to analyze
+
+    Returns:
+        Word count and statistics
+    """
+    words = text.split()
+    lines = text.split("\n")
+    chars = len(text)
+
+    return f"""Statistics:
+- Words: {len(words)}
+- Lines: {len(lines)}
+- Characters: {chars}
+- Average word length: {chars / len(words) if words else 0:.1f}
+"""
+
+
+# =============================================================================
+# Evaluation Scenario
+# =============================================================================
+
+
+@env.scenario("analyze-readme")
+async def analyze_readme():
+    """Scenario: Analyze the README.md file in the current directory.
+
+    The agent should:
+    1. List files to confirm README.md exists
+    2. Read the README.md file
+    3. Count words in it
+    4. Report the word count
+    """
+    prompt = """Please analyze the README.md file in the current directory:
+
+1. First, list the files to confirm README.md exists
+2. Read the README.md file
+3. Count the words in it
+4. Tell me the word count
+
+Use the available tools: list_files, read_file, and count_words
+"""
+
+    response = yield prompt
+
+    # Evaluate: Did the agent mention a word count?
+    response_lower = response.lower()
+    if "word" in response_lower and any(char.isdigit() for char in response):
+        yield 1.0  # Success - mentioned words and a number
+    else:
+        yield 0.3  # Partial - didn't complete the task
+
+
+# =============================================================================
+# Main Execution
+# =============================================================================
+
+
+async def run_example(model: str = "gpt-4o-mini", verbose: bool = False):
+    """Run the file analyzer example.
+
+    Args:
+        model: Model to use (default: gpt-4o-mini)
+        verbose: Enable verbose output
+    """
+    if not settings.api_key:
+        print("❌ Error: HUD_API_KEY not found!")
+        print("\nTo run this example:")
+        print("1. Get your API key from https://hud.ai/settings/api-keys")
+        print("2. Set it: export HUD_API_KEY='sk-hud-...'")
+        print("3. Run again: uv run python examples/05_file_analyzer_agent.py")
+        return
+
+    print("=" * 70)
+    print("FILE ANALYZER AGENT")
+    print("=" * 70)
+    print(f"\n🤖 Model: {model}")
+    print("📋 Task: Analyze README.md file")
+    print("🔧 Tools: list_files, read_file, count_words")
+    print("\n" + "=" * 70)
+
+    # Create agent
+    client = AsyncOpenAI(
+        base_url=settings.hud_gateway_url,
+        api_key=settings.api_key,
+    )
+
+    agent = OpenAIAgent.create(
+        model=model,
+        model_client=client,
+        validate_api_key=False,
+        verbose=verbose,
+    )
+
+    # Run the scenario
+    print("\n🚀 Running agent...\n")
+    result = await env("analyze-readme").run(agent=agent, max_steps=10)
+
+    # Display results
+    print("\n" + "=" * 70)
+    print("RESULTS")
+    print("=" * 70)
+    print(f"✅ Task completed!")
+    print(f"📊 Reward: {result.reward}")
+    print(f"🔢 Steps taken: {len(result.trace)}")
+
+    if result.content:
+        print(f"\n📝 Agent's response:")
+        print(result.content)
+
+    print("\n" + "=" * 70)
+    print("💡 What happened:")
+    print("=" * 70)
+    print("""
+1. The agent received the task to analyze README.md
+2. It automatically figured out which tools to use
+3. It called the tools in the right order
+4. It synthesized the results into a response
+
+This demonstrates:
+- Environment creation with custom tools
+- Scenario-based evaluation
+- Automatic tool usage by the agent
+- Reward-based success measurement
+
+View the full trace at the URL shown above to see every tool call!
+    """)
+
+
+def parse_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="File Analyzer Agent - Beginner-friendly HUD SDK example",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "--model",
+        type=str,
+        default="gpt-4o-mini",
+        help="Model to use (default: gpt-4o-mini)",
+    )
+    parser.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Enable verbose output",
+    )
+    return parser.parse_args()
+
+
+async def main():
+    """Main entry point."""
+    args = parse_args()
+    await run_example(model=args.model, verbose=args.verbose)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/README.md b/examples/README.md
index 02d272156..43339fb0b 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -43,6 +43,28 @@ uv run python examples/02_opencode_agent.py --plan --task "How does auth work?"
 
 > Requires `HUD_API_KEY`. Works with any model via `--model`.
 
+## Beginner Examples
+
+### 05_file_analyzer_agent.py
+A beginner-friendly example that demonstrates core HUD SDK concepts. The agent can list files, read their contents, and analyze text statistics. Perfect for understanding:
+- How to create environments with custom tools
+- How to define evaluation scenarios
+- How agents automatically use tools
+- How to compare different models by re-running with `--model`
+
+```bash
+# Run with default model (gpt-4o-mini)
+uv run python examples/05_file_analyzer_agent.py
+
+# Run with a specific model
+uv run python examples/05_file_analyzer_agent.py --model gpt-4o
+
+# Enable verbose output
+uv run python examples/05_file_analyzer_agent.py --verbose
+```
+
+> Requires `HUD_API_KEY`. Great starting point for new users!
+
 ## Key Concepts
 
 ### Using hud.eval()

From 6aed6ea11544b3e19b51c79bdaff510cf7c5769d Mon Sep 17 00:00:00 2001
From: Nikhil Jaai <nikhilraina95@gmail.com>
Date: Sun, 22 Mar 2026 18:48:55 +0530
Subject: [PATCH 2/4] Fix: address Bugbot review comments

- Fix average word length calculation to exclude whitespace
- Change 'Steps taken' to 'Tool calls' for accuracy
- Add comment explaining the calculation
---
 examples/05_file_analyzer_agent.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/05_file_analyzer_agent.py b/examples/05_file_analyzer_agent.py
index 2e13c79fb..7361770cb 100644
--- a/examples/05_file_analyzer_agent.py
+++ b/examples/05_file_analyzer_agent.py
@@ -93,11 +93,14 @@ def count_words(text: str) -> str:
     lines = text.split("\n")
     chars = len(text)
 
+    # Calculate average word length (excluding spaces/newlines)
+    avg_word_length = sum(len(word) for word in words) / len(words) if words else 0
+
     return f"""Statistics:
 - Words: {len(words)}
 - Lines: {len(lines)}
 - Characters: {chars}
-- Average word length: {chars / len(words) if words else 0:.1f}
+- Average word length: {avg_word_length:.1f}
 """
 
 
@@ -187,7 +190,7 @@ async def run_example(model: str = "gpt-4o-mini", verbose: bool = False):
     print("=" * 70)
     print(f"✅ Task completed!")
     print(f"📊 Reward: {result.reward}")
-    print(f"🔢 Steps taken: {len(result.trace)}")
+    print(f"🔢 Tool calls: {result.num_messages - 1}")  # Subtract initial prompt
 
     if result.content:
         print(f"\n📝 Agent's response:")

From 3e864f7ec8f4b3ff913d3dbeb608ccbba139ad24 Mon Sep 17 00:00:00 2001
From: Nikhil Jaai <nikhilraina95@gmail.com>
Date: Sun, 22 Mar 2026 18:55:46 +0530
Subject: [PATCH 3/4] Security: add path validation and fix tool calls metric

- Add directory traversal protection to list_files and read_file tools
- Validate all file paths are within working directory before access
- Fix tool calls metric to count actual MCP calls from trace
- Prevent negative count when agent errors early

Addresses Bugbot security and accuracy concerns
---
 examples/05_file_analyzer_agent.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/examples/05_file_analyzer_agent.py b/examples/05_file_analyzer_agent.py
index 7361770cb..9125e65d3 100644
--- a/examples/05_file_analyzer_agent.py
+++ b/examples/05_file_analyzer_agent.py
@@ -54,8 +54,15 @@ def list_files(directory: str = ".") -> str:
         List of files as a string
     """
     try:
-        path = Path(directory)
-        files = [f.name for f in path.iterdir() if f.is_file()]
+        # Resolve symlinks and ".." before validating the real target
+        base_path = Path.cwd().resolve()
+        requested_path = (base_path / directory).resolve()
+
+        # Security: compare path components, not string prefixes, so a
+        # sibling such as "<cwd>-backup" cannot pass as being inside cwd
+        if base_path not in (requested_path, *requested_path.parents):
+            return "Error: Access denied - path outside working directory"
+        files = [f.name for f in requested_path.iterdir() if f.is_file()]
         return f"Files in {directory}:\n" + "\n".join(f"- {f}" for f in files)
     except Exception as e:
         return f"Error: {e}"
@@ -72,7 +79,15 @@ def read_file(filepath: str) -> str:
         File contents (limited to first 1000 characters)
     """
     try:
-        with open(filepath, "r", encoding="utf-8") as f:
+        # Resolve symlinks and ".." before validating the real target
+        base_path = Path.cwd().resolve()
+        requested_path = (base_path / filepath).resolve()
+
+        # Security: compare path components, not string prefixes, so a
+        # sibling such as "<cwd>-backup" cannot pass as being inside cwd
+        if base_path not in (requested_path, *requested_path.parents):
+            return "Error: Access denied - path outside working directory"
+        with open(requested_path, "r", encoding="utf-8") as f:
             content = f.read()
         return content[:1000]  # Limit to prevent token overflow
     except Exception as e:
@@ -190,7 +205,9 @@ async def run_example(model: str = "gpt-4o-mini", verbose: bool = False):
     print("=" * 70)
     print(f"✅ Task completed!")
     print(f"📊 Reward: {result.reward}")
-    print(f"🔢 Tool calls: {result.num_messages - 1}")  # Subtract initial prompt
+    # Count actual tool calls from trace (each TraceStep with category="mcp" is a tool call)
+    tool_calls = sum(1 for step in result.trace if step.category == "mcp")
+    print(f"🔢 Tool calls: {tool_calls}")
 
     if result.content:
         print(f"\n📝 Agent's response:")

From c8923aa1c671aa04c2f9ef9207ef7d3f53afffaa Mon Sep 17 00:00:00 2001
From: Nikhil Jaai <nikhilraina95@gmail.com>
Date: Sun, 22 Mar 2026 19:01:06 +0530
Subject: [PATCH 4/4] Fix: add None check for agent response in scenario

- Add guard to prevent AttributeError when agent returns no content
- Handle edge case where response is None before calling .lower()

Addresses Bugbot review comment
---
 examples/05_file_analyzer_agent.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/examples/05_file_analyzer_agent.py b/examples/05_file_analyzer_agent.py
index 9125e65d3..4f8472217 100644
--- a/examples/05_file_analyzer_agent.py
+++ b/examples/05_file_analyzer_agent.py
@@ -147,11 +147,14 @@ async def analyze_readme():
     response = yield prompt
 
     # Evaluate: Did the agent mention a word count?
-    response_lower = response.lower()
-    if "word" in response_lower and any(char.isdigit() for char in response):
-        yield 1.0  # Success - mentioned words and a number
+    if not response:
+        yield 0.0  # Failed - no response from agent
     else:
-        yield 0.3  # Partial - didn't complete the task
+        response_lower = response.lower()
+        if "word" in response_lower and any(char.isdigit() for char in response):
+            yield 1.0  # Success - mentioned words and a number
+        else:
+            yield 0.3  # Partial - didn't complete the task
 
 
 # =============================================================================