From e95d33ea5d258559888bf8c0ccc2038a455b3ce9 Mon Sep 17 00:00:00 2001 From: Nikhil Jaai Date: Sun, 22 Mar 2026 18:35:15 +0530 Subject: [PATCH 1/4] Add: beginner-friendly file analyzer example - New example demonstrating core HUD SDK concepts - Shows environment creation with custom tools - Demonstrates scenario-based evaluation - Includes clear documentation and usage examples - Fills gap between basic (00) and advanced (01/02) examples - Perfect starting point for new users learning the SDK --- examples/05_file_analyzer_agent.py | 242 +++++++++++++++++++++++++++++ examples/README.md | 22 +++ 2 files changed, 264 insertions(+) create mode 100644 examples/05_file_analyzer_agent.py diff --git a/examples/05_file_analyzer_agent.py b/examples/05_file_analyzer_agent.py new file mode 100644 index 000000000..2e13c79fb --- /dev/null +++ b/examples/05_file_analyzer_agent.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python3 +""" +File Analyzer Agent - A Beginner-Friendly Example + +This example demonstrates the core concepts of the HUD SDK: +- Creating an environment with custom tools +- Defining evaluation scenarios +- Running agents with different models +- Comparing model performance + +The agent can list files, read their contents, and analyze text statistics. + +Usage: + # Set your API key + export HUD_API_KEY="sk-hud-..." + + # Run the example + uv run python examples/05_file_analyzer_agent.py + + # Or with a specific model + uv run python examples/05_file_analyzer_agent.py --model gpt-4o + +Requirements: + - HUD_API_KEY environment variable + - uv sync (to install dependencies) +""" + +import argparse +import asyncio +from pathlib import Path + +from openai import AsyncOpenAI + +import hud +from hud.agents import OpenAIAgent +from hud.settings import settings + + +# ============================================================================= +# Environment Setup +# ============================================================================= + +env = hud.Environment("file-analyzer") + + +@env.tool() +def list_files(directory: str = ".") -> str: + """List all files in a directory. + + Args: + directory: Path to directory (default: current directory) + + Returns: + List of files as a string + """ + try: + path = Path(directory) + files = [f.name for f in path.iterdir() if f.is_file()] + return f"Files in {directory}:\n" + "\n".join(f"- {f}" for f in files) + except Exception as e: + return f"Error: {e}" + + +@env.tool() +def read_file(filepath: str) -> str: + """Read contents of a file. + + Args: + filepath: Path to the file + + Returns: + File contents (limited to first 1000 characters) + """ + try: + with open(filepath, "r", encoding="utf-8") as f: + content = f.read() + return content[:1000] # Limit to prevent token overflow + except Exception as e: + return f"Error reading file: {e}" + + +@env.tool() +def count_words(text: str) -> str: + """Count words in text and provide statistics. + + Args: + text: Text to analyze + + Returns: + Word count and statistics + """ + words = text.split() + lines = text.split("\n") + chars = len(text) + + return f"""Statistics: +- Words: {len(words)} +- Lines: {len(lines)} +- Characters: {chars} +- Average word length: {chars / len(words) if words else 0:.1f} +""" + + +# ============================================================================= +# Evaluation Scenario +# ============================================================================= + + +@env.scenario("analyze-readme") +async def analyze_readme(): + """Scenario: Analyze the README.md file in the current directory. + + The agent should: + 1. List files to confirm README.md exists + 2. Read the README.md file + 3. Count words in it + 4. Report the word count + """ + prompt = """Please analyze the README.md file in the current directory: + +1. First, list the files to confirm README.md exists +2. Read the README.md file +3. Count the words in it +4. Tell me the word count + +Use the available tools: list_files, read_file, and count_words +""" + + response = yield prompt + + # Evaluate: Did the agent mention a word count? + response_lower = response.lower() + if "word" in response_lower and any(char.isdigit() for char in response): + yield 1.0 # Success - mentioned words and a number + else: + yield 0.3 # Partial - didn't complete the task + + +# ============================================================================= +# Main Execution +# ============================================================================= + + +async def run_example(model: str = "gpt-4o-mini", verbose: bool = False): + """Run the file analyzer example. + + Args: + model: Model to use (default: gpt-4o-mini) + verbose: Enable verbose output + """ + if not settings.api_key: + print("āŒ Error: HUD_API_KEY not found!") + print("\nTo run this example:") + print("1. Get your API key from https://hud.ai/settings/api-keys") + print("2. Set it: export HUD_API_KEY='sk-hud-...'") + print("3. Run again: uv run python examples/05_file_analyzer_agent.py") + return + + print("=" * 70) + print("FILE ANALYZER AGENT") + print("=" * 70) + print(f"\nšŸ¤– Model: {model}") + print("šŸ“‹ Task: Analyze README.md file") + print("šŸ”§ Tools: list_files, read_file, count_words") + print("\n" + "=" * 70) + + # Create agent + client = AsyncOpenAI( + base_url=settings.hud_gateway_url, + api_key=settings.api_key, + ) + + agent = OpenAIAgent.create( + model=model, + model_client=client, + validate_api_key=False, + verbose=verbose, + ) + + # Run the scenario + print("\nšŸš€ Running agent...\n") + result = await env("analyze-readme").run(agent=agent, max_steps=10) + + # Display results + print("\n" + "=" * 70) + print("RESULTS") + print("=" * 70) + print(f"āœ… Task completed!") + print(f"šŸ“Š Reward: {result.reward}") + print(f"šŸ”¢ Steps taken: {len(result.trace)}") + + if result.content: + print(f"\nšŸ“ Agent's response:") + print(result.content) + + print("\n" + "=" * 70) + print("šŸ’” What happened:") + print("=" * 70) + print(""" +1. The agent received the task to analyze README.md +2. It automatically figured out which tools to use +3. It called the tools in the right order +4. It synthesized the results into a response + +This demonstrates: +- Environment creation with custom tools +- Scenario-based evaluation +- Automatic tool usage by the agent +- Reward-based success measurement + +View the full trace at the URL shown above to see every tool call! + """) + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="File Analyzer Agent - Beginner-friendly HUD SDK example", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--model", + type=str, + default="gpt-4o-mini", + help="Model to use (default: gpt-4o-mini)", + ) + parser.add_argument( + "--verbose", + action="store_true", + help="Enable verbose output", + ) + return parser.parse_args() + + +async def main(): + """Main entry point.""" + args = parse_args() + await run_example(model=args.model, verbose=args.verbose) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/README.md b/examples/README.md index 02d272156..43339fb0b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -43,6 +43,28 @@ uv run python examples/02_opencode_agent.py --plan --task "How does auth work?" > Requires `HUD_API_KEY`. Works with any model via `--model`. +## Beginner Examples + +### 05_file_analyzer_agent.py +A beginner-friendly example that demonstrates core HUD SDK concepts. The agent can list files, read their contents, and analyze text statistics. Perfect for understanding: +- How to create environments with custom tools +- How to define evaluation scenarios +- How agents automatically use tools +- How to compare different models + +```bash +# Run with default model (gpt-4o-mini) +uv run python examples/05_file_analyzer_agent.py + +# Run with a specific model +uv run python examples/05_file_analyzer_agent.py --model gpt-4o + +# Enable verbose output +uv run python examples/05_file_analyzer_agent.py --verbose +``` + +> Requires `HUD_API_KEY`. Great starting point for new users! + ## Key Concepts ### Using hud.eval() From 6aed6ea11544b3e19b51c79bdaff510cf7c5769d Mon Sep 17 00:00:00 2001 From: Nikhil Jaai Date: Sun, 22 Mar 2026 18:48:55 +0530 Subject: [PATCH 2/4] Fix: address Bugbot review comments - Fix average word length calculation to exclude whitespace - Change 'Steps taken' to 'Tool calls' for accuracy - Add comment explaining the calculation --- examples/05_file_analyzer_agent.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/05_file_analyzer_agent.py b/examples/05_file_analyzer_agent.py index 2e13c79fb..7361770cb 100644 --- a/examples/05_file_analyzer_agent.py +++ b/examples/05_file_analyzer_agent.py @@ -93,11 +93,14 @@ def count_words(text: str) -> str: lines = text.split("\n") chars = len(text) + # Calculate average word length (excluding spaces/newlines) + avg_word_length = sum(len(word) for word in words) / len(words) if words else 0 + return f"""Statistics: - Words: {len(words)} - Lines: {len(lines)} - Characters: {chars} -- Average word length: {chars / len(words) if words else 0:.1f} +- Average word length: {avg_word_length:.1f} """ @@ -187,7 +190,7 @@ async def run_example(model: str = "gpt-4o-mini", verbose: bool = False): print("=" * 70) print(f"āœ… Task completed!") print(f"šŸ“Š Reward: {result.reward}") - print(f"šŸ”¢ Steps taken: {len(result.trace)}") + print(f"šŸ”¢ Tool calls: {result.num_messages - 1}") # Subtract initial prompt if result.content: print(f"\nšŸ“ Agent's response:") From 3e864f7ec8f4b3ff913d3dbeb608ccbba139ad24 Mon Sep 17 00:00:00 2001 From: Nikhil Jaai Date: Sun, 22 Mar 2026 18:55:46 +0530 Subject: [PATCH 3/4] Security: add path validation and fix tool calls metric - Add directory traversal protection to list_files and read_file tools - Validate all file paths are within working directory before access - Fix tool calls metric to count actual MCP calls from trace - Prevent negative count when agent errors early Addresses Bugbot security and accuracy concerns --- examples/05_file_analyzer_agent.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/examples/05_file_analyzer_agent.py b/examples/05_file_analyzer_agent.py index 7361770cb..9125e65d3 100644 --- a/examples/05_file_analyzer_agent.py +++ b/examples/05_file_analyzer_agent.py @@ -54,8 +54,15 @@ def list_files(directory: str = ".") -> str: List of files as a string """ try: - path = Path(directory) - files = [f.name for f in path.iterdir() if f.is_file()] + # Resolve to absolute path and validate it's within current directory + base_path = Path.cwd().resolve() + requested_path = (base_path / directory).resolve() + + # Security: Prevent directory traversal attacks + if not str(requested_path).startswith(str(base_path)): + return "Error: Access denied - path outside working directory" + + files = [f.name for f in requested_path.iterdir() if f.is_file()] return f"Files in {directory}:\n" + "\n".join(f"- {f}" for f in files) except Exception as e: return f"Error: {e}" @@ -72,7 +79,15 @@ def read_file(filepath: str) -> str: File contents (limited to first 1000 characters) """ try: - with open(filepath, "r", encoding="utf-8") as f: + # Resolve to absolute path and validate it's within current directory + base_path = Path.cwd().resolve() + requested_path = (base_path / filepath).resolve() + + # Security: Prevent directory traversal attacks + if not str(requested_path).startswith(str(base_path)): + return "Error: Access denied - path outside working directory" + + with open(requested_path, "r", encoding="utf-8") as f: content = f.read() return content[:1000] # Limit to prevent token overflow except Exception as e: @@ -190,7 +205,9 @@ async def run_example(model: str = "gpt-4o-mini", verbose: bool = False): print("=" * 70) print(f"āœ… Task completed!") print(f"šŸ“Š Reward: {result.reward}") - print(f"šŸ”¢ Tool calls: {result.num_messages - 1}") # Subtract initial prompt + # Count actual tool calls from trace (each TraceStep with category="mcp" is a tool call) + tool_calls = len([step for step in result.trace if step.category == "mcp"]) + print(f"šŸ”¢ Tool calls: {tool_calls}") if result.content: print(f"\nšŸ“ Agent's response:") From c8923aa1c671aa04c2f9ef9207ef7d3f53afffaa Mon Sep 17 00:00:00 2001 From: Nikhil Jaai Date: Sun, 22 Mar 2026 19:01:06 +0530 Subject: [PATCH 4/4] Fix: add None check for agent response in scenario - Add guard to prevent AttributeError when agent returns no content - Handle edge case where response is None before calling .lower() Addresses Bugbot review comment --- examples/05_file_analyzer_agent.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/05_file_analyzer_agent.py b/examples/05_file_analyzer_agent.py index 9125e65d3..4f8472217 100644 --- a/examples/05_file_analyzer_agent.py +++ b/examples/05_file_analyzer_agent.py @@ -147,11 +147,14 @@ async def analyze_readme(): response = yield prompt # Evaluate: Did the agent mention a word count? - response_lower = response.lower() - if "word" in response_lower and any(char.isdigit() for char in response): - yield 1.0 # Success - mentioned words and a number + if not response: + yield 0.0 # Failed - no response from agent else: - yield 0.3 # Partial - didn't complete the task + response_lower = response.lower() + if "word" in response_lower and any(char.isdigit() for char in response): + yield 1.0 # Success - mentioned words and a number + else: + yield 0.3 # Partial - didn't complete the task # =============================================================================