#!/usr/bin/env python3
"""
File Analyzer Agent - A Beginner-Friendly Example

This example demonstrates the core concepts of the HUD SDK:
- Creating an environment with custom tools
- Defining evaluation scenarios
- Running agents with different models (selected via --model)
- Reading the reward produced by a scenario

The agent can list files, read their contents, and analyze text statistics.

Usage:
    # Set your API key
    export HUD_API_KEY="sk-hud-..."

    # Run the example
    uv run python examples/05_file_analyzer_agent.py

    # Or with a specific model
    uv run python examples/05_file_analyzer_agent.py --model gpt-4o

Requirements:
    - HUD_API_KEY environment variable
    - uv sync (to install dependencies)
"""

import argparse
import asyncio
from pathlib import Path
from typing import Optional

from openai import AsyncOpenAI

import hud
from hud.agents import OpenAIAgent
from hud.settings import settings

# Upper bound on the number of characters read_file returns, so a large file
# cannot flood the model's context.
MAX_READ_CHARS = 1000

_ACCESS_DENIED = "Error: Access denied - path outside working directory"


# =============================================================================
# Environment Setup
# =============================================================================

env = hud.Environment("file-analyzer")


def _resolve_in_workdir(user_path: str) -> Optional[Path]:
    """Resolve *user_path* against the current working directory.

    Args:
        user_path: Relative or absolute path supplied by the agent.

    Returns:
        The fully resolved path when it lies inside the working directory
        (or is the working directory itself), otherwise ``None``.
    """
    base_path = Path.cwd().resolve()
    candidate = (base_path / user_path).resolve()
    try:
        # Component-wise containment check. A plain string-prefix comparison
        # would wrongly accept siblings such as "/work-secret" for "/work".
        candidate.relative_to(base_path)
    except ValueError:
        return None
    return candidate


@env.tool()
def list_files(directory: str = ".") -> str:
    """List all files in a directory.

    Args:
        directory: Path to directory (default: current directory)

    Returns:
        The file names as a bulleted string, sorted alphabetically, or an
        "Error: ..." message if the directory is outside the working
        directory or cannot be listed.
    """
    # Tool boundary: failures are reported back to the agent as text instead
    # of being raised, hence the broad except.
    try:
        requested_path = _resolve_in_workdir(directory)
        if requested_path is None:
            return _ACCESS_DENIED

        # iterdir() yields entries in arbitrary order; sort for stable output.
        names = sorted(
            entry.name for entry in requested_path.iterdir() if entry.is_file()
        )
        return f"Files in {directory}:\n" + "\n".join(f"- {name}" for name in names)
    except Exception as e:
        return f"Error: {e}"


@env.tool()
def read_file(filepath: str) -> str:
    """Read contents of a file.

    Args:
        filepath: Path to the file

    Returns:
        File contents (limited to the first MAX_READ_CHARS characters), or an
        error message if the path is outside the working directory or the
        file cannot be read as UTF-8 text.
    """
    # Tool boundary: failures are reported back to the agent as text instead
    # of being raised, hence the broad except.
    try:
        requested_path = _resolve_in_workdir(filepath)
        if requested_path is None:
            return _ACCESS_DENIED

        with requested_path.open("r", encoding="utf-8") as f:
            # Read only what is returned instead of loading the whole file
            # and slicing it afterwards.
            return f.read(MAX_READ_CHARS)
    except Exception as e:
        return f"Error reading file: {e}"


@env.tool()
def count_words(text: str) -> str:
    """Count words in text and provide statistics.

    Args:
        text: Text to analyze

    Returns:
        A multi-line summary with the number of words, lines and characters
        and the average word length.
    """
    words = text.split()
    # splitlines() reports 0 lines for empty text and does not count a
    # trailing newline as an additional empty line.
    line_count = len(text.splitlines())
    chars = len(text)

    # Average over whitespace-separated words; 0 when there are none.
    avg_word_length = sum(len(word) for word in words) / len(words) if words else 0

    return f"""Statistics:
- Words: {len(words)}
- Lines: {line_count}
- Characters: {chars}
- Average word length: {avg_word_length:.1f}
"""


# =============================================================================
# Evaluation Scenario
# =============================================================================


@env.scenario("analyze-readme")
async def analyze_readme():
    """Scenario: Analyze the README.md file in the current directory.

    The agent should:
    1. List files to confirm README.md exists
    2. Read the README.md file
    3. Count words in it
    4. Report the word count

    Yields:
        First the task prompt, then a reward: 0.0 for an empty response,
        1.0 when the response mentions "word" and contains a digit, and
        0.3 otherwise.
    """
    prompt = """Please analyze the README.md file in the current directory:

1. First, list the files to confirm README.md exists
2. Read the README.md file
3. Count the words in it
4. Tell me the word count

Use the available tools: list_files, read_file, and count_words
"""

    response = yield prompt

    if not response:
        yield 0.0  # Failed - no response from agent
        return

    # Heuristic check only: the reported number is not compared against the
    # real word count of the file.
    mentions_words = "word" in response.lower()
    has_number = any(char.isdigit() for char in response)
    if mentions_words and has_number:
        yield 1.0  # Success - mentioned words and a number
    else:
        yield 0.3  # Partial - didn't complete the task


# =============================================================================
# Main Execution
# =============================================================================


async def run_example(model: str = "gpt-4o-mini", verbose: bool = False):
    """Run the file analyzer example.

    Prints setup instructions and returns early when no HUD API key is
    configured; otherwise runs the "analyze-readme" scenario and prints the
    reward, the number of tool calls and the agent's final response.

    Args:
        model: Model to use (default: gpt-4o-mini)
        verbose: Enable verbose output
    """
    if not settings.api_key:
        print("❌ Error: HUD_API_KEY not found!")
        print("\nTo run this example:")
        print("1. Get your API key from https://hud.ai/settings/api-keys")
        print("2. Set it: export HUD_API_KEY='sk-hud-...'")
        print("3. Run again: uv run python examples/05_file_analyzer_agent.py")
        return

    print("=" * 70)
    print("FILE ANALYZER AGENT")
    print("=" * 70)
    print(f"\n🤖 Model: {model}")
    print("📋 Task: Analyze README.md file")
    print("🔧 Tools: list_files, read_file, count_words")
    print("\n" + "=" * 70)

    # Create agent; model requests go through the HUD gateway URL from settings.
    client = AsyncOpenAI(
        base_url=settings.hud_gateway_url,
        api_key=settings.api_key,
    )

    agent = OpenAIAgent.create(
        model=model,
        model_client=client,
        validate_api_key=False,
        verbose=verbose,
    )

    # Run the scenario
    print("\n🚀 Running agent...\n")
    result = await env("analyze-readme").run(agent=agent, max_steps=10)

    # Display results
    print("\n" + "=" * 70)
    print("RESULTS")
    print("=" * 70)
    print("✅ Task completed!")
    print(f"📊 Reward: {result.reward}")
    # Trace steps whose category is "mcp" are counted as tool calls.
    tool_calls = sum(1 for step in result.trace if step.category == "mcp")
    print(f"🔢 Tool calls: {tool_calls}")

    if result.content:
        print("\n📝 Agent's response:")
        print(result.content)

    print("\n" + "=" * 70)
    print("💡 What happened:")
    print("=" * 70)
    print("""
1. The agent received the task to analyze README.md
2. It automatically figured out which tools to use
3. It called the tools in the right order
4. It synthesized the results into a response

This demonstrates:
- Environment creation with custom tools
- Scenario-based evaluation
- Automatic tool usage by the agent
- Reward-based success measurement

View the full trace at the URL shown above to see every tool call!
    """)


def parse_args():
    """Parse command line arguments.

    Returns:
        An argparse.Namespace with ``model`` (str) and ``verbose`` (bool).
    """
    parser = argparse.ArgumentParser(
        description="File Analyzer Agent - Beginner-friendly HUD SDK example",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--model",
        type=str,
        default="gpt-4o-mini",
        help="Model to use (default: gpt-4o-mini)",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose output",
    )
    return parser.parse_args()


async def main():
    """Main entry point: parse CLI arguments and run the example."""
    args = parse_args()
    await run_example(model=args.model, verbose=args.verbose)


if __name__ == "__main__":
    asyncio.run(main())