Skip to content

Latest commit

 

History

History
651 lines (524 loc) · 16.7 KB

File metadata and controls

651 lines (524 loc) · 16.7 KB

Tool System - Architecture and Patterns

What Are Tools?

Tools are functions an LLM can call to interact with the real world:

  • Read/write files - Access the filesystem
  • Execute code - Run shell commands or scripts
  • Search the web - Get external information
  • Call APIs - Interact with services

Without tools, an LLM is just a text generator. With tools, it becomes an agent.


Tool Calling Flow

User: "What's in the file test.txt?"
  ↓
Agent → LLM: [context + available tools]
  ↓
LLM Response: {
  tool_call: "read_file",
  arguments: {"path": "test.txt"}
}
  ↓
Agent → Tool Executor: Execute read_file("test.txt")
  ↓
Tool Result: "Hello world"
  ↓
Agent → LLM: [context + tool result]
  ↓
LLM Response: "The file contains: Hello world"
  ↓
Agent → User: "The file contains: Hello world"

Tool Definition

JSON Schema Format (Ollama-compatible)

{
  "type": "function",
  "function": {
    "name": "read_file",
    "description": "Read the contents of a file. Use this when the user asks to see a file's contents.",
    "parameters": {
      "type": "object",
      "properties": {
        "path": {
          "type": "string",
          "description": "Path to the file to read (relative or absolute)"
        }
      },
      "required": ["path"]
    }
  }
}

Key fields:

  • name - Unique identifier (snake_case)
  • description - When/why to use this tool (critical for LLM decision-making)
  • parameters - JSON Schema for arguments
  • required - Which parameters are mandatory

Python Implementation

Base Tool Class

# src/tools/base.py
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Optional

@dataclass
class ToolResult:
    """Outcome of a single tool invocation.

    Returned by every ``Tool.execute`` implementation so callers can branch on
    ``success`` instead of catching exceptions.
    """
    # True when the tool reports that it completed its work.
    success: bool
    # Text produced by the tool.
    output: str
    # Human-readable failure description; None when no error was recorded.
    # Annotated Optional because the default really is None.
    error: Optional[str] = None

class Tool(ABC):
    """Abstract contract shared by every tool.

    Subclasses provide ``name``, ``description`` and ``parameters`` as
    properties and implement ``execute``; ``to_ollama_format`` is inherited.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Identifier under which the tool is exposed."""

    @property
    @abstractmethod
    def description(self) -> str:
        """Explanation of when and why the tool should be used."""

    @property
    @abstractmethod
    def parameters(self) -> Dict[str, Any]:
        """JSON Schema describing the accepted arguments."""

    @abstractmethod
    def execute(self, **kwargs) -> ToolResult:
        """Perform the tool's work with the given keyword arguments."""

    def to_ollama_format(self) -> Dict[str, Any]:
        """Build the function-calling description of this tool for Ollama."""
        function_spec = {
            "name": self.name,
            "description": self.description,
            "parameters": self.parameters,
        }
        return {"type": "function", "function": function_spec}

Example Tools

1. File Operations

# src/tools/filesystem.py
import os
from pathlib import Path
from .base import Tool, ToolResult

class ReadFileTool(Tool):
    """Read a text file from inside the agent's sandbox directory.

    Paths are interpreted relative to ``~/dev/selfhosted-agent``; anything
    that resolves to a location outside that directory is rejected.
    """

    @property
    def name(self) -> str:
        """Identifier the LLM uses to call this tool."""
        return "read_file"

    @property
    def description(self) -> str:
        """Guidance for the LLM about when to use this tool."""
        return "Read the contents of a file. Use when user asks to see a file's contents."

    @property
    def parameters(self) -> dict:
        """JSON Schema declaring the single required ``path`` argument."""
        return {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file (relative or absolute)"
                }
            },
            "required": ["path"]
        }

    def execute(self, path: str) -> ToolResult:
        """Return the file's text, or a failed result explaining why not.

        Args:
            path: Location of the file, relative to the sandbox directory.
                An absolute path is accepted only if it points inside it.

        Returns:
            ToolResult with ``success=True`` and the text in ``output``, or
            ``success=False`` and a message in ``error``.
        """
        try:
            # Security: restrict to safe directory. The root is resolved too,
            # otherwise a symlinked home directory makes every resolved file
            # path look like it lies outside the sandbox.
            safe_dir = (Path.home() / "dev" / "selfhosted-agent").resolve()
            file_path = (safe_dir / path).resolve()

            # Compare path components, not string prefixes: a sibling such as
            # ".../selfhosted-agent-secrets" shares the prefix but is outside.
            inside_sandbox = file_path == safe_dir or safe_dir in file_path.parents
            if not inside_sandbox:
                return ToolResult(
                    success=False,
                    output="",
                    error=f"Access denied: {path} is outside safe directory"
                )

            # Read file
            content = file_path.read_text()
            return ToolResult(
                success=True,
                output=content
            )
        except FileNotFoundError:
            return ToolResult(
                success=False,
                output="",
                error=f"File not found: {path}"
            )
        except PermissionError:
            return ToolResult(
                success=False,
                output="",
                error=f"Permission denied: {path}"
            )
        except Exception as e:
            # Tool boundary: report any other failure to the caller as data.
            return ToolResult(
                success=False,
                output="",
                error=f"Error reading file: {str(e)}"
            )


class WriteFileTool(Tool):
    """Create or overwrite a text file inside the agent's sandbox directory.

    Paths are interpreted relative to ``~/dev/selfhosted-agent``; anything
    that resolves to a location outside that directory is rejected.
    """

    @property
    def name(self) -> str:
        """Identifier the LLM uses to call this tool."""
        return "write_file"

    @property
    def description(self) -> str:
        """Guidance for the LLM about when to use this tool."""
        return "Write content to a file. Creates the file if it doesn't exist, overwrites if it does."

    @property
    def parameters(self) -> dict:
        """JSON Schema declaring the required ``path`` and ``content`` arguments."""
        return {
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file"
                },
                "content": {
                    "type": "string",
                    "description": "Content to write to the file"
                }
            },
            "required": ["path", "content"]
        }

    def execute(self, path: str, content: str) -> ToolResult:
        """Write ``content`` to ``path`` and report how much was written.

        Args:
            path: Destination, relative to the sandbox directory. An absolute
                path is accepted only if it points inside it.
            content: Text that replaces the file's previous content.

        Returns:
            ToolResult with ``success=True`` and a confirmation in ``output``,
            or ``success=False`` and a message in ``error``.
        """
        try:
            # Resolve the sandbox root as well, otherwise a symlinked home
            # directory makes every resolved target look like an escape.
            safe_dir = (Path.home() / "dev" / "selfhosted-agent").resolve()
            file_path = (safe_dir / path).resolve()

            # Compare path components, not string prefixes: a sibling such as
            # ".../selfhosted-agent-secrets" shares the prefix but is outside,
            # and for a write tool that would mean clobbering foreign files.
            inside_sandbox = file_path == safe_dir or safe_dir in file_path.parents
            if not inside_sandbox:
                return ToolResult(
                    success=False,
                    output="",
                    error=f"Access denied: {path} is outside safe directory"
                )

            # Create parent directories if needed
            file_path.parent.mkdir(parents=True, exist_ok=True)

            # Write file
            file_path.write_text(content)
            return ToolResult(
                success=True,
                output=f"Successfully wrote {len(content)} characters to {path}"
            )
        except Exception as e:
            # Tool boundary: report the failure to the caller as data.
            return ToolResult(
                success=False,
                output="",
                error=f"Error writing file: {str(e)}"
            )

2. Shell Execution

# src/tools/shell.py
import subprocess
import shlex
from .base import Tool, ToolResult

class ShellTool(Tool):
    """Run an allow-listed command as a subprocess (no shell is involved).

    The command string is tokenised with ``shlex.split`` and executed from
    ``~/dev/selfhosted-agent`` with a 30 second timeout.
    """

    # Whitelist of allowed commands, matched against the first token only.
    # NOTE(review): interpreters and package managers on this list (python,
    # node, pip, npm) can run arbitrary code themselves, so the list limits
    # the entry point rather than what a command can ultimately do.
    ALLOWED_COMMANDS = {
        "ls", "dir", "cat", "echo", "pwd", "date",
        "python", "pip", "git", "npm", "node"
    }

    # Blacklist of dangerous patterns, matched as case-insensitive substrings
    # of the whole command. Coarse by design: "format" also rejects harmless
    # commands that merely contain that word.
    DANGEROUS_PATTERNS = [
        "rm -rf", "del /f", "format", "mkfs",
        "> /dev/", "dd if=", ":(){ :|:& };:"
    ]

    @property
    def name(self) -> str:
        """Identifier the LLM uses to call this tool."""
        return "execute_shell"

    @property
    def description(self) -> str:
        """Guidance for the LLM about when to use this tool."""
        return "Execute a shell command. Use for running scripts, checking system info, or file operations."

    @property
    def parameters(self) -> dict:
        """JSON Schema declaring the single required ``command`` argument."""
        return {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "Shell command to execute"
                }
            },
            "required": ["command"]
        }

    def execute(self, command: str) -> ToolResult:
        """Run ``command`` if it passes the safety checks.

        Args:
            command: Command line to tokenise and execute.

        Returns:
            ToolResult whose ``success`` mirrors a zero exit code. ``output``
            holds stdout, followed by a ``Stderr:`` section when stderr is
            non-empty. Blocked commands, timeouts and launch failures come
            back as failed results with an explanatory ``error``.
        """
        # Imported locally because this module's import block never brings
        # Path into scope; without it the cwd expression below raised
        # NameError, which the generic handler turned into a failure on
        # every single call.
        from pathlib import Path

        # Safety checks
        if not self._is_safe(command):
            return ToolResult(
                success=False,
                output="",
                error=f"Command blocked for safety: {command}"
            )

        try:
            # Run command with timeout
            result = subprocess.run(
                shlex.split(command),
                capture_output=True,
                text=True,
                timeout=30,  # 30 second timeout
                cwd=Path.home() / "dev" / "selfhosted-agent"
            )

            output = result.stdout
            if result.stderr:
                output += f"\nStderr: {result.stderr}"

            return ToolResult(
                success=result.returncode == 0,
                output=output,
                error=None if result.returncode == 0 else f"Command exited with code {result.returncode}"
            )
        except subprocess.TimeoutExpired:
            return ToolResult(
                success=False,
                output="",
                error="Command timed out (30s limit)"
            )
        except Exception as e:
            # Tool boundary: e.g. executable not found or missing working dir.
            return ToolResult(
                success=False,
                output="",
                error=f"Error executing command: {str(e)}"
            )

    def _is_safe(self, command: str) -> bool:
        """Return True only for commands that pass both safety lists.

        A command is rejected when it contains a blacklisted pattern, cannot
        be tokenised, is empty, or starts with a non-whitelisted program.
        """
        # Lower-case once instead of once per pattern.
        lowered = command.lower()
        if any(pattern in lowered for pattern in self.DANGEROUS_PATTERNS):
            return False

        try:
            tokens = shlex.split(command)
        except ValueError:
            # Unbalanced quotes or a dangling escape: this check runs outside
            # execute()'s try block, so it must not raise.
            return False

        # An empty or whitespace-only command has no first token to check.
        if not tokens:
            return False

        return tokens[0] in self.ALLOWED_COMMANDS

3. Python Code Execution

# src/tools/python.py
import sys
import io
from contextlib import redirect_stdout, redirect_stderr
from .base import Tool, ToolResult

class PythonTool(Tool):
    """Execute a Python snippet in-process and capture what it prints.

    SECURITY: this is NOT a sandbox. The snippet runs with the interpreter's
    real builtins, so it can import modules, touch the filesystem and spawn
    processes with the agent's privileges. Only use it with code you would
    be willing to run yourself, or move execution into an isolated process
    or container.
    """

    @property
    def name(self) -> str:
        """Identifier the LLM uses to call this tool."""
        return "run_python"

    @property
    def description(self) -> str:
        """Guidance for the LLM about when to use this tool."""
        return "Execute Python code and return the output. Use for calculations, data processing, or testing code snippets."

    @property
    def parameters(self) -> dict:
        """JSON Schema declaring the single required ``code`` argument."""
        return {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "Python code to execute"
                }
            },
            "required": ["code"]
        }

    def execute(self, code: str) -> ToolResult:
        """Run ``code`` and return everything it wrote to stdout/stderr.

        Args:
            code: Python source to execute.

        Returns:
            ToolResult with ``success=True`` and the captured text (or a
            placeholder when nothing was printed). Exceptions raised by the
            snippet, including ``SystemExit``, become failed results.
        """
        try:
            # Capture stdout and stderr
            stdout = io.StringIO()
            stderr = io.StringIO()

            # Fresh globals so the snippet's names do not leak into this
            # module. SECURITY: passing __builtins__ keeps __import__, open,
            # etc. available, so this separates names, not privileges.
            namespace = {"__builtins__": __builtins__}

            with redirect_stdout(stdout), redirect_stderr(stderr):
                exec(code, namespace)

            output = stdout.getvalue()
            errors = stderr.getvalue()

            if errors:
                output += f"\nWarnings/Errors:\n{errors}"

            return ToolResult(
                success=True,
                output=output if output else "Code executed successfully (no output)"
            )
        except SystemExit as e:
            # exit()/sys.exit() in the snippet raises SystemExit, which is not
            # an Exception subclass; uncaught it would stop the whole agent.
            return ToolResult(
                success=False,
                output="",
                error=f"Error executing Python code: SystemExit: {e.code}"
            )
        except Exception as e:
            # Include the exception type: str(KeyError('x')) alone is just
            # "'x'", which tells the LLM nothing about what went wrong.
            return ToolResult(
                success=False,
                output="",
                error=f"Error executing Python code: {type(e).__name__}: {e}"
            )

Tool Registry

# src/tools/registry.py
from typing import Dict, List
from .base import Tool

class ToolRegistry:
    """Name-indexed collection of tool instances."""

    def __init__(self):
        # Maps each tool's name to the instance registered under it.
        self._tools: Dict[str, Tool] = {}

    def register(self, tool: Tool):
        """Store ``tool`` under its own name, replacing any previous entry."""
        key = tool.name
        self._tools[key] = tool

    def get(self, name: str) -> Tool:
        """Look up a tool by name; yields None when nothing is registered."""
        try:
            return self._tools[name]
        except KeyError:
            return None

    def list_tools(self) -> List[Tool]:
        """Return the registered tools as a new list."""
        return [*self._tools.values()]

    def to_ollama_format(self) -> List[Dict]:
        """Return the Ollama description of every registered tool."""
        specs = []
        for registered in self._tools.values():
            specs.append(registered.to_ollama_format())
        return specs

# Global registry: a single module-level ToolRegistry instance, created when
# this module is imported, so every importer shares the same set of tools.
registry = ToolRegistry()

Integration with Agent

# src/agent.py (simplified)
from tools.registry import registry
from tools.filesystem import ReadFileTool, WriteFileTool
from tools.shell import ShellTool

class Agent:
    """Minimal tool-calling loop around an LLM client.

    Args:
        llm: Client object exposing ``generate(messages, tools=...)``. The
            original snippet read ``self.llm`` without ever assigning it;
            the optional parameter keeps ``Agent()`` working as before.
    """

    def __init__(self, llm=None):
        # Register tools
        registry.register(ReadFileTool())
        registry.register(WriteFileTool())
        registry.register(ShellTool())

        self.tools = registry.to_ollama_format()
        self.llm = llm

    def run(self, user_message: str):
        """Answer ``user_message``, executing tools until the LLM replies in text.

        Returns:
            The ``content`` of the first response that requests no tool calls.
        """
        messages = [{"role": "user", "content": user_message}]

        while True:
            # Send to LLM with tools
            response = self.llm.generate(messages, tools=self.tools)

            tool_calls = response.get("tool_calls")
            if not tool_calls:
                # LLM returned text response, we're done
                return response["message"]["content"]

            # Keep the assistant turn in the history so the tool results that
            # follow have something to refer to.
            # NOTE(review): assumes the response carries that turn under
            # "message", as the final-answer branch above does; confirm
            # against the LLM client.
            assistant_message = response.get("message")
            if assistant_message is not None:
                messages.append(assistant_message)

            for tool_call in tool_calls:
                tool_message = {
                    "role": "tool",
                    "content": self._run_tool_call(tool_call),
                }
                # Not every backend assigns ids to tool calls; only echo one
                # back when it exists instead of failing with KeyError.
                if "id" in tool_call:
                    tool_message["tool_call_id"] = tool_call["id"]
                messages.append(tool_message)

    def _run_tool_call(self, tool_call) -> str:
        """Execute one requested tool call and return the text for the LLM.

        Problems with the request itself (unknown tool, malformed arguments)
        are returned as error text so the model can correct itself, rather
        than crashing the loop.
        """
        # Local import: this module's import block does not provide json.
        import json

        function = tool_call["function"]
        tool_name = function["name"]

        tool = registry.get(tool_name)
        if tool is None:
            return f"Error: unknown tool '{tool_name}'"

        # Arguments may arrive as a JSON string or as an already-decoded
        # object, depending on the backend; accept both.
        raw_args = function.get("arguments") or {}
        if isinstance(raw_args, str):
            try:
                args = json.loads(raw_args)
            except json.JSONDecodeError as exc:
                return f"Error: invalid JSON arguments for '{tool_name}': {exc}"
        else:
            args = raw_args

        if not isinstance(args, dict):
            return f"Error: arguments for '{tool_name}' must be a JSON object"

        try:
            result = tool.execute(**args)
        except TypeError as exc:
            # Missing or unexpected keyword arguments supplied by the model.
            return f"Error: bad arguments for '{tool_name}': {exc}"

        return result.output if result.success else result.error

Safety Considerations

1. Sandboxing

File operations:

  • Restrict to specific directory (e.g., ~/dev/selfhosted-agent)
  • Use Path.resolve() to normalize the path, then verify the result is still inside the allowed directory (resolving alone does not block ../../etc/passwd)

Shell execution:

  • Whitelist allowed commands
  • Blacklist dangerous patterns
  • Set working directory explicitly

Code execution:

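  • Run in a fresh namespace so names do not leak between runs (note: this is not a sandbox — with __builtins__ available the code can still import any module)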
  • Consider using RestrictedPython for true sandboxing
  • Timeout long-running code

2. User Confirmation

class DestructiveTool(Tool):
    """Pattern for tools that need an explicit go-ahead from the user."""

    def execute(self, **kwargs) -> ToolResult:
        """Show the pending call and abort unless the user answers "y"."""
        # Show what is about to happen, then wait for the answer
        print(f"⚠️  About to execute: {kwargs}")
        answer = input("Proceed? (y/n): ")
        approved = answer.lower() == 'y'

        if not approved:
            cancelled = ToolResult(
                success=False,
                output="",
                error="Operation cancelled by user"
            )
            return cancelled

        # Confirmed: the actual work goes here
        ...

3. Logging

import logging

class Tool(ABC):
    """Base-class variant whose ``execute`` logs around ``_execute``."""

    def execute(self, **kwargs) -> ToolResult:
        """Log the call, delegate to ``self._execute`` and log the outcome.

        Args:
            **kwargs: Forwarded unchanged to ``self._execute``.

        Returns:
            Whatever ``self._execute`` returns.
        """
        # Values are passed as logging arguments rather than pre-formatted, so
        # the message is only built when INFO records are actually emitted.
        logging.info("Executing %s with args: %s", self.name, kwargs)
        result = self._execute(**kwargs)
        logging.info("Result: success=%s", result.success)
        return result

Advanced Patterns

1. Async Tool Execution

import asyncio

class AsyncTool(Tool):
    """Tool variant that exposes a coroutine entry point."""

    async def execute_async(self, **kwargs) -> ToolResult:
        """Await the (placeholder) work and report completion."""
        # Stand-in for a long-running operation
        await asyncio.sleep(5)
        outcome = ToolResult(success=True, output="Done")
        return outcome

# Run multiple tools in parallel
# NOTE: in a regular module `await` is a SyntaxError outside `async def`,
# so this fragment belongs inside a coroutine.
results = await asyncio.gather(
    tool1.execute_async(**args1),
    tool2.execute_async(**args2)
)

2. Tool Chaining

# LLM can chain tools together:
# 1. read_file("data.txt")
# 2. run_python("process data")
# 3. write_file("output.txt", result)

3. Conditional Tools

# Offer the shell tool to advanced users only; for everyone else
# (e.g. beginners) it is simply never registered.
if user_level == "advanced":
    registry.register(ShellTool())

Testing Tools

# tests/test_tools.py
import pytest
from tools.filesystem import ReadFileTool, WriteFileTool

def test_read_file():
    """ReadFileTool returns the text of a file inside its sandbox directory."""
    # Local import: this test module only imports pytest and the tools.
    from pathlib import Path

    tool = ReadFileTool()

    # The tool resolves relative paths against ~/dev/selfhosted-agent, not the
    # current working directory, so the fixture has to be created there.
    safe_dir = Path.home() / "dev" / "selfhosted-agent"
    safe_dir.mkdir(parents=True, exist_ok=True)
    # Distinctive name so an unrelated file in the sandbox is never clobbered.
    fixture = safe_dir / "_pytest_read_file_fixture.txt"
    fixture.write_text("Hello world")

    try:
        # Test reading
        result = tool.execute(path=fixture.name)
        assert result.success
        assert "Hello world" in result.output
    finally:
        # Do not leave test artefacts behind, even when an assertion fails.
        fixture.unlink()

def test_write_file():
    """WriteFileTool stores the given content inside its sandbox directory."""
    # Local import: this test module only imports pytest and the tools.
    from pathlib import Path

    tool = WriteFileTool()

    # The tool writes relative paths under ~/dev/selfhosted-agent, so that is
    # where the result must be verified, not in the working directory.
    # Distinctive name so an unrelated file in the sandbox is never clobbered.
    file_name = "_pytest_write_file_output.txt"
    target = Path.home() / "dev" / "selfhosted-agent" / file_name

    try:
        result = tool.execute(path=file_name, content="Test content")
        assert result.success

        # Verify file was written
        assert target.read_text() == "Test content"
    finally:
        # Do not leave test artefacts behind, even when an assertion fails.
        if target.exists():
            target.unlink()

def test_path_traversal_blocked():
    """A path that climbs out of the sandbox is refused with "Access denied"."""
    outcome = ReadFileTool().execute(path="../../etc/passwd")

    assert not outcome.success
    denied = "Access denied" in outcome.error
    assert denied

Next Steps

  1. Implement base tool class and registry
  2. Start with file operations (read/write)
  3. Add shell execution (with safety checks)
  4. Test thoroughly before using in agent
  5. Expand with more tools as needed

Read context-management.md for handling tool results in conversation history.