Feat: Prometheus metrics integration for agent monitoring

KirobotDev · KirobotDev · commit 084a14d86bcb · 2026-04-08T14:39:41.000+07:00
diff --git a/examples/metrics/prometheus_endpoint.py b/examples/metrics/prometheus_endpoint.py
@@ -0,0 +1,194 @@
+"""Example: Prometheus metrics endpoint for agent monitoring.
+
+This example shows how to set up a FastAPI server with a /metrics endpoint
+that exposes Prometheus metrics for your agents.
+
+To run:
+    pip install 'openai-agents[prometheus]' fastapi uvicorn
+    uv run python examples/metrics/prometheus_endpoint.py
+
+Then open http://localhost:8000/metrics in your browser or configure
+Prometheus to scrape http://localhost:8000/metrics
+"""
+
+from __future__ import annotations
+
+import asyncio
+import time
+import random
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+from prometheus_client import make_asgi_app
+
+from agents import Agent, Runner
+from agents.metrics import PrometheusMetrics, MetricsHooks, enable_metrics
+
+# Recorder object: MetricsHooks and the /generate-load endpoint call its record_* methods.
+metrics = PrometheusMetrics()
+# Install it as the global default, which MetricsHooks() falls back to when it is
+# constructed without an explicit metrics argument.
+enable_metrics(metrics)
+
+# ASGI sub-application that renders metrics for scraping.
+# NOTE(review): make_asgi_app() is called without a registry, so it serves
+# prometheus_client's default registry — confirm PrometheusMetrics registers there.
+metrics_app = make_asgi_app()
+
+# One shared agent instance, reused by the /solve and /chat endpoints below.
+agent = Agent(
+    name="math_assistant",
+    instructions="You are a helpful math assistant. Solve simple math problems.",
+)
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Lifespan context manager for startup/shutdown."""
+    print("Starting server with metrics enabled...")
+    print("Visit http://localhost:8000/metrics for Prometheus metrics")
+    yield
+    print("Shutting down...")
+
+
+# Application object; `lifespan` prints the startup and shutdown messages.
+app = FastAPI(title="Agent Metrics Example", lifespan=lifespan)
+
+# Serve the Prometheus ASGI sub-application under the /metrics path.
+app.mount("/metrics", metrics_app)
+
+
+@app.get("/")
+async def root():
+    """Root endpoint with instructions."""
+    return {
+        "message": "Agent Metrics Example",
+        "endpoints": {
+            "/": "This help message",
+            "/metrics": "Prometheus metrics endpoint",
+            "/solve/{problem}": "Solve a math problem (generates metrics)",
+            "/chat/{message}": "Chat with the agent (generates metrics)",
+        },
+    }
+
+
+@app.get("/solve/{problem}")
+async def solve(problem: str):
+    """Solve a math problem and record metrics."""
+    hooks = MetricsHooks()
+
+    start_time = time.monotonic()
+
+    try:
+        result = await Runner.run(
+            agent,
+            f"Solve this math problem: {problem}",
+            hooks=[hooks],
+        )
+
+        duration = time.monotonic() - start_time
+
+        return {
+            "problem": problem,
+            "solution": result.final_output,
+            "duration_seconds": round(duration, 3),
+        }
+    except Exception as e:
+        duration = time.monotonic() - start_time
+        return {
+            "problem": problem,
+            "error": str(e),
+            "duration_seconds": round(duration, 3),
+        }
+
+
+@app.get("/chat/{message}")
+async def chat(message: str):
+    """Chat with the agent and record metrics."""
+    hooks = MetricsHooks()
+
+    try:
+        result = await Runner.run(
+            agent,
+            message,
+            hooks=[hooks],
+        )
+
+        return {
+            "message": message,
+            "response": result.final_output,
+            "usage": {
+                "input_tokens": result.usage.input_tokens if result.usage else 0,
+                "output_tokens": result.usage.output_tokens if result.usage else 0,
+                "total_tokens": result.usage.total_tokens if result.usage else 0,
+            },
+        }
+    except Exception as e:
+        return {
+            "message": message,
+            "error": str(e),
+        }
+
+
+@app.post("/generate-load")
+async def generate_load(count: int = 10):
+    """Generate load for testing metrics (simulated)."""
+    results = []
+
+    for i in range(count):
+        operation = random.choice(["add", "multiply", "divide", "subtract"])
+        a, b = random.randint(1, 100), random.randint(1, 100)
+
+        latency = random.uniform(0.1, 2.0)
+        tokens_in = random.randint(50, 500)
+        tokens_out = random.randint(20, 200)
+
+        metrics.record_llm_call(
+            latency=latency,
+            tokens_in=tokens_in,
+            tokens_out=tokens_out,
+            model="gpt-4",
+        )
+
+        if random.random() < 0.1:
+            error_type = random.choice(["RateLimitError", "TimeoutError", "APIError"])
+            metrics.record_error(error_type, agent.name or "unknown")
+            results.append(
+                {
+                    "operation": operation,
+                    "error": error_type,
+                }
+            )
+        else:
+            results.append(
+                {
+                    "operation": operation,
+                    "a": a,
+                    "b": b,
+                    "latency": round(latency, 3),
+                }
+            )
+
+        await asyncio.sleep(0.01)
+
+    return {
+        "generated": count,
+        "results": results,
+    }
+
+
+# Script entry point: print a usage banner, then serve the app with uvicorn.
+if __name__ == "__main__":
+    # Imported here so uvicorn is only needed when the file is run as a script.
+    import uvicorn
+
+    print("""
+  Endpoints:                                                  
+    • http://localhost:8000/           - API documentation      
+    • http://localhost:8000/metrics    - Prometheus metrics     
+    • http://localhost:8000/solve/{x}  - Solve math problem     
+    • http://localhost:8000/chat/{msg} - Chat with agent        
+    • POST /generate-load?count=10     - Generate test load     
+                                                              
+  Metrics available:                                          
+    • agents_llm_latency_seconds     - LLM call latency         
+    • agents_tokens_total            - Token usage              
+    • agents_errors_total            - Error counts             
+    • agents_runs_total              - Run counts               
+    • agents_run_duration_seconds    - Run duration             
+    • agents_turns_total             - LLM turns                
+    • agents_tool_executions_total   - Tool executions          
+    • agents_tool_latency_seconds    - Tool latency             
+                                                              
+    """)
+
+    # NOTE(review): host "0.0.0.0" accepts connections on every interface, while the
+    # banner above advertises localhost URLs — confirm this exposure is intended.
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/pyproject.toml b/pyproject.toml
@@ -43,6 +43,7 @@ sqlalchemy = ["SQLAlchemy>=2.0", "asyncpg>=0.29.0"]
 encrypt = ["cryptography>=45.0, <46"]
 redis = ["redis>=7"]
 dapr = ["dapr>=1.16.0", "grpcio>=1.60.0"]
+prometheus = ["prometheus-client>=0.21.0"]
 
 [dependency-groups]
 dev = [
diff --git a/src/agents/metrics/__init__.py b/src/agents/metrics/__init__.py
@@ -0,0 +1,40 @@
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .prometheus import PrometheusMetrics
+    from .hooks import MetricsHooks
+
+# Public API of the package. Each name is resolved on first access by the
+# module-level __getattr__ defined below rather than imported eagerly.
+__all__ = [
+    "PrometheusMetrics",
+    "MetricsHooks",
+    "enable_metrics",
+    "get_metrics",
+    "disable_metrics",
+]
+
+
+def __getattr__(name: str):
+    if name == "PrometheusMetrics":
+        from .prometheus import PrometheusMetrics as _PrometheusMetrics
+
+        return _PrometheusMetrics
+    elif name == "MetricsHooks":
+        from .hooks import MetricsHooks as _MetricsHooks
+
+        return _MetricsHooks
+    elif name == "enable_metrics":
+        from .hooks import enable_metrics as _enable_metrics
+
+        return _enable_metrics
+    elif name == "get_metrics":
+        from .hooks import get_metrics as _get_metrics
+
+        return _get_metrics
+    elif name == "disable_metrics":
+        from .hooks import disable_metrics as _disable_metrics
+
+        return _disable_metrics
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/src/agents/metrics/hooks.py b/src/agents/metrics/hooks.py
@@ -0,0 +1,143 @@
+
+from __future__ import annotations
+
+import time
+from typing import Any
+
+from ..agent import Agent
+from ..lifecycle import RunHooks
+from ..logger import logger
+from ..result import RunResult
+from ..run_context import RunContextWrapper
+
+# Import the recorder class if possible; the module stays importable when the
+# import fails.
+try:
+    from .prometheus import PrometheusMetrics
+except ImportError:
+    # NOTE(review): presumably raised when the optional prometheus dependency is
+    # not installed — confirm. The name is kept defined so annotations still parse.
+    PrometheusMetrics = None
+
+# Process-wide default recorder: set by enable_metrics(), cleared by
+# disable_metrics(), read by get_metrics() and by MetricsHooks.__init__.
+_global_metrics: PrometheusMetrics | None = None
+
+
+def enable_metrics(metrics: PrometheusMetrics) -> None:
+    """Install ``metrics`` as the module-wide default recorder.
+
+    ``MetricsHooks`` reads this default in its constructor, so only hooks created
+    after this call pick it up.
+    """
+    global _global_metrics
+    _global_metrics = metrics
+
+
+def get_metrics() -> PrometheusMetrics | None:
+    """Return the module-wide default recorder, or ``None`` when none is installed."""
+    return _global_metrics
+
+
+def disable_metrics() -> None:
+    """Clear the module-wide default recorder.
+
+    Hooks that were already constructed keep the recorder they captured.
+    """
+    global _global_metrics
+    _global_metrics = None
+
+
+class MetricsHooks(RunHooks):
+
+    def __init__(self, metrics: PrometheusMetrics | None = None) -> None:
+        self._metrics = metrics or _global_metrics
+        self._run_start_times: dict[str, float] = {}
+        self._tool_start_times: dict[str, float] = {}
+
+    async def on_start(
+        self,
+        context: RunContextWrapper[Any],
+        agent: Agent[Any],
+    ) -> None:
+        if self._metrics is None:
+            return
+
+        agent_name = agent.name or "unknown"
+        self._run_start_times[context.context_id] = time.monotonic()
+        self._metrics.record_run_start(agent_name)
+
+    async def on_end(
+        self,
+        context: RunContextWrapper[Any],
+        agent: Agent[Any],
+        result: RunResult,
+    ) -> None:
+        if self._metrics is None:
+            return
+
+        agent_name = agent.name or "unknown"
+        start_time = self._run_start_times.pop(context.context_id, None)
+        duration = None
+        if start_time is not None:
+            duration = time.monotonic() - start_time
+
+        self._metrics.record_run_end(agent_name, duration, status="success")
+
+    async def on_error(
+        self,
+        context: RunContextWrapper[Any],
+        agent: Agent[Any],
+        error: Exception,
+    ) -> None:
+        if self._metrics is None:
+            return
+
+        agent_name = agent.name or "unknown"
+        start_time = self._run_start_times.pop(context.context_id, None)
+        duration = None
+        if start_time is not None:
+            duration = time.monotonic() - start_time
+
+        error_type = type(error).__name__
+        self._metrics.record_error(error_type, agent_name)
+        self._metrics.record_run_end(agent_name, duration, status="error")
+
+    async def on_tool_start(
+        self,
+        context: RunContextWrapper[Any],
+        agent: Agent[Any],
+        tool_name: str,
+        input_data: dict[str, Any],
+    ) -> None:
+        if self._metrics is None:
+            return
+
+        key = f"{context.context_id}:{tool_name}"
+        self._tool_start_times[key] = time.monotonic()
+
+    async def on_tool_end(
+        self,
+        context: RunContextWrapper[Any],
+        agent: Agent[Any],
+        tool_name: str,
+        result: Any,
+    ) -> None:
+        if self._metrics is None:
+            return
+
+        key = f"{context.context_id}:{tool_name}"
+        start_time = self._tool_start_times.pop(key, None)
+        if start_time is not None:
+            latency = time.monotonic() - start_time
+            agent_name = agent.name or "unknown"
+            self._metrics.record_tool_execution(tool_name, latency, agent_name)
+
+    async def on_tool_error(
+        self,
+        context: RunContextWrapper[Any],
+        agent: Agent[Any],
+        tool_name: str,
+        error: Exception,
+    ) -> None:
+        if self._metrics is None:
+            return
+
+        key = f"{context.context_id}:{tool_name}"
+        start_time = self._tool_start_times.pop(key, None)
+        if start_time is not None:
+            latency = time.monotonic() - start_time
+            agent_name = agent.name or "unknown"
+            self._metrics.record_tool_execution(tool_name, latency, agent_name)
+
+        error_type = f"tool_error:{type(error).__name__}"
+        agent_name = agent.name or "unknown"
+        self._metrics.record_error(error_type, agent_name)
+
+
+def create_metrics_hooks(metrics: PrometheusMetrics | None = None) -> MetricsHooks:
+    return MetricsHooks(metrics)
diff --git a/src/agents/metrics/prometheus.py b/src/agents/metrics/prometheus.py
diff --git a/tests/test_metrics.py b/tests/test_metrics.py