InftyAI
diff --git a/‎alphatrion/storage/runtime.py‎
Lines changed: 16 additions & 6 deletions b/‎alphatrion/storage/runtime.py‎
Lines changed: 16 additions & 6 deletions
diff --git a/‎alphatrion/tracing/clickhouse_exporter.py‎
Lines changed: 7 additions & 70 deletions b/‎alphatrion/tracing/clickhouse_exporter.py‎
Lines changed: 7 additions & 70 deletions
diff --git a/‎alphatrion/tracing/cost_enrichment_processor.py‎
Lines changed: 144 additions & 0 deletions b/‎alphatrion/tracing/cost_enrichment_processor.py‎
Lines changed: 144 additions & 0 deletions
@@ -4,14 +4,16 @@
 
 from opentelemetry import trace
 from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
 from traceloop.sdk import Traceloop
 
 from alphatrion import envs
 from alphatrion.artifact.artifact import Artifact
 from alphatrion.storage.sqlstore import SQLStore
 from alphatrion.storage.tracestore import TraceStore
 from alphatrion.tracing.clickhouse_exporter import ClickHouseSpanExporter
-from alphatrion.tracing.prometheus_span_processor import PrometheusSpanProcessor
+from alphatrion.tracing.cost_enrichment_processor import CostEnrichmentProcessor
+from alphatrion.tracing.prometheus_exporter import PrometheusExporter
 from alphatrion.tracing.span_processor import ContextAttributesSpanProcessor
 
 __STORAGE_RUNTIME__ = None
@@ -62,23 +64,31 @@ def __init__(self):
                     telemetry_enabled=False,
                 )
 
-                # Add custom span processor to inject context attributes (run_id, etc.)
-                # into all spans, including child spans created by instrumented libraries
+                # Add custom span processors
                 tracer_provider = trace.get_tracer_provider()
+
+                # 1. Context attributes processor - injects context (run_id, etc.) into all spans
                 tracer_provider.add_span_processor(ContextAttributesSpanProcessor())
 
-                # Add Prometheus span processor if enabled
+                # 2. Cost enrichment processor - calculates costs from tokens and adds to span attributes
+                # Registered before the Prometheus exporter below, so that exporter can read the cost data
+                tracer_provider.add_span_processor(CostEnrichmentProcessor())
+
+                # 3. Add Prometheus exporter if enabled
                 if os.getenv(envs.ENABLE_PROMETHEUS, "false").lower() == "true":
                     pushgateway_url = os.getenv(
                         envs.PROMETHEUS_PUSHGATEWAY_URL, "localhost:9091"
                     )
                     job_name = os.getenv(envs.PROMETHEUS_JOB_NAME, "alphatrion")
 
-                    prometheus_processor = PrometheusSpanProcessor(
+                    prometheus_exporter = PrometheusExporter(
                         pushgateway_url=pushgateway_url,
                         job_name=job_name,
                     )
-                    tracer_provider.add_span_processor(prometheus_processor)
+                    # Use BatchSpanProcessor for better performance
+                    tracer_provider.add_span_processor(
+                        BatchSpanProcessor(prometheus_exporter)
+                    )
 
         artifact_insecure = os.getenv(envs.ARTIFACT_INSECURE, "false").lower() == "true"
         if artifact_storage_enabled():
 
@@ -13,7 +13,6 @@
     SEMANTIC_KIND_REASONING,
     SEMANTIC_KIND_UNKNOWN,
 )
-from alphatrion.utils.pricing import calculate_cost
 
 logger = logging.getLogger(__name__)
 
@@ -130,58 +129,16 @@ def _convert_span(self, span: ReadableSpan) -> dict[str, Any]:
 
         # Calculate cost for LLM spans with token usage
         # Store cost per span to enable model-level cost analytics
-        if "llm.usage.total_tokens" in span_attributes:
+        if "alphatrion.cost.total_tokens" in span_attributes:
             try:
-                base_url = span_attributes.get("gen_ai.openai.api_base", "")
-                provider = determine_provider(base_url)
-
-                # Get model and tokens
-                model = span_attributes.get(
-                    "gen_ai.request.model"
-                ) or span_attributes.get("gen_ai.response.model", "")
-
-                input_tokens = int(span_attributes.get("gen_ai.usage.input_tokens", 0))
-                output_tokens = int(
-                    span_attributes.get("gen_ai.usage.output_tokens", 0)
-                )
-                cache_creation_input_tokens = int(
-                    span_attributes.get("gen_ai.usage.cache_creation_input_tokens", 0)
-                )
-                cache_read_input_tokens = int(
-                    span_attributes.get("gen_ai.usage.cache_read_input_tokens", 0)
-                )
-
-                # Calculate cost for this span
-                cost_result = calculate_cost(
-                    provider=provider,
-                    model=model,
-                    input_tokens=input_tokens,
-                    output_tokens=output_tokens,
-                    cache_creation_input_tokens=cache_creation_input_tokens,
-                    cache_read_input_tokens=cache_read_input_tokens,
-                )
-
-                # Add cost to span attributes (in USD)
-                # This enables model-level cost analytics across all spans
-                span_attributes["alphatrion.cost.total_tokens"] = str(
-                    cost_result["total_cost"]
-                )
-                span_attributes["alphatrion.cost.input_tokens"] = str(
-                    cost_result["input_cost"]
-                )
-                span_attributes["alphatrion.cost.output_tokens"] = str(
-                    cost_result["output_cost"]
-                )
-                span_attributes["alphatrion.cost.cache_creation_input_tokens"] = str(
-                    cost_result["cache_creation_input_cost"]
-                )
-                span_attributes["alphatrion.cost.cache_read_input_tokens"] = str(
-                    cost_result["cache_read_input_cost"]
-                )
+                # Cost attributes are already enriched by CostEnrichmentProcessor
+                # Nothing to compute here: the guard above confirms the cost keys are
+                # present in span_attributes, so no further processing is needed
+                pass
 
             except Exception as e:
-                logger.warning(f"Failed to calculate cost for span {span.name}: {e}")
-                # Don't fail span export if cost calculation fails
+                logger.warning(f"Failed to process LLM span {span.name}: {e}")
+                # Don't fail span export if processing fails
 
         # Extract core identifiers from span attributes
         org_id = span_attributes.get("org_id", "")
@@ -318,23 +275,3 @@ def determine_semantic_kind(attributes: dict[str, str]) -> str:
 
     # Default to unknown
     return SEMANTIC_KIND_UNKNOWN
-
-
-def determine_provider(api_base: str) -> str:
-    """Determine provider from API base URL.
-
-    Args:
-        api_base: API base URL (e.g., "https://api.anthropic.com")
-
-    Returns:
-        Provider name (e.g., "anthropic", "openai", "deepinfra", or "unknown")
-    """
-    api_base = api_base.lower()
-    if "anthropic" in api_base:
-        return "anthropic"
-    elif "deepinfra" in api_base:
-        return "deepinfra"
-    elif "openai" in api_base:
-        return "openai"
-    else:
-        return "unknown"
@@ -0,0 +1,144 @@
+"""
+Cost Enrichment Span Processor.
+
+This processor enriches spans with cost information by calculating costs from token usage.
+It runs early in the processing chain so that downstream processors and exporters can
+access pre-calculated costs from span attributes.
+"""
+
+import logging
+
+from opentelemetry.context import Context
+from opentelemetry.sdk.trace import ReadableSpan
+from opentelemetry.sdk.trace.export import SpanProcessor
+
+from alphatrion.utils.pricing import calculate_cost
+
+logger = logging.getLogger(__name__)
+
+
+class CostEnrichmentProcessor(SpanProcessor):
+    """
+    Span processor that enriches spans with cost information.
+
+    When a span ends without ``alphatrion.cost.total_tokens`` but with
+    ``gen_ai.usage.input_tokens``, costs are computed via ``calculate_cost``
+    and stored as ``alphatrion.cost.*`` string attributes on the span, so
+    that processors and exporters reading the span afterwards can reuse them.
+    """
+
+    def on_start(self, span: ReadableSpan, parent_context: Context | None = None):
+        """Called when a span is started. No-op for this processor."""
+        pass
+
+    def on_end(self, span: ReadableSpan):
+        """
+        Called when a span ends. Add cost attributes if missing; errors are only logged.
+
+        Args:
+            span: The completed span (enriched in place via ``_attributes``)
+        """
+        try:
+            # Only process spans with attributes
+            if not span.attributes:
+                return
+
+            # Check if costs are already present
+            if "alphatrion.cost.total_tokens" in span.attributes:
+                # Costs already calculated (e.g., in claude.py)
+                return
+
+            # Check if this is an LLM span with token usage
+            if "gen_ai.usage.input_tokens" not in span.attributes:
+                # No input token count recorded on this span, skip
+                return
+
+            # Extract provider, model and token usage (missing counts default to 0)
+            attributes = span.attributes
+            provider = determine_provider(str(attributes.get("gen_ai.openai.api_base")))
+            model = str(
+                attributes.get("gen_ai.request.model")
+                or attributes.get("gen_ai.response.model", "")
+            )
+            input_tokens = int(attributes.get("gen_ai.usage.input_tokens", 0))
+            output_tokens = int(attributes.get("gen_ai.usage.output_tokens", 0))
+            cache_creation_input_tokens = int(
+                attributes.get("gen_ai.usage.cache_creation_input_tokens", 0)
+            )
+            cache_read_input_tokens = int(
+                attributes.get("gen_ai.usage.cache_read_input_tokens", 0)
+            )
+
+            # Calculate costs
+            cost_result = calculate_cost(
+                provider=provider,
+                model=model,
+                input_tokens=input_tokens,
+                output_tokens=output_tokens,
+                cache_creation_input_tokens=cache_creation_input_tokens,
+                cache_read_input_tokens=cache_read_input_tokens,
+            )
+
+            # Add cost attributes to span (values are stored as strings).
+            # HACK: ReadableSpan.attributes cannot be modified once the span has
+            # ended, so write to the private _attributes mapping instead. This
+            # relies on an SDK implementation detail (TODO confirm on upgrades);
+            # if _attributes is absent, the span is silently left unenriched.
+            if hasattr(span, "_attributes"):
+                span._attributes["alphatrion.cost.total_tokens"] = str(
+                    cost_result["total_cost"]
+                )
+                span._attributes["alphatrion.cost.input_tokens"] = str(
+                    cost_result["input_cost"]
+                )
+                span._attributes["alphatrion.cost.output_tokens"] = str(
+                    cost_result["output_cost"]
+                )
+                span._attributes["alphatrion.cost.cache_creation_input_tokens"] = str(
+                    cost_result["cache_creation_input_cost"]
+                )
+                span._attributes["alphatrion.cost.cache_read_input_tokens"] = str(
+                    cost_result["cache_read_input_cost"]
+                )
+                logger.debug(
+                    f"Enriched span {span.name} with cost: ${cost_result['total_cost']:.6f}"
+                )
+
+        except Exception as e:
+            logger.warning(f"Failed to enrich span with cost: {e}", exc_info=True)
+
+    def shutdown(self):
+        """Shut down the processor (nothing to release; only logs a message)."""
+        logger.info("CostEnrichmentProcessor shut down successfully")
+
+    def force_flush(self, timeout_millis: int = 30000) -> bool:
+        """
+        Force flush (no-op for this processor).
+
+        Args:
+            timeout_millis: Timeout in milliseconds
+
+        Returns:
+            True always
+        """
+        return True
+
+
+def determine_provider(api_base: str) -> str:
+    """Determine provider by case-insensitive substring match on the API base URL.
+
+    Args:
+        api_base: API base URL (e.g., "https://api.anthropic.com")
+
+    Returns:
+        One of "anthropic", "deepinfra", "openai" (checked in that order), or "unknown"
+    """
+    api_base = api_base.lower()
+    if "anthropic" in api_base:
+        return "anthropic"
+    elif "deepinfra" in api_base:
+        return "deepinfra"
+    elif "openai" in api_base:
+        return "openai"
+    else:
+        return "unknown"