refactor: remove metrics module and sampling configuration

deborahjacob-botanu · claude · deborahjacob-botanu · commit e02ed253ec46 · 2026-02-06T00:38:25.000-05:00
Cost attribution requires 100% trace capture. Removed:
- metrics.py module (redundant with trace-based aggregation)
- trace_sample_rate config option
- BOTANU_TRACE_SAMPLE_RATE environment variable

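Backend aggregates costs from traces by run_id.

Also in this commit:
- decorators read `botanu.outcome.status` only when the span's attributes are a dict
- get_ledger() builds its default ledger through an lru_cache-wrapped factory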

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/docs/api/configuration.md b/docs/api/configuration.md
@@ -22,7 +22,6 @@ from botanu.sdk.config import BotanuConfig
 | `max_export_batch_size` | `int` | `512` | Max spans per batch |
 | `max_queue_size` | `int` | `2048` | Max spans in queue |
 | `schedule_delay_millis` | `int` | `5000` | Delay between batch exports |
-| `trace_sample_rate` | `float` | `1.0` | Sampling rate (1.0 = 100%) |
 | `propagation_mode` | `str` | `"lean"` | `"lean"` or `"full"` |
 | `auto_instrument_packages` | `list` | `[...]` | Packages to auto-instrument |
 
@@ -137,9 +136,6 @@ export:
   queue_size: integer       # Max spans in queue
   delay_ms: integer         # Delay between exports
 
-sampling:
-  rate: float               # Sampling rate (0.0-1.0)
-
 propagation:
   mode: string              # "lean" or "full"
 
@@ -294,7 +290,6 @@ if not is_enabled():
 |----------|-------------|---------|
 | `BOTANU_ENVIRONMENT` | Fallback for environment | `"production"` |
 | `BOTANU_PROPAGATION_MODE` | `"lean"` or `"full"` | `"lean"` |
-| `BOTANU_TRACE_SAMPLE_RATE` | Sampling rate (0.0-1.0) | `"1.0"` |
 | `BOTANU_AUTO_DETECT_RESOURCES` | Auto-detect cloud resources | `"true"` |
 | `BOTANU_CONFIG_FILE` | Path to YAML config file | None |
 
diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md
@@ -81,9 +81,6 @@ class BotanuConfig:
     max_queue_size: int = 2048
     schedule_delay_millis: int = 5000
 
-    # Sampling (1.0 = 100%)
-    trace_sample_rate: float = 1.0    # BOTANU_TRACE_SAMPLE_RATE
-
     # Propagation mode
     propagation_mode: str = "lean"    # BOTANU_PROPAGATION_MODE
 
@@ -110,7 +107,6 @@ class BotanuConfig:
 |----------|-------------|---------|
 | `BOTANU_ENVIRONMENT` | Fallback for environment | `production` |
 | `BOTANU_PROPAGATION_MODE` | `lean` or `full` | `lean` |
-| `BOTANU_TRACE_SAMPLE_RATE` | Sampling rate (0.0-1.0) | `1.0` |
 | `BOTANU_AUTO_DETECT_RESOURCES` | Auto-detect cloud resources | `true` |
 | `BOTANU_CONFIG_FILE` | Path to YAML config | None |
 
@@ -139,9 +135,6 @@ export:
   queue_size: 2048
   delay_ms: 5000
 
-sampling:
-  rate: 1.0
-
 propagation:
   mode: lean
 
@@ -259,16 +252,6 @@ enable(
 )
 ```
 
-## Sampling
-
-For cost attribution, **always use 100% sampling** (the default):
-
-```python
-trace_sample_rate: float = 1.0  # Never miss a transaction
-```
-
-If you must sample, understand that cost calculations will be incomplete.
-
 ## Exporting Configuration
 
 ```python
diff --git a/docs/patterns/anti-patterns.md b/docs/patterns/anti-patterns.md
@@ -238,26 +238,6 @@ track_llm_call(provider="azure_openai", ...)
 
 ### Sampling for Cost Attribution
 
-**Don't** sample spans:
-
-```python
-# BAD - Missing cost data
-enable(
-    service_name="my-service",
-    trace_sample_rate=0.1,  # Only 10% of costs captured!
-)
-```
-
-**Do** use 100% sampling:
-
-```python
-# GOOD - Complete cost data
-enable(
-    service_name="my-service",
-    trace_sample_rate=1.0,  # Default - don't change
-)
-```
-
 ### Hardcoding Configuration
 
 **Don't** hardcode production values:
diff --git a/docs/patterns/best-practices.md b/docs/patterns/best-practices.md
@@ -276,18 +276,6 @@ export OTEL_EXPORTER_OTLP_ENDPOINT=http://collector:4318
 export BOTANU_ENVIRONMENT=production
 ```
 
-### Never Sample for Cost Attribution
-
-Always use 100% sampling for accurate cost data:
-
-```python
-# GOOD
-trace_sample_rate: float = 1.0
-
-# BAD - Missing cost data
-trace_sample_rate: float = 0.1  # Only 10% of costs captured
-```
-
 ### Use YAML for Complex Configuration
 
 For multi-environment setups:
diff --git a/src/botanu/sdk/config.py b/src/botanu/sdk/config.py
@@ -64,9 +64,6 @@ class BotanuConfig:
     max_queue_size: int = 2048
     schedule_delay_millis: int = 5000
 
-    # Sampling (1.0 = 100% — never sample for cost attribution)
-    trace_sample_rate: float = 1.0
-
     # Propagation mode: "lean" (run_id + use_case only) or "full" (all context)
     propagation_mode: str = "lean"
 
@@ -141,10 +138,6 @@ def __post_init__(self) -> None:
         if env_propagation_mode and env_propagation_mode in ("lean", "full"):
             self.propagation_mode = env_propagation_mode
 
-        env_sample_rate = os.getenv("BOTANU_TRACE_SAMPLE_RATE")
-        if env_sample_rate:
-            self.trace_sample_rate = float(env_sample_rate)
-
     # ------------------------------------------------------------------
     # YAML loading
     # ------------------------------------------------------------------
@@ -236,7 +229,6 @@ def _from_dict(
         service = data.get("service", {})
         otlp = data.get("otlp", {})
         export = data.get("export", {})
-        sampling = data.get("sampling", {})
         propagation = data.get("propagation", {})
         resource = data.get("resource", {})
         auto_packages = data.get("auto_instrument_packages")
@@ -252,7 +244,6 @@ def _from_dict(
             max_export_batch_size=export.get("batch_size", 512),
             max_queue_size=export.get("queue_size", 2048),
             schedule_delay_millis=export.get("delay_ms", 5000),
-            trace_sample_rate=sampling.get("rate", 1.0),
             propagation_mode=propagation.get("mode", "lean"),
             auto_instrument_packages=(auto_packages if auto_packages else BotanuConfig().auto_instrument_packages),
             _config_file=config_file,
@@ -279,9 +270,6 @@ def to_dict(self) -> Dict[str, Any]:
                 "queue_size": self.max_queue_size,
                 "delay_ms": self.schedule_delay_millis,
             },
-            "sampling": {
-                "rate": self.trace_sample_rate,
-            },
             "propagation": {
                 "mode": self.propagation_mode,
             },
diff --git a/src/botanu/sdk/decorators.py b/src/botanu/sdk/decorators.py
@@ -24,7 +24,6 @@
 
 from botanu.models.run_context import RunContext, RunStatus
 from botanu.sdk.context import get_baggage, set_baggage
-from botanu.tracking.metrics import record_run_completed
 
 T = TypeVar("T")
 
@@ -120,7 +119,11 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> T:
                     result = await func(*args, **kwargs)
 
                     span_attrs = getattr(span, "attributes", None)
-                    existing_outcome = span_attrs.get("botanu.outcome.status") if span_attrs else None
+                    existing_outcome = (
+                        span_attrs.get("botanu.outcome.status")
+                        if isinstance(span_attrs, dict)
+                        else None
+                    )
 
                     if existing_outcome is None and auto_outcome_on_success:
                         run_ctx.complete(RunStatus.SUCCESS)
@@ -176,7 +179,11 @@ def sync_wrapper(*args: Any, **kwargs: Any) -> T:
                     result = func(*args, **kwargs)
 
                     span_attrs = getattr(span, "attributes", None)
-                    existing_outcome = span_attrs.get("botanu.outcome.status") if span_attrs else None
+                    existing_outcome = (
+                        span_attrs.get("botanu.outcome.status")
+                        if isinstance(span_attrs, dict)
+                        else None
+                    )
 
                     if existing_outcome is None and auto_outcome_on_success:
                         run_ctx.complete(RunStatus.SUCCESS)
@@ -230,14 +237,6 @@ def _emit_run_completed(
     span.set_attribute("botanu.outcome.status", status.value)
     span.set_attribute("botanu.run.duration_ms", duration_ms)
 
-    record_run_completed(
-        use_case=run_ctx.use_case,
-        status=status.value,
-        environment=run_ctx.environment,
-        duration_ms=duration_ms,
-        workflow=run_ctx.workflow,
-    )
-
 
 # Alias
 use_case = botanu_use_case
diff --git a/src/botanu/tracking/__init__.py b/src/botanu/tracking/__init__.py
@@ -7,7 +7,6 @@
 - LLM/GenAI model calls
 - Database, storage, and messaging operations
 - Attempt ledger for durable cost tracking
-- Run completion metrics
 """
 
 from __future__ import annotations
@@ -44,7 +43,6 @@
     track_llm_call,
     track_tool_call,
 )
-from botanu.tracking.metrics import record_run_completed
 
 __all__ = [
     # LLM tracking
@@ -76,6 +74,4 @@
     "record_tool_attempted",
     "LedgerEventType",
     "AttemptStatus",
-    # Metrics
-    "record_run_completed",
 ]
diff --git a/src/botanu/tracking/ledger.py b/src/botanu/tracking/ledger.py
@@ -22,6 +22,7 @@
 import time
 from dataclasses import dataclass, field
 from enum import Enum
+from functools import lru_cache
 from typing import Any, Dict, Optional
 
 from opentelemetry import trace
@@ -384,12 +385,17 @@ def shutdown(self) -> None:
 _global_ledger: Optional[AttemptLedger] = None
 
 
+@lru_cache(maxsize=1)
+def _create_default_ledger() -> AttemptLedger:
+    """Create default ledger instance (thread-safe via lru_cache)."""
+    return AttemptLedger()
+
+
 def get_ledger() -> AttemptLedger:
-    """Get the global attempt ledger instance."""
-    global _global_ledger
-    if _global_ledger is None:
-        _global_ledger = AttemptLedger()
-    return _global_ledger
+    """Get the global attempt ledger instance (thread-safe)."""
+    if _global_ledger is not None:
+        return _global_ledger
+    return _create_default_ledger()
 
 
 def set_ledger(ledger: AttemptLedger) -> None:
diff --git a/src/botanu/tracking/metrics.py b/src/botanu/tracking/metrics.py
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
@@ -53,7 +53,6 @@ def test_default_values(self):
             assert config.service_name == "unknown_service"
             assert config.deployment_environment == "production"
             assert config.propagation_mode == "lean"
-            assert config.trace_sample_rate == 1.0
             assert config.auto_detect_resources is True
 
     def test_env_var_service_name(self):
@@ -85,11 +84,6 @@ def test_explicit_values_override_env(self):
             config = BotanuConfig(service_name="explicit-service")
             assert config.service_name == "explicit-service"
 
-    def test_env_var_sample_rate(self):
-        with mock.patch.dict(os.environ, {"BOTANU_TRACE_SAMPLE_RATE": "0.5"}):
-            config = BotanuConfig()
-            assert config.trace_sample_rate == 0.5
-
     def test_env_var_propagation_mode(self):
         with mock.patch.dict(os.environ, {"BOTANU_PROPAGATION_MODE": "full"}):
             config = BotanuConfig()