Skip to content

Commit 666028f

Browse files
Add retry tracking, tenacity integration, and vendor rename
- Add tenacity integration with contextvars-based retry attempt tracking
- Rename provider to vendor across all attributes and span helpers
- Add event_id and customer_id to RunContext and baggage propagation
- Add OTel LogRecord emission on emit_outcome() for collector flush
- Fix CLIENT_REQUEST_ID -> VENDOR_CLIENT_REQUEST_ID attribute bug
- Remove unused ledger and resource detector modules
- All 296 tests passing

Signed-off-by: Deborah <deborah@botanu.ai>
1 parent 5a6f048 commit 666028f

29 files changed

Lines changed: 734 additions & 2045 deletions

pyproject.toml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,17 +123,30 @@ Changelog = "https://github.com/botanu-ai/botanu-sdk-python/blob/main/CHANGELOG.
123123
Issues = "https://github.com/botanu-ai/botanu-sdk-python/issues"
124124

125125
# ---------------------------------------------------------------------------
126-
# Optional extras (dev only — base install includes everything)
126+
# Optional extras
127127
# ---------------------------------------------------------------------------
128128
[project.optional-dependencies]
129+
# Cloud resource detectors — lightweight, auto-detect env and no-op elsewhere.
130+
# K8s: use the OTel Operator or K8s downward API to set OTEL_RESOURCE_ATTRIBUTES.
131+
# EKS/GKE detection is included in the aws/gcp extras respectively.
132+
aws = ["opentelemetry-resource-detector-aws >= 0.1b0"]
133+
gcp = ["opentelemetry-resource-detector-gcp >= 0.1b0"]
134+
azure = ["opentelemetry-resource-detector-azure >= 0.1b0"]
135+
container = ["opentelemetry-resource-detector-container >= 0.1b0"]
136+
cloud = [
137+
"opentelemetry-resource-detector-aws >= 0.1b0",
138+
"opentelemetry-resource-detector-gcp >= 0.1b0",
139+
"opentelemetry-resource-detector-azure >= 0.1b0",
140+
"opentelemetry-resource-detector-container >= 0.1b0",
141+
]
129142
dev = [
130143
"pytest >= 7.4.0",
131144
"pytest-asyncio >= 0.21.0",
132145
"pytest-cov >= 4.1.0",
133146
"coverage[toml] >= 7.0",
134147
"httpx >= 0.24.0, < 0.28.0; python_version < '3.10'",
135148
"httpx >= 0.24.0; python_version >= '3.10'",
136-
"starlette >= 0.27.0, < 0.30.0; python_version < '3.10'",
149+
"starlette >= 0.27.0, < 0.30.0; python_version < '3.10'",
137150
"starlette >= 0.27.0; python_version >= '3.10'",
138151
"ruff >= 0.4.0",
139152
"mypy >= 1.7.0",

src/botanu/__init__.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
66
Quick Start::
77
8-
from botanu import enable, botanu_use_case, emit_outcome
8+
from botanu import enable, botanu_workflow, emit_outcome
99
1010
enable() # reads config from OTEL_SERVICE_NAME, OTEL_EXPORTER_OTLP_ENDPOINT env vars
1111
12-
@botanu_use_case(name="Customer Support")
12+
@botanu_workflow(name="Customer Support")
1313
async def handle_request(data):
1414
result = await process(data)
1515
emit_outcome("success", value_type="tickets_resolved", value_amount=1)
@@ -38,12 +38,12 @@ async def handle_request(data):
3838
get_baggage,
3939
get_current_span,
4040
get_run_id,
41-
get_use_case,
41+
get_workflow,
4242
set_baggage,
4343
)
4444

4545
# Decorators (primary integration point)
46-
from botanu.sdk.decorators import botanu_outcome, botanu_use_case, use_case
46+
from botanu.sdk.decorators import botanu_outcome, botanu_workflow, run_botanu, workflow
4747

4848
# Span helpers
4949
from botanu.sdk.span_helpers import emit_outcome, set_business_context
@@ -56,17 +56,18 @@ async def handle_request(data):
5656
"is_enabled",
5757
# Configuration
5858
"BotanuConfig",
59-
# Decorators
60-
"botanu_use_case",
61-
"use_case",
59+
# Decorators / context managers
60+
"botanu_workflow",
61+
"run_botanu",
62+
"workflow",
6263
"botanu_outcome",
6364
# Span helpers
6465
"emit_outcome",
6566
"set_business_context",
6667
"get_current_span",
6768
# Context
6869
"get_run_id",
69-
"get_use_case",
70+
"get_workflow",
7071
"set_baggage",
7172
"get_baggage",
7273
# Run context
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# SPDX-FileCopyrightText: 2026 The Botanu Authors
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""Botanu integrations with third-party libraries."""
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# SPDX-FileCopyrightText: 2026 The Botanu Authors
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""Tenacity retry integration — automatic attempt tracking for LLM calls.
5+
6+
Stamps ``botanu.request.attempt`` on every span created inside a tenacity
7+
retry loop so the collector and cost engine can see how many attempts an
8+
event required.
9+
10+
Usage::
11+
12+
from tenacity import retry, stop_after_attempt, wait_exponential
13+
from botanu.integrations.tenacity import botanu_before, botanu_after_all
14+
from botanu.tracking.llm import track_llm_call
15+
16+
@retry(
17+
stop=stop_after_attempt(3),
18+
wait=wait_exponential(min=1, max=10),
19+
before=botanu_before,
20+
after=botanu_after_all, # optional — resets attempt counter
21+
)
22+
def call_llm():
23+
with track_llm_call("openai", "gpt-4") as tracker:
24+
response = openai.chat.completions.create(...)
25+
tracker.set_tokens(
26+
input_tokens=response.usage.prompt_tokens,
27+
output_tokens=response.usage.completion_tokens,
28+
)
29+
return response
30+
31+
The ``track_llm_call`` context manager reads the attempt number
32+
automatically — no need to call ``tracker.set_attempt()`` manually.
33+
"""
34+
35+
from __future__ import annotations
36+
37+
from typing import Any
38+
39+
from botanu.tracking.llm import _retry_attempt
40+
41+
42+
def botanu_before(retry_state: Any) -> None:
43+
"""Tenacity ``before`` callback — sets the current attempt number.
44+
45+
Use as ``@retry(before=botanu_before)`` so that every
46+
``track_llm_call`` inside the retried function automatically
47+
gets the correct attempt number on its span.
48+
"""
49+
_retry_attempt.set(retry_state.attempt_number)
50+
51+
52+
def botanu_after_all(retry_state: Any) -> None:
53+
"""Tenacity ``after`` callback — resets the attempt counter.
54+
55+
Optional but recommended. Prevents a stale attempt number from
56+
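leaking into subsequent non-retried calls on the same thread. Note: tenacity runs ``after`` only after a failed attempt, so the counter is not reset when an attempt succeeds.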
57+
58+
Use as ``@retry(after=botanu_after_all)``.
59+
"""
60+
_retry_attempt.set(0)

src/botanu/models/run_context.py

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
66
A "Run" is orthogonal to tracing:
77
- Trace context (W3C): ties distributed spans together (trace_id, span_id)
8-
- Run context (Botanu): ties business execution together (run_id, use_case, outcome)
8+
- Run context (Botanu): ties business execution together (run_id, workflow, outcome)
99
1010
Invariant: A run can span multiple traces (retries, async fanout).
1111
The run_id must remain stable across those boundaries.
@@ -85,9 +85,10 @@ class RunContext:
8585
"""
8686

8787
run_id: str
88-
use_case: str
88+
workflow: str
89+
event_id: str
90+
customer_id: str
8991
environment: str
90-
workflow: Optional[str] = None
9192
workflow_version: Optional[str] = None
9293
tenant_id: Optional[str] = None
9394
parent_run_id: Optional[str] = None
@@ -111,8 +112,9 @@ def __post_init__(self) -> None:
111112
@classmethod
112113
def create(
113114
cls,
114-
use_case: str,
115-
workflow: Optional[str] = None,
115+
workflow: str,
116+
event_id: str,
117+
customer_id: str,
116118
workflow_version: Optional[str] = None,
117119
environment: Optional[str] = None,
118120
tenant_id: Optional[str] = None,
@@ -131,9 +133,10 @@ def create(
131133

132134
return cls(
133135
run_id=run_id,
134-
use_case=use_case,
135-
environment=env,
136136
workflow=workflow,
137+
event_id=event_id,
138+
customer_id=customer_id,
139+
environment=env,
137140
workflow_version=workflow_version,
138141
tenant_id=tenant_id,
139142
parent_run_id=parent_run_id,
@@ -147,8 +150,9 @@ def create(
147150
def create_retry(cls, previous: RunContext) -> RunContext:
148151
"""Create a new RunContext for a retry attempt."""
149152
return cls.create(
150-
use_case=previous.use_case,
151153
workflow=previous.workflow,
154+
event_id=previous.event_id,
155+
customer_id=previous.customer_id,
152156
workflow_version=previous.workflow_version,
153157
environment=previous.environment,
154158
tenant_id=previous.tenant_id,
@@ -215,14 +219,14 @@ def to_baggage_dict(self, lean_mode: Optional[bool] = None) -> Dict[str, str]:
215219

216220
baggage: Dict[str, str] = {
217221
"botanu.run_id": self.run_id,
218-
"botanu.use_case": self.use_case,
222+
"botanu.workflow": self.workflow,
223+
"botanu.event_id": self.event_id,
224+
"botanu.customer_id": self.customer_id,
219225
}
220226
if lean_mode:
221227
return baggage
222228

223229
baggage["botanu.environment"] = self.environment
224-
if self.workflow:
225-
baggage["botanu.workflow"] = self.workflow
226230
if self.tenant_id:
227231
baggage["botanu.tenant_id"] = self.tenant_id
228232
if self.parent_run_id:
@@ -243,12 +247,12 @@ def to_span_attributes(self) -> Dict[str, Union[str, float, int, bool]]:
243247
"""Convert to dict for span attributes."""
244248
attrs: Dict[str, Union[str, float, int, bool]] = {
245249
"botanu.run_id": self.run_id,
246-
"botanu.use_case": self.use_case,
250+
"botanu.workflow": self.workflow,
251+
"botanu.event_id": self.event_id,
252+
"botanu.customer_id": self.customer_id,
247253
"botanu.environment": self.environment,
248254
"botanu.run.start_time": self.start_time.isoformat(),
249255
}
250-
if self.workflow:
251-
attrs["botanu.workflow"] = self.workflow
252256
if self.workflow_version:
253257
attrs["botanu.workflow.version"] = self.workflow_version
254258
if self.tenant_id:
@@ -285,8 +289,8 @@ def to_span_attributes(self) -> Dict[str, Union[str, float, int, bool]]:
285289
def from_baggage(cls, baggage: Dict[str, str]) -> Optional[RunContext]:
286290
"""Reconstruct RunContext from baggage dict."""
287291
run_id = baggage.get("botanu.run_id")
288-
use_case = baggage.get("botanu.use_case")
289-
if not run_id or not use_case:
292+
workflow = baggage.get("botanu.workflow")
293+
if not run_id or not workflow:
290294
return None
291295

292296
attempt_str = baggage.get("botanu.attempt", "1")
@@ -305,11 +309,15 @@ def from_baggage(cls, baggage: Dict[str, str]) -> Optional[RunContext]:
305309

306310
cancelled = baggage.get("botanu.cancelled", "").lower() == "true"
307311

312+
event_id = baggage.get("botanu.event_id", "")
313+
customer_id = baggage.get("botanu.customer_id", "")
314+
308315
return cls(
309316
run_id=run_id,
310-
use_case=use_case,
317+
workflow=workflow,
318+
event_id=event_id,
319+
customer_id=customer_id,
311320
environment=baggage.get("botanu.environment", "unknown"),
312-
workflow=baggage.get("botanu.workflow"),
313321
tenant_id=baggage.get("botanu.tenant_id"),
314322
parent_run_id=baggage.get("botanu.parent_run_id"),
315323
root_run_id=baggage.get("botanu.root_run_id") or run_id,

src/botanu/processors/enricher.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,27 +30,30 @@ class RunContextEnricher(SpanProcessor):
3030
"""Enriches ALL spans with run context from baggage.
3131
3232
This ensures that every span (including auto-instrumented ones)
33-
gets ``botanu.run_id``, ``botanu.use_case``, etc. attributes.
33+
gets ``botanu.run_id``, ``botanu.workflow``, etc. attributes.
3434
3535
Without this processor, only the root ``botanu.run`` span would
3636
have these attributes.
3737
38-
In ``lean_mode`` (default), only ``run_id`` and ``use_case`` are
38+
In ``lean_mode`` (default), only ``run_id``, ``workflow``, ``event_id`` and ``customer_id`` are
3939
propagated to minimise per-span overhead.
4040
"""
4141

4242
BAGGAGE_KEYS_FULL: ClassVar[List[str]] = [
4343
"botanu.run_id",
44-
"botanu.use_case",
4544
"botanu.workflow",
45+
"botanu.event_id",
46+
"botanu.customer_id",
4647
"botanu.environment",
4748
"botanu.tenant_id",
4849
"botanu.parent_run_id",
4950
]
5051

5152
BAGGAGE_KEYS_LEAN: ClassVar[List[str]] = [
5253
"botanu.run_id",
53-
"botanu.use_case",
54+
"botanu.workflow",
55+
"botanu.event_id",
56+
"botanu.customer_id",
5457
]
5558

5659
def __init__(self, lean_mode: bool = True) -> None:

src/botanu/resources/__init__.py

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,87 @@
11
# SPDX-FileCopyrightText: 2026 The Botanu Authors
22
# SPDX-License-Identifier: Apache-2.0
33

4-
"""Botanu resource detection."""
4+
"""Resource detection using official OTel community detectors.
55
6-
from botanu.resources.detector import detect_all_resources, get_resource_attributes
6+
Instead of a custom reimplementation, we try to import the official
7+
OpenTelemetry resource detector packages. Each one is a lightweight
8+
pip package that auto-detects environment attributes (K8s, AWS, GCP,
9+
Azure, container). If a package isn't installed, we gracefully skip it.
710
8-
__all__ = ["detect_all_resources", "get_resource_attributes"]
11+
Install detectors for your environment::
12+
13+
pip install botanu[aws] # AWS EC2/ECS/EKS/Lambda
14+
pip install botanu[gcp] # GCE/GKE/Cloud Run/Cloud Functions
15+
pip install botanu[azure] # Azure VMs/App Service/Functions
16+
pip install botanu[cloud] # All cloud detectors
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import importlib
22+
import logging
23+
from typing import Any, Dict, List, Tuple
24+
25+
logger = logging.getLogger(__name__)
26+
27+
# (module_path, class_name) — tried in order.
28+
# Each entry corresponds to a pip package from opentelemetry-python-contrib.
29+
_DETECTOR_REGISTRY: List[Tuple[str, str]] = [
30+
# Built-in (opentelemetry-sdk — always available)
31+
("opentelemetry.sdk.resources", "ProcessResourceDetector"),
32+
# opentelemetry-resource-detector-aws
33+
("opentelemetry.resource.detector.aws.ec2", "AwsEc2ResourceDetector"),
34+
("opentelemetry.resource.detector.aws.ecs", "AwsEcsResourceDetector"),
35+
("opentelemetry.resource.detector.aws.eks", "AwsEksResourceDetector"),
36+
("opentelemetry.resource.detector.aws.lambda_", "AwsLambdaResourceDetector"),
37+
# opentelemetry-resource-detector-gcp
38+
("opentelemetry.resource.detector.gcp", "GoogleCloudResourceDetector"),
39+
# opentelemetry-resource-detector-azure
40+
("opentelemetry.resource.detector.azure.vm", "AzureVMResourceDetector"),
41+
("opentelemetry.resource.detector.azure.app_service", "AzureAppServiceResourceDetector"),
42+
# opentelemetry-resource-detector-container
43+
("opentelemetry.resource.detector.container", "ContainerResourceDetector"),
44+
]
45+
46+
47+
def collect_detectors() -> list:
48+
"""Return instances of all importable OTel resource detectors.
49+
50+
Each detector implements ``opentelemetry.sdk.resources.ResourceDetector``.
51+
Missing packages are silently skipped.
52+
"""
53+
detectors: list = []
54+
for module_path, class_name in _DETECTOR_REGISTRY:
55+
try:
56+
mod = importlib.import_module(module_path)
57+
cls = getattr(mod, class_name)
58+
detectors.append(cls())
59+
except (ImportError, AttributeError):
60+
pass
61+
62+
if detectors:
63+
names = [type(d).__name__ for d in detectors]
64+
logger.debug("Available resource detectors: %s", names)
65+
66+
return detectors
67+
68+
69+
def detect_resource_attrs() -> Dict[str, Any]:
70+
"""Detect environment attributes using available OTel detectors.
71+
72+
Returns a flat dict of resource attributes. This is a convenience
73+
wrapper for callers that just need a dict (like bootstrap.py).
74+
"""
75+
attrs: Dict[str, Any] = {}
76+
for detector in collect_detectors():
77+
try:
78+
resource = detector.detect()
79+
attrs.update(dict(resource.attributes))
80+
except Exception:
81+
# Community detectors may raise on network timeouts, missing
82+
# metadata endpoints, etc. Never let detection break SDK init.
83+
logger.debug("Resource detector %s failed", type(detector).__name__, exc_info=True)
84+
return attrs
85+
86+
87+
__all__ = ["collect_detectors", "detect_resource_attrs"]

0 commit comments

Comments
 (0)