Skip to content

Commit e4f27c6

Browse files
authored
feat: added agent_invocations (strands-agents#1387)
1 parent 067d259 commit e4f27c6

5 files changed

Lines changed: 195 additions & 23 deletions

File tree

src/strands/agent/agent.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,8 @@ async def stream_async(
565565
"""
566566
self._interrupt_state.resume(prompt)
567567

568+
self.event_loop_metrics.reset_usage_metrics()
569+
568570
merged_state = {}
569571
if kwargs:
570572
warnings.warn("`**kwargs` parameter is deprecating, use `invocation_state` instead.", stacklevel=2)

src/strands/telemetry/metrics.py

Lines changed: 99 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,34 @@ def add_call(
151151
metrics_client.tool_error_count.add(1, attributes=attributes)
152152

153153

154+
@dataclass
155+
class EventLoopCycleMetric:
156+
"""Aggregated metrics for a single event loop cycle.
157+
158+
Attributes:
159+
event_loop_cycle_id: Current eventLoop cycle id.
160+
usage: Total token usage for the entire cycle (succeeded model invocation, excluding tool invocations).
161+
"""
162+
163+
event_loop_cycle_id: str
164+
usage: Usage
165+
166+
167+
@dataclass
168+
class AgentInvocation:
169+
"""Metrics for a single agent invocation.
170+
171+
AgentInvocation contains all the event loop cycles and accumulated token usage for that invocation.
172+
173+
Attributes:
174+
cycles: List of event loop cycles that occurred during this invocation.
175+
usage: Accumulated token usage for this invocation across all cycles.
176+
"""
177+
178+
cycles: list[EventLoopCycleMetric] = field(default_factory=list)
179+
usage: Usage = field(default_factory=lambda: Usage(inputTokens=0, outputTokens=0, totalTokens=0))
180+
181+
154182
@dataclass
155183
class EventLoopMetrics:
156184
"""Aggregated metrics for an event loop's execution.
@@ -159,15 +187,17 @@ class EventLoopMetrics:
159187
cycle_count: Number of event loop cycles executed.
160188
tool_metrics: Metrics for each tool used, keyed by tool name.
161189
cycle_durations: List of durations for each cycle in seconds.
190+
agent_invocations: Agent invocation metrics containing cycles and usage data.
162191
traces: List of execution traces.
163-
accumulated_usage: Accumulated token usage across all model invocations.
192+
accumulated_usage: Accumulated token usage across all model invocations (across all requests).
164193
accumulated_metrics: Accumulated performance metrics across all model invocations.
165194
"""
166195

167196
cycle_count: int = 0
168-
tool_metrics: Dict[str, ToolMetrics] = field(default_factory=dict)
169-
cycle_durations: List[float] = field(default_factory=list)
170-
traces: List[Trace] = field(default_factory=list)
197+
tool_metrics: dict[str, ToolMetrics] = field(default_factory=dict)
198+
cycle_durations: list[float] = field(default_factory=list)
199+
agent_invocations: list[AgentInvocation] = field(default_factory=list)
200+
traces: list[Trace] = field(default_factory=list)
171201
accumulated_usage: Usage = field(default_factory=lambda: Usage(inputTokens=0, outputTokens=0, totalTokens=0))
172202
accumulated_metrics: Metrics = field(default_factory=lambda: Metrics(latencyMs=0))
173203

@@ -176,14 +206,23 @@ def _metrics_client(self) -> "MetricsClient":
176206
"""Get the singleton MetricsClient instance."""
177207
return MetricsClient()
178208

209+
@property
210+
def latest_agent_invocation(self) -> Optional[AgentInvocation]:
211+
"""Get the most recent agent invocation.
212+
213+
Returns:
214+
The most recent AgentInvocation, or None if no invocations exist.
215+
"""
216+
return self.agent_invocations[-1] if self.agent_invocations else None
217+
179218
def start_cycle(
180219
self,
181-
attributes: Optional[Dict[str, Any]] = None,
220+
attributes: Dict[str, Any],
182221
) -> Tuple[float, Trace]:
183222
"""Start a new event loop cycle and create a trace for it.
184223
185224
Args:
186-
attributes: attributes of the metrics.
225+
attributes: attributes of the metrics, including event_loop_cycle_id.
187226
188227
Returns:
189228
A tuple containing the start time and the cycle trace object.
@@ -194,6 +233,14 @@ def start_cycle(
194233
start_time = time.time()
195234
cycle_trace = Trace(f"Cycle {self.cycle_count}", start_time=start_time)
196235
self.traces.append(cycle_trace)
236+
237+
self.agent_invocations[-1].cycles.append(
238+
EventLoopCycleMetric(
239+
event_loop_cycle_id=attributes["event_loop_cycle_id"],
240+
usage=Usage(inputTokens=0, outputTokens=0, totalTokens=0),
241+
)
242+
)
243+
197244
return start_time, cycle_trace
198245

199246
def end_cycle(self, start_time: float, cycle_trace: Trace, attributes: Optional[Dict[str, Any]] = None) -> None:
@@ -252,32 +299,53 @@ def add_tool_usage(
252299
)
253300
tool_trace.end()
254301

302+
def _accumulate_usage(self, target: Usage, source: Usage) -> None:
303+
"""Helper method to accumulate usage from source to target.
304+
305+
Args:
306+
target: The Usage object to accumulate into.
307+
source: The Usage object to accumulate from.
308+
"""
309+
target["inputTokens"] += source["inputTokens"]
310+
target["outputTokens"] += source["outputTokens"]
311+
target["totalTokens"] += source["totalTokens"]
312+
313+
if "cacheReadInputTokens" in source:
314+
target["cacheReadInputTokens"] = target.get("cacheReadInputTokens", 0) + source["cacheReadInputTokens"]
315+
316+
if "cacheWriteInputTokens" in source:
317+
target["cacheWriteInputTokens"] = target.get("cacheWriteInputTokens", 0) + source["cacheWriteInputTokens"]
318+
255319
def update_usage(self, usage: Usage) -> None:
256320
"""Update the accumulated token usage with new usage data.
257321
258322
Args:
259323
usage: The usage data to add to the accumulated totals.
260324
"""
325+
# Record metrics to OpenTelemetry
261326
self._metrics_client.event_loop_input_tokens.record(usage["inputTokens"])
262327
self._metrics_client.event_loop_output_tokens.record(usage["outputTokens"])
263-
self.accumulated_usage["inputTokens"] += usage["inputTokens"]
264-
self.accumulated_usage["outputTokens"] += usage["outputTokens"]
265-
self.accumulated_usage["totalTokens"] += usage["totalTokens"]
266328

267-
# Handle optional cached token metrics
329+
# Handle optional cached token metrics for OpenTelemetry
268330
if "cacheReadInputTokens" in usage:
269-
cache_read_tokens = usage["cacheReadInputTokens"]
270-
self._metrics_client.event_loop_cache_read_input_tokens.record(cache_read_tokens)
271-
self.accumulated_usage["cacheReadInputTokens"] = (
272-
self.accumulated_usage.get("cacheReadInputTokens", 0) + cache_read_tokens
273-
)
274-
331+
self._metrics_client.event_loop_cache_read_input_tokens.record(usage["cacheReadInputTokens"])
275332
if "cacheWriteInputTokens" in usage:
276-
cache_write_tokens = usage["cacheWriteInputTokens"]
277-
self._metrics_client.event_loop_cache_write_input_tokens.record(cache_write_tokens)
278-
self.accumulated_usage["cacheWriteInputTokens"] = (
279-
self.accumulated_usage.get("cacheWriteInputTokens", 0) + cache_write_tokens
280-
)
333+
self._metrics_client.event_loop_cache_write_input_tokens.record(usage["cacheWriteInputTokens"])
334+
335+
self._accumulate_usage(self.accumulated_usage, usage)
336+
self._accumulate_usage(self.agent_invocations[-1].usage, usage)
337+
338+
if self.agent_invocations[-1].cycles:
339+
current_cycle = self.agent_invocations[-1].cycles[-1]
340+
self._accumulate_usage(current_cycle.usage, usage)
341+
342+
def reset_usage_metrics(self) -> None:
343+
"""Start a new agent invocation by creating a new AgentInvocation.
344+
345+
This should be called at the start of a new request to begin tracking
346+
a new agent invocation with fresh usage and cycle data.
347+
"""
348+
self.agent_invocations.append(AgentInvocation())
281349

282350
def update_metrics(self, metrics: Metrics) -> None:
283351
"""Update the accumulated performance metrics with new metrics data.
@@ -322,6 +390,16 @@ def get_summary(self) -> Dict[str, Any]:
322390
"traces": [trace.to_dict() for trace in self.traces],
323391
"accumulated_usage": self.accumulated_usage,
324392
"accumulated_metrics": self.accumulated_metrics,
393+
"agent_invocations": [
394+
{
395+
"usage": invocation.usage,
396+
"cycles": [
397+
{"event_loop_cycle_id": cycle.event_loop_cycle_id, "usage": cycle.usage}
398+
for cycle in invocation.cycles
399+
],
400+
}
401+
for invocation in self.agent_invocations
402+
],
325403
}
326404
return summary
327405

tests/strands/event_loop/test_event_loop.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def agent(model, system_prompt, messages, tool_registry, thread_pool, hook_regis
142142
mock.tool_registry = tool_registry
143143
mock.thread_pool = thread_pool
144144
mock.event_loop_metrics = EventLoopMetrics()
145+
mock.event_loop_metrics.reset_usage_metrics()
145146
mock.hooks = hook_registry
146147
mock.tool_executor = tool_executor
147148
mock._interrupt_state = _InterruptState()

tests/strands/event_loop/test_event_loop_structured_output.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def mock_agent():
3737
agent.messages = []
3838
agent.tool_registry = ToolRegistry()
3939
agent.event_loop_metrics = EventLoopMetrics()
40+
agent.event_loop_metrics.reset_usage_metrics()
4041
agent.hooks = Mock()
4142
agent.hooks.invoke_callbacks_async = AsyncMock()
4243
agent.trace_span = None

tests/strands/telemetry/test_metrics.py

Lines changed: 92 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,9 +240,15 @@ def test_tool_metrics_add_call(success, tool, tool_metrics, mock_get_meter_provi
240240
@unittest.mock.patch.object(strands.telemetry.metrics.uuid, "uuid4")
241241
def test_event_loop_metrics_start_cycle(mock_uuid4, mock_time, event_loop_metrics, mock_get_meter_provider):
242242
mock_time.return_value = 1
243-
mock_uuid4.return_value = "i1"
243+
mock_event_loop_cycle_id = "i1"
244+
mock_uuid4.return_value = mock_event_loop_cycle_id
244245

245-
tru_start_time, tru_cycle_trace = event_loop_metrics.start_cycle()
246+
# Reset must be called first
247+
event_loop_metrics.reset_usage_metrics()
248+
249+
tru_start_time, tru_cycle_trace = event_loop_metrics.start_cycle(
250+
attributes={"event_loop_cycle_id": mock_event_loop_cycle_id}
251+
)
246252
exp_start_time, exp_cycle_trace = 1, strands.telemetry.metrics.Trace("Cycle 1")
247253

248254
tru_attrs = {"cycle_count": event_loop_metrics.cycle_count, "traces": event_loop_metrics.traces}
@@ -256,6 +262,13 @@ def test_event_loop_metrics_start_cycle(mock_uuid4, mock_time, event_loop_metric
256262
and tru_attrs == exp_attrs
257263
)
258264

265+
assert len(event_loop_metrics.agent_invocations) == 1
266+
assert len(event_loop_metrics.agent_invocations[0].cycles) == 1
267+
assert event_loop_metrics.agent_invocations[0].cycles[0].event_loop_cycle_id == "i1"
268+
assert event_loop_metrics.agent_invocations[0].cycles[0].usage["inputTokens"] == 0
269+
assert event_loop_metrics.agent_invocations[0].cycles[0].usage["outputTokens"] == 0
270+
assert event_loop_metrics.agent_invocations[0].cycles[0].usage["totalTokens"] == 0
271+
259272

260273
@unittest.mock.patch.object(strands.telemetry.metrics.time, "time")
261274
def test_event_loop_metrics_end_cycle(mock_time, trace, event_loop_metrics, mock_get_meter_provider):
@@ -324,13 +337,24 @@ def test_event_loop_metrics_add_tool_usage(mock_time, trace, tool, event_loop_me
324337

325338

326339
def test_event_loop_metrics_update_usage(usage, event_loop_metrics, mock_get_meter_provider):
340+
event_loop_metrics.reset_usage_metrics()
341+
event_loop_metrics.start_cycle(attributes={"event_loop_cycle_id": "test-cycle"})
342+
327343
for _ in range(3):
328344
event_loop_metrics.update_usage(usage)
329345

330346
tru_usage = event_loop_metrics.accumulated_usage
331347
exp_usage = Usage(inputTokens=3, outputTokens=6, totalTokens=9, cacheWriteInputTokens=6)
332348

333349
assert tru_usage == exp_usage
350+
351+
assert event_loop_metrics.latest_agent_invocation.usage == exp_usage
352+
353+
assert len(event_loop_metrics.agent_invocations) == 1
354+
assert len(event_loop_metrics.agent_invocations[0].cycles) == 1
355+
assert event_loop_metrics.agent_invocations[0].cycles[0].event_loop_cycle_id == "test-cycle"
356+
assert event_loop_metrics.agent_invocations[0].cycles[0].usage == exp_usage
357+
334358
mock_get_meter_provider.return_value.get_meter.assert_called()
335359
metrics_client = event_loop_metrics._metrics_client
336360
metrics_client.event_loop_input_tokens.record.assert_called()
@@ -370,6 +394,7 @@ def test_event_loop_metrics_get_summary(trace, tool, event_loop_metrics, mock_ge
370394
"outputTokens": 0,
371395
"totalTokens": 0,
372396
},
397+
"agent_invocations": [],
373398
"average_cycle_time": 0,
374399
"tool_usage": {
375400
"tool1": {
@@ -476,3 +501,68 @@ def test_use_ProxyMeter_if_no_global_meter_provider():
476501

477502
# Verify it's using a _ProxyMeter
478503
assert isinstance(metrics_client.meter, _ProxyMeter)
504+
505+
506+
def test_latest_agent_invocation_property(usage, event_loop_metrics, mock_get_meter_provider):
507+
"""Test the latest_agent_invocation property getter"""
508+
# Initially, no invocations exist
509+
assert event_loop_metrics.latest_agent_invocation is None
510+
511+
event_loop_metrics.reset_usage_metrics()
512+
event_loop_metrics.start_cycle(attributes={"event_loop_cycle_id": "cycle-1"})
513+
event_loop_metrics.update_usage(usage)
514+
515+
# latest_agent_invocation should return the first invocation
516+
current = event_loop_metrics.latest_agent_invocation
517+
assert current is not None
518+
assert current.usage["inputTokens"] == 1
519+
assert len(current.cycles) == 1
520+
521+
event_loop_metrics.reset_usage_metrics()
522+
event_loop_metrics.start_cycle(attributes={"event_loop_cycle_id": "cycle-2"})
523+
usage2 = Usage(inputTokens=10, outputTokens=20, totalTokens=30)
524+
event_loop_metrics.update_usage(usage2)
525+
526+
# Should return the second invocation
527+
current = event_loop_metrics.latest_agent_invocation
528+
assert current is not None
529+
assert current.usage["inputTokens"] == 10
530+
assert len(current.cycles) == 1
531+
532+
assert len(event_loop_metrics.agent_invocations) == 2
533+
534+
assert current is event_loop_metrics.agent_invocations[-1]
535+
536+
537+
def test_reset_usage_metrics(usage, event_loop_metrics, mock_get_meter_provider):
538+
"""Test that reset_usage_metrics creates a new agent invocation but preserves accumulated_usage"""
539+
# Add some usage across multiple cycles in first invocation
540+
event_loop_metrics.reset_usage_metrics()
541+
event_loop_metrics.start_cycle(attributes={"event_loop_cycle_id": "cycle-1"})
542+
event_loop_metrics.update_usage(usage)
543+
544+
event_loop_metrics.start_cycle(attributes={"event_loop_cycle_id": "cycle-2"})
545+
usage2 = Usage(inputTokens=10, outputTokens=20, totalTokens=30)
546+
event_loop_metrics.update_usage(usage2)
547+
548+
assert len(event_loop_metrics.agent_invocations) == 1
549+
assert event_loop_metrics.latest_agent_invocation.usage["inputTokens"] == 11
550+
assert len(event_loop_metrics.latest_agent_invocation.cycles) == 2
551+
assert event_loop_metrics.accumulated_usage["inputTokens"] == 11
552+
553+
# Reset - creates a new invocation
554+
event_loop_metrics.reset_usage_metrics()
555+
556+
assert len(event_loop_metrics.agent_invocations) == 2
557+
558+
assert event_loop_metrics.latest_agent_invocation.usage["inputTokens"] == 0
559+
assert event_loop_metrics.latest_agent_invocation.usage["outputTokens"] == 0
560+
assert event_loop_metrics.latest_agent_invocation.usage["totalTokens"] == 0
561+
assert len(event_loop_metrics.latest_agent_invocation.cycles) == 0
562+
563+
# Verify first invocation data is preserved
564+
assert event_loop_metrics.agent_invocations[0].usage["inputTokens"] == 11
565+
assert len(event_loop_metrics.agent_invocations[0].cycles) == 2
566+
567+
# Verify accumulated_usage is NOT cleared
568+
assert event_loop_metrics.accumulated_usage["inputTokens"] == 11

0 commit comments

Comments
 (0)