diff --git a/src/strands/models/ollama.py b/src/strands/models/ollama.py index 54805ac16..cf7108c3a 100644 --- a/src/strands/models/ollama.py +++ b/src/strands/models/ollama.py @@ -280,7 +280,7 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent: "totalTokens": event["data"].eval_count + event["data"].prompt_eval_count, }, "metrics": { - "latencyMs": event["data"].total_duration / 1e6, + "latencyMs": int(event["data"].total_duration / 1e6), }, }, } diff --git a/tests/strands/models/test_ollama.py b/tests/strands/models/test_ollama.py index 7a6bbf97c..360683d08 100644 --- a/tests/strands/models/test_ollama.py +++ b/tests/strands/models/test_ollama.py @@ -407,7 +407,7 @@ def test_format_chunk_metadata(model): "totalTokens": 150, }, "metrics": { - "latencyMs": 1.0, + "latencyMs": 1, }, }, } @@ -447,7 +447,7 @@ async def test_stream(ollama_client, model, agenerator, alist, captured_warnings { "metadata": { "usage": {"inputTokens": 5, "outputTokens": 10, "totalTokens": 15}, - "metrics": {"latencyMs": 1.0}, + "metrics": {"latencyMs": 1}, } }, ] @@ -525,7 +525,7 @@ async def test_stream_with_tool_calls(ollama_client, model, agenerator, alist): assert tru_events[8] == { "metadata": { "usage": {"inputTokens": 8, "outputTokens": 15, "totalTokens": 23}, - "metrics": {"latencyMs": 2.0}, + "metrics": {"latencyMs": 2}, } } expected_request = {