66import base64
77import json
88import logging
9+ import time
910from typing import Any , AsyncGenerator , Iterable , Optional , Type , TypeVar , Union
1011
1112import mistralai
@@ -334,7 +335,8 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
334335 return {"messageStop" : {"stopReason" : reason }}
335336
336337 case "metadata" :
337- usage = event ["data" ]
338+ usage = event ["data" ]["usage" ]
339+ metrics = event ["data" ]["metrics" ]
338340 return {
339341 "metadata" : {
340342 "usage" : {
@@ -343,7 +345,7 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
343345 "totalTokens" : usage .total_tokens ,
344346 },
345347 "metrics" : {
346- "latencyMs" : event . get ( "latency_ms" , 0 ) ,
348+ "latencyMs" : metrics [ "latency" ] * 1000 ,
347349 },
348350 },
349351 }
@@ -360,6 +362,8 @@ def _handle_non_streaming_response(self, response: Any) -> Iterable[dict[str, An
360362 Yields:
361363 Formatted events that match the streaming format.
362364 """
365+ start_time = time .time ()
366+
363367 yield {"chunk_type" : "message_start" }
364368
365369 content_started = False
@@ -389,7 +393,12 @@ def _handle_non_streaming_response(self, response: Any) -> Iterable[dict[str, An
389393 yield {"chunk_type" : "message_stop" , "data" : finish_reason }
390394
391395 if hasattr (response , "usage" ) and response .usage :
392- yield {"chunk_type" : "metadata" , "data" : response .usage }
396+ end_time = time .time ()
397+ latency = end_time - start_time
398+ yield {
399+ "chunk_type" : "metadata" ,
400+ "data" : {"usage" : response .usage , "metrics" : {"latency" : latency }},
401+ }
393402
394403 @override
395404 async def stream (
@@ -434,6 +443,7 @@ async def stream(
434443
435444 # Use the streaming API
436445 async with mistralai .Mistral (** self .client_args ) as client :
446+ start_time = time .time ()
437447 stream_response = await client .chat .stream_async (** request )
438448
439449 yield self .format_chunk ({"chunk_type" : "message_start" })
@@ -488,7 +498,14 @@ async def stream(
488498 yield self .format_chunk ({"chunk_type" : "message_stop" , "data" : choice .finish_reason })
489499
490500 if hasattr (chunk , "usage" ):
491- yield self .format_chunk ({"chunk_type" : "metadata" , "data" : chunk .usage })
501+ end_time = time .time ()
502+ latency = end_time - start_time
503+ yield self .format_chunk (
504+ {
505+ "chunk_type" : "metadata" ,
506+ "data" : {"usage" : chunk .usage , "metrics" : {"latency" : latency }},
507+ }
508+ )
492509
493510 except Exception as e :
494511 if "rate" in str (e ).lower () or "429" in str (e ):
0 commit comments