@@ -150,8 +150,6 @@ class ActiveRequest:
150150 """Current state of the driver."""
151151
152152 #################### Information relevant for generation #####################
153- # The unique id for the activeRequest, used for tracking the request's status
154- request_id : uuid .UUID
155153 max_tokens : int
156154 # We keep prefill and decode information together in the same object so that
157155 # there is less indirection about where this return channel is.
@@ -163,6 +161,10 @@ class ActiveRequest:
163161 prefill_result : Any = None
164162 # The number of responses for one request.
165163 num_samples : int = 1
164+ # The unique id for the ActiveRequest, used for tracking the request's status
165+ # TODO(wyzhang): Figure out how to set the request UUID without potentially
166+ # causing jax.jit re-compilation for engine API implementation.
167+ request_id : Optional [uuid .UUID ] = None
166168 #################### Information relevant for prefill ########################
167169 prefill_content : Optional [str | list [int ]] = None
168170 ################## Information relevant for detokenization ###################
@@ -1342,7 +1344,6 @@ async def Decode( # pylint: disable=invalid-overridden-method
13421344 )
13431345 # Wrap request as an ActiveRequest.
13441346 active_request = ActiveRequest (
1345- request_id = uuid .uuid4 (),
13461347 max_tokens = request .max_tokens ,
13471348 prefill_content = prefill_content ,
13481349 is_client_side_tokenization = is_client_side_tokenization ,
0 commit comments