Skip to content

Commit b8ad727

Browse files
authored
[Performance] Do not set the request uuid, to resolve a performance regression (#228)
Why: Setting the request uuid can trigger jax.jit recompilation for maxtext, because every request uuid is unique; this results in a performance regression.
1 parent d95a4b7 commit b8ad727

1 file changed

Lines changed: 4 additions & 3 deletions

File tree

jetstream/core/orchestrator.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,6 @@ class ActiveRequest:
150150
"""Current state of the driver."""
151151

152152
#################### Information relevant for generation #####################
153-
# The unique id for the activeRequest, used for tracking the request's status
154-
request_id: uuid.UUID
155153
max_tokens: int
156154
# We keep prefill and decode information together in the same object so that
157155
# there is less indirection about where this return channel is.
@@ -163,6 +161,10 @@ class ActiveRequest:
163161
prefill_result: Any = None
164162
# The number of responses for one request.
165163
num_samples: int = 1
164+
# The unique id for the activeRequest, used for tracking the request's status
165+
# TODO(wyzhang): Figure out how to set request uuid without potentially
166+
# causing jax.jit re-compilation for engine api implementation.
167+
request_id: Optional[uuid.UUID] = None
166168
#################### Information relevant for prefill ########################
167169
prefill_content: Optional[str | list[int]] = None
168170
################## Information relevant for detokenization ###################
@@ -1342,7 +1344,6 @@ async def Decode( # pylint: disable=invalid-overridden-method
13421344
)
13431345
# Wrap request as an ActiveRequest.
13441346
active_request = ActiveRequest(
1345-
request_id=uuid.uuid4(),
13461347
max_tokens=request.max_tokens,
13471348
prefill_content=prefill_content,
13481349
is_client_side_tokenization=is_client_side_tokenization,

0 commit comments

Comments
 (0)