Skip to content

Commit df658b9

Browse files
ziyangliu-666 authored and windreamer committed
Fix issues found in Copilot review of n parameter support
- Fix TypeError in chat streaming path: create_stream_response_json was returning model_dump_json() (str) but cache_block_ids injection subscripted it as a dict; switch to model_dump() + json.dumps()
- Fix stateful GptOssChatParser shared across concurrent asyncio.gather calls in non-streaming n>1 path; create a fresh instance per choice, consistent with the streaming path
- Fix tool-call parse exceptions being swallowed and misreported as "Client disconnected"; re-raise so asyncio.gather propagates them, wrap gather in try/except to return INTERNAL_SERVER_ERROR
- Add missing test_completion_n_negative_rejected to match the existing test_chat_n_negative_rejected
1 parent c5e86fd commit df658b9

2 files changed

Lines changed: 14 additions & 5 deletions

File tree

lmdeploy/serve/openai/api_server.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ def create_stream_response_json(index: int,
546546
choices=[choice_data],
547547
usage=usage,
548548
)
549-
response_json = response.model_dump_json()
549+
response_json = response.model_dump()
550550

551551
return response_json
552552

@@ -628,7 +628,7 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
628628
if res.cache_block_ids is not None:
629629
response_json['cache_block_ids'] = res.cache_block_ids
630630
response_json['remote_token_ids'] = res.token_ids
631-
yield f'data: {response_json}\n\n'
631+
yield f'data: {json.dumps(response_json)}\n\n'
632632
yield 'data: [DONE]\n\n'
633633

634634
# Streaming response
@@ -664,7 +664,8 @@ async def _collect_chat_response(_i, _gen, _sess):
664664
remote_token_ids_i.append(res.token_ids)
665665

666666
if gpt_oss_parser:
667-
message_i = gpt_oss_parser.parse_full(final_token_ids_i)
667+
_parser_i = GptOssChatParser()
668+
message_i = _parser_i.parse_full(final_token_ids_i)
668669
if final_res_i.finish_reason == 'stop' and len(message_i.tool_calls) > 0:
669670
final_res_i.finish_reason = 'tool_calls'
670671
else:
@@ -679,7 +680,7 @@ async def _collect_chat_response(_i, _gen, _sess):
679680
final_res_i.finish_reason = 'tool_calls'
680681
except Exception as e:
681682
logger.error(f'Failed to parse {text_i}. Exception: {e}.')
682-
return False
683+
raise
683684
elif request.tool_choice != 'none' and request.tools is not None and VariableInterface.tool_parser is None:
684685
logger.error('Please launch the api_server with --tool-call-parser if you want to use tool.')
685686

@@ -713,7 +714,10 @@ async def _collect_chat_response(_i, _gen, _sess):
713714
_completion_tokens += final_res_i.generate_token_len
714715
return True
715716

716-
results = await asyncio.gather(*[_collect_chat_response(_i, generators[_i], sessions[_i]) for _i in range(_n)])
717+
try:
718+
results = await asyncio.gather(*[_collect_chat_response(_i, generators[_i], sessions[_i]) for _i in range(_n)])
719+
except Exception as e:
720+
return create_error_response(HTTPStatus.INTERNAL_SERVER_ERROR, str(e))
717721
if not all(results):
718722
return create_error_response(HTTPStatus.BAD_REQUEST, 'Client disconnected')
719723

tests/test_lmdeploy/test_n_parameter.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ def test_chat_n_negative_rejected(self):
9292
req = ChatCompletionRequest(model='m', messages='hi', n=-1)
9393
assert chat_check_request(req, ctx) != ''
9494

95+
def test_completion_n_negative_rejected(self):
96+
ctx = self._make_server_context()
97+
req = CompletionRequest(model='m', prompt='hi', n=-1)
98+
assert completion_check_request(req, ctx) != ''
99+
95100

96101
# ---------------------------------------------------------------------------
97102
# API handler tests (mocking VariableInterface and raw_request)

0 commit comments

Comments
 (0)