Skip to content

Commit 1f6d748

Browse files
committed
fix: convert guided decoding schema into Harmony-native mode to avoid Harmony/JSON mode conflict for GPT-OSS
1 parent 20ae545 commit 1f6d748

1 file changed

Lines changed: 27 additions & 4 deletions

File tree

lmdeploy/serve/openai/api_server.py

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -391,10 +391,6 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
391391
adapter_name = model_name # got a adapter name
392392
request_id = str(session.session_id)
393393
created_time = int(time.time())
394-
gpt_oss_parser = None
395-
if VariableInterface.async_engine.arch == 'GptOssForCausalLM':
396-
gpt_oss_parser = GptOssChatParser()
397-
398394
if isinstance(request.stop, str):
399395
request.stop = [request.stop]
400396

@@ -405,6 +401,33 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
405401
if request.response_format and request.response_format.type != 'text':
406402
response_format = request.response_format.model_dump()
407403

404+
gpt_oss_parser = None
405+
if VariableInterface.async_engine.arch == 'GptOssForCausalLM':
406+
gpt_oss_parser = GptOssChatParser()
407+
if response_format:
408+
logger.info(f'[GPT-OSS:{request_id}] Structured output requested, converting to Harmony-native mode')
409+
schema_json = json.dumps(response_format, ensure_ascii=False)
410+
format_section = f'\n\n# Response Formats\n\n{schema_json}'
411+
try:
412+
messages = request.messages
413+
if isinstance(messages, str):
414+
messages += format_section
415+
else:
416+
for msg in messages:
417+
if msg['role'] == 'system':
418+
msg['content'] += format_section
419+
break
420+
else:
421+
system_msg = {
422+
'role': 'system',
423+
'content': f'You must follow the specified response format.{format_section}'
424+
}
425+
messages.insert(0, system_msg)
426+
427+
response_format = None
428+
except Exception as e:
429+
logger.error(f'[GPT-OSS:{request_id}] Failed to convert response_format to Harmony mode: {str(e)}')
430+
408431
if request.logit_bias is not None:
409432
try:
410433
logits_processors = [

0 commit comments

Comments
 (0)