Skip to content

Commit d3f847a

Browse files
windreamer and Copilot committed
fix: convert guided decoding schema into Harmony-native mode to avoid Harmony/JSON mode conflict for GPT-OSS
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 20ae545 commit d3f847a

1 file changed

Lines changed: 34 additions & 4 deletions

File tree

lmdeploy/serve/openai/api_server.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -391,10 +391,6 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
391391
adapter_name = model_name # got a adapter name
392392
request_id = str(session.session_id)
393393
created_time = int(time.time())
394-
gpt_oss_parser = None
395-
if VariableInterface.async_engine.arch == 'GptOssForCausalLM':
396-
gpt_oss_parser = GptOssChatParser()
397-
398394
if isinstance(request.stop, str):
399395
request.stop = [request.stop]
400396

@@ -405,6 +401,40 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
405401
if request.response_format and request.response_format.type != 'text':
406402
response_format = request.response_format.model_dump()
407403

404+
gpt_oss_parser = None
405+
if VariableInterface.async_engine.arch == 'GptOssForCausalLM':
406+
gpt_oss_parser = GptOssChatParser()
407+
if response_format:
408+
logger.info(f'[GPT-OSS:{request_id}] Structured output requested, converting to Harmony-native mode')
409+
schema_json = json.dumps(response_format, ensure_ascii=False)
410+
format_section = f'\n\n# Response Formats\n\n{schema_json}'
411+
try:
412+
if isinstance(request.messages, str):
413+
# For string prompts, append the format section directly to request.messages
414+
request.messages += format_section
415+
else:
416+
messages = request.messages
417+
appended_to_system = False
418+
for msg in messages:
419+
if msg.get('role') == 'system':
420+
content = msg.get('content')
421+
if content is None:
422+
content = ''
423+
if isinstance(content, str):
424+
msg['content'] = content + format_section
425+
appended_to_system = True
426+
break
427+
if not appended_to_system:
428+
system_msg = {
429+
'role': 'system',
430+
'content': f'You must follow the specified response format.{format_section}'
431+
}
432+
messages.insert(0, system_msg)
433+
434+
response_format = None
435+
except Exception as e:
436+
logger.error(f'[GPT-OSS:{request_id}] Failed to convert response_format to Harmony mode: {str(e)}')
437+
408438
if request.logit_bias is not None:
409439
try:
410440
logits_processors = [

0 commit comments

Comments
 (0)