Skip to content

Commit 8cb9ef2

Browse files
windreamer and Copilot committed
fix: convert guided decoding schema into Harmony-native mode to avoid Harmony/JSON mode conflict for GPT-OSS
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 81be529 commit 8cb9ef2

1 file changed

Lines changed: 38 additions & 0 deletions

File tree

lmdeploy/serve/openai/api_server.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -400,6 +400,44 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
400400
gen_logprobs, logits_processors = None, None
401401
if request.logprobs and request.top_logprobs:
402402
gen_logprobs = request.top_logprobs
403+
response_format = None
404+
if request.response_format and request.response_format.type != 'text':
405+
response_format = request.response_format.model_dump()
406+
407+
gpt_oss_parser = None
408+
if VariableInterface.async_engine.arch == 'GptOssForCausalLM':
409+
gpt_oss_parser = GptOssChatParser()
410+
if response_format:
411+
logger.info(f'[GPT-OSS:{request_id}] Structured output requested, converting to Harmony-native mode')
412+
schema_json = json.dumps(response_format, ensure_ascii=False)
413+
format_section = f'\n\n# Response Formats\n\n{schema_json}'
414+
try:
415+
if isinstance(request.messages, str):
416+
# For string prompts, append the format section directly to request.messages
417+
request.messages += format_section
418+
else:
419+
messages = request.messages
420+
appended_to_system = False
421+
for msg in messages:
422+
if msg.get('role') == 'system':
423+
content = msg.get('content')
424+
if content is None:
425+
content = ''
426+
if isinstance(content, str):
427+
msg['content'] = content + format_section
428+
appended_to_system = True
429+
break
430+
if not appended_to_system:
431+
system_msg = {
432+
'role': 'system',
433+
'content': f'You must follow the specified response format.{format_section}'
434+
}
435+
messages.insert(0, system_msg)
436+
437+
response_format = None
438+
except Exception as e:
439+
logger.error(f'[GPT-OSS:{request_id}] Failed to convert response_format to Harmony mode: {str(e)}')
440+
403441
if request.logit_bias is not None:
404442
try:
405443
logits_processors = [

0 commit comments

Comments (0)