Skip to content

Commit d4366e3

Browse files
windreamer and Copilot
authored and committed
fix: convert guided decoding schema into Harmony-native mode to avoid Harmony/JSON mode conflict for GPT-OSS
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent c677cdd commit d4366e3

File tree

1 file changed

+34
-4
lines changed

1 file changed

+34
-4
lines changed

lmdeploy/serve/openai/api_server.py

Lines changed: 34 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -424,10 +424,6 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
424424
adapter_name = model_name # got a adapter name
425425
request_id = str(session.session_id)
426426
created_time = int(time.time())
427-
gpt_oss_parser = None
428-
if VariableInterface.async_engine.arch == 'GptOssForCausalLM':
429-
gpt_oss_parser = GptOssChatParser()
430-
431427
if isinstance(request.stop, str):
432428
request.stop = [request.stop]
433429

@@ -438,6 +434,40 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
438434
if request.response_format and request.response_format.type != 'text':
439435
response_format = request.response_format.model_dump()
440436

437+
gpt_oss_parser = None
438+
if VariableInterface.async_engine.arch == 'GptOssForCausalLM':
439+
gpt_oss_parser = GptOssChatParser()
440+
if response_format:
441+
logger.info(f'[GPT-OSS:{request_id}] Structured output requested, converting to Harmony-native mode')
442+
schema_json = json.dumps(response_format, ensure_ascii=False)
443+
format_section = f'\n\n# Response Formats\n\n{schema_json}'
444+
try:
445+
if isinstance(request.messages, str):
446+
# For string prompts, append the format section directly to request.messages
447+
request.messages += format_section
448+
else:
449+
messages = request.messages
450+
appended_to_system = False
451+
for msg in messages:
452+
if msg.get('role') == 'system':
453+
content = msg.get('content')
454+
if content is None:
455+
content = ''
456+
if isinstance(content, str):
457+
msg['content'] = content + format_section
458+
appended_to_system = True
459+
break
460+
if not appended_to_system:
461+
system_msg = {
462+
'role': 'system',
463+
'content': f'You must follow the specified response format.{format_section}'
464+
}
465+
messages.insert(0, system_msg)
466+
467+
response_format = None
468+
except Exception as e:
469+
logger.error(f'[GPT-OSS:{request_id}] Failed to convert response_format to Harmony mode: {str(e)}')
470+
441471
if request.logit_bias is not None:
442472
try:
443473
logits_processors = [

0 commit comments

Comments (0)