@@ -391,10 +391,6 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
391391 adapter_name = model_name # got an adapter name
392392 request_id = str (session .session_id )
393393 created_time = int (time .time ())
394- gpt_oss_parser = None
395- if VariableInterface .async_engine .arch == 'GptOssForCausalLM' :
396- gpt_oss_parser = GptOssChatParser ()
397-
398394 if isinstance (request .stop , str ):
399395 request .stop = [request .stop ]
400396
@@ -405,6 +401,33 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
405401 if request .response_format and request .response_format .type != 'text' :
406402 response_format = request .response_format .model_dump ()
407403
404+ gpt_oss_parser = None
405+ if VariableInterface .async_engine .arch == 'GptOssForCausalLM' :
406+ gpt_oss_parser = GptOssChatParser ()
407+ if response_format :
408+ logger .info (f'[GPT-OSS:{ request_id } ] Structured output requested, converting to Harmony-native mode' )
409+ schema_json = json .dumps (response_format , ensure_ascii = False )
410+ format_section = f'\n \n # Response Formats\n \n { schema_json } '
411+ try :
412+ messages = request .messages
413+ if isinstance (messages , str ):
414+ messages += format_section
415+ else :
416+ for msg in messages :
417+ if msg ['role' ] == 'system' :
418+ msg ['content' ] += format_section
419+ break
420+ else :
421+ system_msg = {
422+ 'role' : 'system' ,
423+ 'content' : f'You must follow the specified response format.{ format_section } '
424+ }
425+ messages .insert (0 , system_msg )
426+
427+ response_format = None
428+ except Exception as e :
429+ logger .error (f'[GPT-OSS:{ request_id } ] Failed to convert response_format to Harmony mode: { str (e )} ' )
430+
408431 if request .logit_bias is not None :
409432 try :
410433 logits_processors = [
0 commit comments