@@ -391,10 +391,6 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
391391 adapter_name = model_name # got a adapter name
392392 request_id = str (session .session_id )
393393 created_time = int (time .time ())
394- gpt_oss_parser = None
395- if VariableInterface .async_engine .arch == 'GptOssForCausalLM' :
396- gpt_oss_parser = GptOssChatParser ()
397-
398394 if isinstance (request .stop , str ):
399395 request .stop = [request .stop ]
400396
@@ -405,6 +401,40 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
405401 if request .response_format and request .response_format .type != 'text' :
406402 response_format = request .response_format .model_dump ()
407403
404+ gpt_oss_parser = None
405+ if VariableInterface .async_engine .arch == 'GptOssForCausalLM' :
406+ gpt_oss_parser = GptOssChatParser ()
407+ if response_format :
408+ logger .info (f'[GPT-OSS:{ request_id } ] Structured output requested, converting to Harmony-native mode' )
409+ schema_json = json .dumps (response_format , ensure_ascii = False )
410+ format_section = f'\n \n # Response Formats\n \n { schema_json } '
411+ try :
412+ if isinstance (request .messages , str ):
413+ # For string prompts, append the format section directly to request.messages
414+ request .messages += format_section
415+ else :
416+ messages = request .messages
417+ appended_to_system = False
418+ for msg in messages :
419+ if msg .get ('role' ) == 'system' :
420+ content = msg .get ('content' )
421+ if content is None :
422+ content = ''
423+ if isinstance (content , str ):
424+ msg ['content' ] = content + format_section
425+ appended_to_system = True
426+ break
427+ if not appended_to_system :
428+ system_msg = {
429+ 'role' : 'system' ,
430+ 'content' : f'You must follow the specified response format.{ format_section } '
431+ }
432+ messages .insert (0 , system_msg )
433+
434+ response_format = None
435+ except Exception as e :
436+ logger .error (f'[GPT-OSS:{ request_id } ] Failed to convert response_format to Harmony mode: { str (e )} ' )
437+
408438 if request .logit_bias is not None :
409439 try :
410440 logits_processors = [
0 commit comments