@@ -335,21 +335,16 @@ class TestGptOssResponseFormatHarmonyConversion:
335335 """Tests for
336336 :meth:`GptOssResponseParser._convert_response_format_to_harmony`."""
337337
338- @pytest .fixture ()
338+ @pytest .fixture (autouse = True )
339339 def _patch_streamable_parser (self , monkeypatch ):
340340 monkeypatch .setattr (
341341 openai_harmony_mod ,
342342 'StreamableParser' ,
343343 lambda * args , ** kwargs : _FakeStreamableParser ({}),
344344 )
345345
346- def test_response_format_cleared_after_conversion (self , monkeypatch ):
346+ def test_response_format_cleared_after_conversion (self ):
347347 """response_format must be None after the parser processes it."""
348- monkeypatch .setattr (
349- openai_harmony_mod ,
350- 'StreamableParser' ,
351- lambda * args , ** kwargs : _FakeStreamableParser ({}),
352- )
353348 from lmdeploy .serve .openai .protocol import JsonSchema , ResponseFormat
354349
355350 request = ChatCompletionRequest (
@@ -366,14 +361,9 @@ def test_response_format_cleared_after_conversion(self, monkeypatch):
366361 parser = gpt_oss_mod .GptOssResponseParser (request = request , tokenizer = object ())
367362 assert parser .request .response_format is None
368363
369- def test_schema_appended_to_existing_system_message (self , monkeypatch ):
364+ def test_schema_appended_to_existing_system_message (self ):
370365 """When a system message already exists the schema is appended to
371366 it."""
372- monkeypatch .setattr (
373- openai_harmony_mod ,
374- 'StreamableParser' ,
375- lambda * args , ** kwargs : _FakeStreamableParser ({}),
376- )
377367 import json as _json
378368
379369 from lmdeploy .serve .openai .protocol import JsonSchema , ResponseFormat
@@ -403,14 +393,9 @@ def test_schema_appended_to_existing_system_message(self, monkeypatch):
403393 # No leading blank lines in the appended section
404394 assert '\n \n # Response Formats' in msgs [0 ]['content' ]
405395
406- def test_schema_inserted_as_new_system_message_when_none_exists (self , monkeypatch ):
396+ def test_schema_inserted_as_new_system_message_when_none_exists (self ):
407397 """When no system message exists a new one is inserted at position
408398 0."""
409- monkeypatch .setattr (
410- openai_harmony_mod ,
411- 'StreamableParser' ,
412- lambda * args , ** kwargs : _FakeStreamableParser ({}),
413- )
414399 import json as _json
415400
416401 from lmdeploy .serve .openai .protocol import JsonSchema , ResponseFormat
@@ -436,13 +421,8 @@ def test_schema_inserted_as_new_system_message_when_none_exists(self, monkeypatc
436421 # The user message is still present after the inserted system message
437422 assert msgs [1 ]['role' ] == 'user'
438423
439- def test_text_response_format_is_not_converted (self , monkeypatch ):
424+ def test_text_response_format_is_not_converted (self ):
440425 """A text-type response_format should be left untouched."""
441- monkeypatch .setattr (
442- openai_harmony_mod ,
443- 'StreamableParser' ,
444- lambda * args , ** kwargs : _FakeStreamableParser ({}),
445- )
446426 from lmdeploy .serve .openai .protocol import ResponseFormat
447427
448428 request = ChatCompletionRequest (
@@ -454,17 +434,129 @@ def test_text_response_format_is_not_converted(self, monkeypatch):
454434 assert parser .request .response_format is not None
455435 assert parser .request .response_format .type == 'text'
456436
457- def test_no_response_format_leaves_request_unchanged (self , monkeypatch ):
437+ def test_no_response_format_leaves_request_unchanged (self ):
458438 """When response_format is None the request is not modified."""
459- monkeypatch .setattr (
460- openai_harmony_mod ,
461- 'StreamableParser' ,
462- lambda * args , ** kwargs : _FakeStreamableParser ({}),
463- )
464439 request = ChatCompletionRequest (
465440 model = 'openai/gpt-oss-20b' ,
466441 messages = [{'role' : 'user' , 'content' : 'hi' }],
467442 )
468443 parser = gpt_oss_mod .GptOssResponseParser (request = request , tokenizer = object ())
469444 assert parser .request .response_format is None
470445 assert len (parser .request .messages ) == 1
446+
447+ def test_str_messages_gets_schema_appended (self ):
448+ """When messages is a string, the schema section is appended to it."""
449+ import json as _json
450+
451+ from lmdeploy .serve .openai .protocol import JsonSchema , ResponseFormat
452+
453+ schema_dict = {'type' : 'object' , 'properties' : {'x' : {'type' : 'integer' }}}
454+ request = ChatCompletionRequest (
455+ model = 'openai/gpt-oss-20b' ,
456+ messages = 'Tell me a joke' ,
457+ response_format = ResponseFormat (
458+ type = 'json_schema' ,
459+ json_schema = JsonSchema (name = 'test' , schema = schema_dict ),
460+ ),
461+ )
462+ parser = gpt_oss_mod .GptOssResponseParser (request = request , tokenizer = object ())
463+
464+ assert parser .request .response_format is None
465+ assert isinstance (parser .request .messages , str )
466+ assert parser .request .messages .startswith ('Tell me a joke' )
467+ assert '# Response Formats' in parser .request .messages
468+ assert _json .dumps (schema_dict ) in parser .request .messages
469+
470+ def test_non_pydantic_request_messages_updated (self ):
471+ """Non-Pydantic sentinel requests also get messages updated."""
472+ import json as _json
473+
474+ from lmdeploy .serve .openai .protocol import JsonSchema , ResponseFormat
475+
476+ schema_dict = {'type' : 'object' , 'properties' : {'y' : {'type' : 'number' }}}
477+ fmt = ResponseFormat (
478+ type = 'json_schema' ,
479+ json_schema = JsonSchema (name = 'test' , schema = schema_dict ),
480+ )
481+
482+ # Sentinel must NOT have tools/tool_choice attrs so that __init__
483+ # skips the Pydantic-dependent tool-rendering branch.
484+ class _Sentinel :
485+ messages = [{'role' : 'user' , 'content' : 'hi' }]
486+ response_format = fmt
487+
488+ sentinel = _Sentinel ()
489+ parser = gpt_oss_mod .GptOssResponseParser (request = sentinel , tokenizer = object ())
490+
491+ assert parser .request .response_format is None
492+ msgs = parser .request .messages
493+ assert isinstance (msgs , list )
494+ assert msgs [0 ]['role' ] == 'system'
495+ assert '# Response Formats' in msgs [0 ]['content' ]
496+ assert _json .dumps (schema_dict ) in msgs [0 ]['content' ]
497+
498+ def test_list_content_system_message_gets_text_block_appended (self ):
499+ """When system message content is a list (multimodal), append a text
500+ block."""
501+ import json as _json
502+
503+ from lmdeploy .serve .openai .protocol import JsonSchema , ResponseFormat
504+
505+ schema_dict = {'type' : 'object' , 'properties' : {'z' : {'type' : 'boolean' }}}
506+ request = ChatCompletionRequest (
507+ model = 'openai/gpt-oss-20b' ,
508+ messages = [
509+ {'role' : 'system' , 'content' : [
510+ {'type' : 'text' , 'text' : 'You are helpful.' },
511+ {'type' : 'image_url' , 'image_url' : {'url' : 'http://example.com/img.png' }},
512+ ]},
513+ {'role' : 'user' , 'content' : 'hi' },
514+ ],
515+ response_format = ResponseFormat (
516+ type = 'json_schema' ,
517+ json_schema = JsonSchema (name = 'test' , schema = schema_dict ),
518+ ),
519+ )
520+ parser = gpt_oss_mod .GptOssResponseParser (request = request , tokenizer = object ())
521+
522+ assert parser .request .response_format is None
523+ sys_msg = parser .request .messages [0 ]
524+ assert sys_msg ['role' ] == 'system'
525+ content = sys_msg ['content' ]
526+ assert isinstance (content , list )
527+ assert len (content ) == 3
528+ # Original two blocks preserved
529+ assert content [0 ]['type' ] == 'text'
530+ assert content [0 ]['text' ] == 'You are helpful.'
531+ assert content [1 ]['type' ] == 'image_url'
532+ # Schema appended as a text block
533+ assert content [2 ]['type' ] == 'text'
534+ assert '# Response Formats' in content [2 ]['text' ]
535+ assert _json .dumps (schema_dict ) in content [2 ]['text' ]
536+
537+ def test_none_content_system_message_inserts_separate_system (self ):
538+ """When system message content is None, insert a new system message."""
539+ import json as _json
540+
541+ from lmdeploy .serve .openai .protocol import JsonSchema , ResponseFormat
542+
543+ schema_dict = {'type' : 'object' , 'properties' : {'w' : {'type' : 'string' }}}
544+ request = ChatCompletionRequest (
545+ model = 'openai/gpt-oss-20b' ,
546+ messages = [
547+ {'role' : 'system' , 'content' : None },
548+ {'role' : 'user' , 'content' : 'hi' },
549+ ],
550+ response_format = ResponseFormat (
551+ type = 'json_schema' ,
552+ json_schema = JsonSchema (name = 'test' , schema = schema_dict ),
553+ ),
554+ )
555+ parser = gpt_oss_mod .GptOssResponseParser (request = request , tokenizer = object ())
556+
557+ assert parser .request .response_format is None
558+ msgs = parser .request .messages
559+ # A new system message with the schema is inserted at position 0
560+ assert msgs [0 ]['role' ] == 'system'
561+ assert '# Response Formats' in msgs [0 ]['content' ]
562+ assert _json .dumps (schema_dict ) in msgs [0 ]['content' ]
0 commit comments