Skip to content

Commit cb76d43

Browse files
committed
fix: address unresolved PR review comments
1. Guard model_copy() with a hasattr check: extract a _clear_response_format() helper that falls back to in-place mutation for non-Pydantic request objects (e.g. test sentinels). This prevents a double-raise in the except path.
2. Use logger.exception() instead of logger.error(f'...{e}') so that stack traces are preserved in the log output.
3. Mark the _patch_streamable_parser fixture as autouse=True and remove the redundant monkeypatch.setattr calls from individual test methods.
1 parent 0ffa417 commit cb76d43

2 files changed

Lines changed: 162 additions & 44 deletions

File tree

lmdeploy/serve/parsers/_openai_harmony.py

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -87,10 +87,15 @@ def _convert_response_format_to_harmony(self):
8787
format_body = f'# Response Formats\n{format_json}'
8888
messages = self.request.messages
8989

90+
if isinstance(messages, str):
91+
messages = messages + '\n\n' + format_body
92+
self._clear_response_format(messages=messages)
93+
return
94+
9095
if not isinstance(messages, list):
9196
logger.warning('Cannot inject response_format schema into '
9297
'non-list messages for GPT-OSS; clearing response_format only.')
93-
self.request = self.request.model_copy(update={'response_format': None})
98+
self._clear_response_format()
9499
return
95100

96101
new_messages = list(messages)
@@ -100,22 +105,43 @@ def _convert_response_format_to_harmony(self):
100105
)
101106

102107
if system_idx is not None:
103-
content = new_messages[system_idx].get('content') or ''
104-
new_messages[system_idx] = {
105-
**new_messages[system_idx],
106-
'content': content + '\n\n' + format_body,
107-
}
108+
content = new_messages[system_idx].get('content')
109+
if isinstance(content, list):
110+
# Multimodal content blocks — append a text block.
111+
new_messages[system_idx] = {
112+
**new_messages[system_idx],
113+
'content': content + [{'type': 'text', 'text': format_body}],
114+
}
115+
elif isinstance(content, str):
116+
new_messages[system_idx] = {
117+
**new_messages[system_idx],
118+
'content': (content + '\n\n' + format_body) if content else format_body,
119+
}
120+
else:
121+
# content is None or unexpected type — insert a separate
122+
# system message so the schema is still available.
123+
new_messages.insert(0, {'role': 'system', 'content': format_body})
108124
else:
109125
new_messages.insert(0, {'role': 'system', 'content': format_body})
110126

111-
self.request = self.request.model_copy(update={
112-
'response_format': None,
113-
'messages': new_messages,
114-
})
115-
except Exception as e:
116-
logger.error(f'Failed to convert response_format to Harmony-native mode for GPT-OSS: {e}')
127+
self._clear_response_format(messages=new_messages)
128+
except Exception:
129+
logger.exception('Failed to convert response_format to Harmony-native mode for GPT-OSS')
117130
# Still clear response_format to avoid the Harmony/JSON mode conflict
118-
self.request = self.request.model_copy(update={'response_format': None})
131+
self._clear_response_format()
132+
133+
def _clear_response_format(self, messages=None):
134+
"""Clear response_format on the request, handling both Pydantic and
135+
plain objects."""
136+
if hasattr(self.request, 'model_copy'):
137+
update = {'response_format': None}
138+
if messages is not None:
139+
update['messages'] = messages
140+
self.request = self.request.model_copy(update=update)
141+
else:
142+
self.request.response_format = None
143+
if messages is not None:
144+
self.request.messages = messages
119145

120146
def stream_chunk(self, delta_text: str, delta_token_ids: list[int], **kwargs) -> tuple[DeltaMessage | None, bool]:
121147
if (

tests/test_lmdeploy/serve/parsers/test_gpt_oss_parser.py

Lines changed: 123 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -335,21 +335,16 @@ class TestGptOssResponseFormatHarmonyConversion:
335335
"""Tests for
336336
:meth:`GptOssResponseParser._convert_response_format_to_harmony`."""
337337

338-
@pytest.fixture()
338+
@pytest.fixture(autouse=True)
339339
def _patch_streamable_parser(self, monkeypatch):
340340
monkeypatch.setattr(
341341
openai_harmony_mod,
342342
'StreamableParser',
343343
lambda *args, **kwargs: _FakeStreamableParser({}),
344344
)
345345

346-
def test_response_format_cleared_after_conversion(self, monkeypatch):
346+
def test_response_format_cleared_after_conversion(self):
347347
"""response_format must be None after the parser processes it."""
348-
monkeypatch.setattr(
349-
openai_harmony_mod,
350-
'StreamableParser',
351-
lambda *args, **kwargs: _FakeStreamableParser({}),
352-
)
353348
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
354349

355350
request = ChatCompletionRequest(
@@ -366,14 +361,9 @@ def test_response_format_cleared_after_conversion(self, monkeypatch):
366361
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
367362
assert parser.request.response_format is None
368363

369-
def test_schema_appended_to_existing_system_message(self, monkeypatch):
364+
def test_schema_appended_to_existing_system_message(self):
370365
"""When a system message already exists the schema is appended to
371366
it."""
372-
monkeypatch.setattr(
373-
openai_harmony_mod,
374-
'StreamableParser',
375-
lambda *args, **kwargs: _FakeStreamableParser({}),
376-
)
377367
import json as _json
378368

379369
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
@@ -403,14 +393,9 @@ def test_schema_appended_to_existing_system_message(self, monkeypatch):
403393
# The appended section is separated from the existing content by a blank line
404394
assert '\n\n# Response Formats' in msgs[0]['content']
405395

406-
def test_schema_inserted_as_new_system_message_when_none_exists(self, monkeypatch):
396+
def test_schema_inserted_as_new_system_message_when_none_exists(self):
407397
"""When no system message exists a new one is inserted at position
408398
0."""
409-
monkeypatch.setattr(
410-
openai_harmony_mod,
411-
'StreamableParser',
412-
lambda *args, **kwargs: _FakeStreamableParser({}),
413-
)
414399
import json as _json
415400

416401
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
@@ -436,13 +421,8 @@ def test_schema_inserted_as_new_system_message_when_none_exists(self, monkeypatc
436421
# The user message is still present after the inserted system message
437422
assert msgs[1]['role'] == 'user'
438423

439-
def test_text_response_format_is_not_converted(self, monkeypatch):
424+
def test_text_response_format_is_not_converted(self):
440425
"""A text-type response_format should be left untouched."""
441-
monkeypatch.setattr(
442-
openai_harmony_mod,
443-
'StreamableParser',
444-
lambda *args, **kwargs: _FakeStreamableParser({}),
445-
)
446426
from lmdeploy.serve.openai.protocol import ResponseFormat
447427

448428
request = ChatCompletionRequest(
@@ -454,17 +434,129 @@ def test_text_response_format_is_not_converted(self, monkeypatch):
454434
assert parser.request.response_format is not None
455435
assert parser.request.response_format.type == 'text'
456436

457-
def test_no_response_format_leaves_request_unchanged(self, monkeypatch):
437+
def test_no_response_format_leaves_request_unchanged(self):
458438
"""When response_format is None the request is not modified."""
459-
monkeypatch.setattr(
460-
openai_harmony_mod,
461-
'StreamableParser',
462-
lambda *args, **kwargs: _FakeStreamableParser({}),
463-
)
464439
request = ChatCompletionRequest(
465440
model='openai/gpt-oss-20b',
466441
messages=[{'role': 'user', 'content': 'hi'}],
467442
)
468443
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
469444
assert parser.request.response_format is None
470445
assert len(parser.request.messages) == 1
446+
447+
def test_str_messages_gets_schema_appended(self):
448+
"""When messages is a string, the schema section is appended to it."""
449+
import json as _json
450+
451+
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
452+
453+
schema_dict = {'type': 'object', 'properties': {'x': {'type': 'integer'}}}
454+
request = ChatCompletionRequest(
455+
model='openai/gpt-oss-20b',
456+
messages='Tell me a joke',
457+
response_format=ResponseFormat(
458+
type='json_schema',
459+
json_schema=JsonSchema(name='test', schema=schema_dict),
460+
),
461+
)
462+
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
463+
464+
assert parser.request.response_format is None
465+
assert isinstance(parser.request.messages, str)
466+
assert parser.request.messages.startswith('Tell me a joke')
467+
assert '# Response Formats' in parser.request.messages
468+
assert _json.dumps(schema_dict) in parser.request.messages
469+
470+
def test_non_pydantic_request_messages_updated(self):
471+
"""Non-Pydantic sentinel requests also get messages updated."""
472+
import json as _json
473+
474+
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
475+
476+
schema_dict = {'type': 'object', 'properties': {'y': {'type': 'number'}}}
477+
fmt = ResponseFormat(
478+
type='json_schema',
479+
json_schema=JsonSchema(name='test', schema=schema_dict),
480+
)
481+
482+
# Sentinel must NOT have tools/tool_choice attrs so that __init__
483+
# skips the Pydantic-dependent tool-rendering branch.
484+
class _Sentinel:
485+
messages = [{'role': 'user', 'content': 'hi'}]
486+
response_format = fmt
487+
488+
sentinel = _Sentinel()
489+
parser = gpt_oss_mod.GptOssResponseParser(request=sentinel, tokenizer=object())
490+
491+
assert parser.request.response_format is None
492+
msgs = parser.request.messages
493+
assert isinstance(msgs, list)
494+
assert msgs[0]['role'] == 'system'
495+
assert '# Response Formats' in msgs[0]['content']
496+
assert _json.dumps(schema_dict) in msgs[0]['content']
497+
498+
def test_list_content_system_message_gets_text_block_appended(self):
499+
"""When system message content is a list (multimodal), append a text
500+
block."""
501+
import json as _json
502+
503+
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
504+
505+
schema_dict = {'type': 'object', 'properties': {'z': {'type': 'boolean'}}}
506+
request = ChatCompletionRequest(
507+
model='openai/gpt-oss-20b',
508+
messages=[
509+
{'role': 'system', 'content': [
510+
{'type': 'text', 'text': 'You are helpful.'},
511+
{'type': 'image_url', 'image_url': {'url': 'http://example.com/img.png'}},
512+
]},
513+
{'role': 'user', 'content': 'hi'},
514+
],
515+
response_format=ResponseFormat(
516+
type='json_schema',
517+
json_schema=JsonSchema(name='test', schema=schema_dict),
518+
),
519+
)
520+
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
521+
522+
assert parser.request.response_format is None
523+
sys_msg = parser.request.messages[0]
524+
assert sys_msg['role'] == 'system'
525+
content = sys_msg['content']
526+
assert isinstance(content, list)
527+
assert len(content) == 3
528+
# Original two blocks preserved
529+
assert content[0]['type'] == 'text'
530+
assert content[0]['text'] == 'You are helpful.'
531+
assert content[1]['type'] == 'image_url'
532+
# Schema appended as a text block
533+
assert content[2]['type'] == 'text'
534+
assert '# Response Formats' in content[2]['text']
535+
assert _json.dumps(schema_dict) in content[2]['text']
536+
537+
def test_none_content_system_message_inserts_separate_system(self):
538+
"""When system message content is None, insert a new system message."""
539+
import json as _json
540+
541+
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
542+
543+
schema_dict = {'type': 'object', 'properties': {'w': {'type': 'string'}}}
544+
request = ChatCompletionRequest(
545+
model='openai/gpt-oss-20b',
546+
messages=[
547+
{'role': 'system', 'content': None},
548+
{'role': 'user', 'content': 'hi'},
549+
],
550+
response_format=ResponseFormat(
551+
type='json_schema',
552+
json_schema=JsonSchema(name='test', schema=schema_dict),
553+
),
554+
)
555+
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
556+
557+
assert parser.request.response_format is None
558+
msgs = parser.request.messages
559+
# A new system message with the schema is inserted at position 0
560+
assert msgs[0]['role'] == 'system'
561+
assert '# Response Formats' in msgs[0]['content']
562+
assert _json.dumps(schema_dict) in msgs[0]['content']

0 commit comments

Comments
 (0)