Skip to content

Commit 91e9ca5

Browse files
committed
fix: address unresolved PR review comments
1. Guard model_copy() with hasattr check: extract _clear_response_format() helper that falls back to in-place mutation for non-Pydantic request objects (e.g. test sentinels). Prevents double-raise in the except path. 2. Use logger.exception() instead of logger.error(f'...{e}') so that stack traces are preserved in the log output. 3. Mark _patch_streamable_parser fixture as autouse=True and remove redundant monkeypatch.setattr calls from individual test methods.
1 parent 0ffa417 commit 91e9ca5

2 files changed

Lines changed: 80 additions & 39 deletions

File tree

lmdeploy/serve/parsers/_openai_harmony.py

Lines changed: 23 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -87,10 +87,15 @@ def _convert_response_format_to_harmony(self):
8787
format_body = f'# Response Formats\n{format_json}'
8888
messages = self.request.messages
8989

90+
if isinstance(messages, str):
91+
messages = messages + '\n\n' + format_body
92+
self._clear_response_format(messages=messages)
93+
return
94+
9095
if not isinstance(messages, list):
9196
logger.warning('Cannot inject response_format schema into '
9297
'non-list messages for GPT-OSS; clearing response_format only.')
93-
self.request = self.request.model_copy(update={'response_format': None})
98+
self._clear_response_format()
9499
return
95100

96101
new_messages = list(messages)
@@ -108,14 +113,24 @@ def _convert_response_format_to_harmony(self):
108113
else:
109114
new_messages.insert(0, {'role': 'system', 'content': format_body})
110115

111-
self.request = self.request.model_copy(update={
112-
'response_format': None,
113-
'messages': new_messages,
114-
})
115-
except Exception as e:
116-
logger.error(f'Failed to convert response_format to Harmony-native mode for GPT-OSS: {e}')
116+
self._clear_response_format(messages=new_messages)
117+
except Exception:
118+
logger.exception('Failed to convert response_format to Harmony-native mode for GPT-OSS')
117119
# Still clear response_format to avoid the Harmony/JSON mode conflict
118-
self.request = self.request.model_copy(update={'response_format': None})
120+
self._clear_response_format()
121+
122+
def _clear_response_format(self, messages=None):
123+
"""Clear response_format on the request, handling both Pydantic and
124+
plain objects."""
125+
if hasattr(self.request, 'model_copy'):
126+
update = {'response_format': None}
127+
if messages is not None:
128+
update['messages'] = messages
129+
self.request = self.request.model_copy(update=update)
130+
else:
131+
self.request.response_format = None
132+
if messages is not None:
133+
self.request.messages = messages
119134

120135
def stream_chunk(self, delta_text: str, delta_token_ids: list[int], **kwargs) -> tuple[DeltaMessage | None, bool]:
121136
if (

tests/test_lmdeploy/serve/parsers/test_gpt_oss_parser.py

Lines changed: 57 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -335,21 +335,16 @@ class TestGptOssResponseFormatHarmonyConversion:
335335
"""Tests for
336336
:meth:`GptOssResponseParser._convert_response_format_to_harmony`."""
337337

338-
@pytest.fixture()
338+
@pytest.fixture(autouse=True)
339339
def _patch_streamable_parser(self, monkeypatch):
340340
monkeypatch.setattr(
341341
openai_harmony_mod,
342342
'StreamableParser',
343343
lambda *args, **kwargs: _FakeStreamableParser({}),
344344
)
345345

346-
def test_response_format_cleared_after_conversion(self, monkeypatch):
346+
def test_response_format_cleared_after_conversion(self):
347347
"""response_format must be None after the parser processes it."""
348-
monkeypatch.setattr(
349-
openai_harmony_mod,
350-
'StreamableParser',
351-
lambda *args, **kwargs: _FakeStreamableParser({}),
352-
)
353348
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
354349

355350
request = ChatCompletionRequest(
@@ -366,14 +361,9 @@ def test_response_format_cleared_after_conversion(self, monkeypatch):
366361
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
367362
assert parser.request.response_format is None
368363

369-
def test_schema_appended_to_existing_system_message(self, monkeypatch):
364+
def test_schema_appended_to_existing_system_message(self):
370365
"""When a system message already exists the schema is appended to
371366
it."""
372-
monkeypatch.setattr(
373-
openai_harmony_mod,
374-
'StreamableParser',
375-
lambda *args, **kwargs: _FakeStreamableParser({}),
376-
)
377367
import json as _json
378368

379369
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
@@ -403,14 +393,9 @@ def test_schema_appended_to_existing_system_message(self, monkeypatch):
403393
# No leading blank lines in the appended section
404394
assert '\n\n# Response Formats' in msgs[0]['content']
405395

406-
def test_schema_inserted_as_new_system_message_when_none_exists(self, monkeypatch):
396+
def test_schema_inserted_as_new_system_message_when_none_exists(self):
407397
"""When no system message exists a new one is inserted at position
408398
0."""
409-
monkeypatch.setattr(
410-
openai_harmony_mod,
411-
'StreamableParser',
412-
lambda *args, **kwargs: _FakeStreamableParser({}),
413-
)
414399
import json as _json
415400

416401
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
@@ -436,13 +421,8 @@ def test_schema_inserted_as_new_system_message_when_none_exists(self, monkeypatc
436421
# The user message is still present after the inserted system message
437422
assert msgs[1]['role'] == 'user'
438423

439-
def test_text_response_format_is_not_converted(self, monkeypatch):
424+
def test_text_response_format_is_not_converted(self):
440425
"""A text-type response_format should be left untouched."""
441-
monkeypatch.setattr(
442-
openai_harmony_mod,
443-
'StreamableParser',
444-
lambda *args, **kwargs: _FakeStreamableParser({}),
445-
)
446426
from lmdeploy.serve.openai.protocol import ResponseFormat
447427

448428
request = ChatCompletionRequest(
@@ -454,17 +434,63 @@ def test_text_response_format_is_not_converted(self, monkeypatch):
454434
assert parser.request.response_format is not None
455435
assert parser.request.response_format.type == 'text'
456436

457-
def test_no_response_format_leaves_request_unchanged(self, monkeypatch):
437+
def test_no_response_format_leaves_request_unchanged(self):
458438
"""When response_format is None the request is not modified."""
459-
monkeypatch.setattr(
460-
openai_harmony_mod,
461-
'StreamableParser',
462-
lambda *args, **kwargs: _FakeStreamableParser({}),
463-
)
464439
request = ChatCompletionRequest(
465440
model='openai/gpt-oss-20b',
466441
messages=[{'role': 'user', 'content': 'hi'}],
467442
)
468443
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
469444
assert parser.request.response_format is None
470445
assert len(parser.request.messages) == 1
446+
447+
def test_str_messages_gets_schema_appended(self):
448+
"""When messages is a string, the schema section is appended to it."""
449+
import json as _json
450+
451+
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
452+
453+
schema_dict = {'type': 'object', 'properties': {'x': {'type': 'integer'}}}
454+
request = ChatCompletionRequest(
455+
model='openai/gpt-oss-20b',
456+
messages='Tell me a joke',
457+
response_format=ResponseFormat(
458+
type='json_schema',
459+
json_schema=JsonSchema(name='test', schema=schema_dict),
460+
),
461+
)
462+
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
463+
464+
assert parser.request.response_format is None
465+
assert isinstance(parser.request.messages, str)
466+
assert parser.request.messages.startswith('Tell me a joke')
467+
assert '# Response Formats' in parser.request.messages
468+
assert _json.dumps(schema_dict) in parser.request.messages
469+
470+
def test_non_pydantic_request_messages_updated(self):
471+
"""Non-Pydantic sentinel requests also get messages updated."""
472+
import json as _json
473+
474+
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
475+
476+
schema_dict = {'type': 'object', 'properties': {'y': {'type': 'number'}}}
477+
fmt = ResponseFormat(
478+
type='json_schema',
479+
json_schema=JsonSchema(name='test', schema=schema_dict),
480+
)
481+
482+
# Sentinel must NOT have tools/tool_choice attrs so that __init__
483+
# skips the Pydantic-dependent tool-rendering branch.
484+
class _Sentinel:
485+
messages = [{'role': 'user', 'content': 'hi'}]
486+
response_format = fmt
487+
488+
sentinel = _Sentinel()
489+
parser = gpt_oss_mod.GptOssResponseParser(request=sentinel, tokenizer=object())
490+
491+
assert parser.request.response_format is None
492+
msgs = parser.request.messages
493+
assert isinstance(msgs, list)
494+
assert msgs[0]['role'] == 'system'
495+
assert '# Response Formats' in msgs[0]['content']
496+
assert _json.dumps(schema_dict) in msgs[0]['content']

0 commit comments

Comments (0)