Skip to content

Commit 91e9ca5

Browse files
committed
fix: address unresolved PR review comments
1. Guard model_copy() with hasattr check: extract _clear_response_format() helper that falls back to in-place mutation for non-Pydantic request objects (e.g. test sentinels). Prevents double-raise in the except path. 2. Use logger.exception() instead of logger.error(f'...{e}') so that stack traces are preserved in the log output. 3. Mark _patch_streamable_parser fixture as autouse=True and remove redundant monkeypatch.setattr calls from individual test methods.
1 parent 0ffa417 commit 91e9ca5

2 files changed

Lines changed: 80 additions & 39 deletions

File tree

lmdeploy/serve/parsers/_openai_harmony.py

Lines changed: 23 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -87,10 +87,15 @@ def _convert_response_format_to_harmony(self):
8787
format_body = f'# Response Formats\n{format_json}'
8888
messages = self.request.messages
8989

90+
if isinstance(messages, str):
91+
messages = messages + '\n\n' + format_body
92+
self._clear_response_format(messages=messages)
93+
return
94+
9095
if not isinstance(messages, list):
9196
logger.warning('Cannot inject response_format schema into '
9297
'non-list messages for GPT-OSS; clearing response_format only.')
93-
self.request = self.request.model_copy(update={'response_format': None})
98+
self._clear_response_format()
9499
return
95100

96101
new_messages = list(messages)
@@ -108,14 +113,24 @@ def _convert_response_format_to_harmony(self):
108113
else:
109114
new_messages.insert(0, {'role': 'system', 'content': format_body})
110115

111-
self.request = self.request.model_copy(update={
112-
'response_format': None,
113-
'messages': new_messages,
114-
})
115-
except Exception as e:
116-
logger.error(f'Failed to convert response_format to Harmony-native mode for GPT-OSS: {e}')
116+
self._clear_response_format(messages=new_messages)
117+
except Exception:
118+
logger.exception('Failed to convert response_format to Harmony-native mode for GPT-OSS')
117119
# Still clear response_format to avoid the Harmony/JSON mode conflict
118-
self.request = self.request.model_copy(update={'response_format': None})
120+
self._clear_response_format()
121+
122+
def _clear_response_format(self, messages=None):
123+
"""Clear response_format on the request, handling both Pydantic and
124+
plain objects."""
125+
if hasattr(self.request, 'model_copy'):
126+
update = {'response_format': None}
127+
if messages is not None:
128+
update['messages'] = messages
129+
self.request = self.request.model_copy(update=update)
130+
else:
131+
self.request.response_format = None
132+
if messages is not None:
133+
self.request.messages = messages
119134

120135
def stream_chunk(self, delta_text: str, delta_token_ids: list[int], **kwargs) -> tuple[DeltaMessage | None, bool]:
121136
if (

tests/test_lmdeploy/serve/parsers/test_gpt_oss_parser.py

Lines changed: 57 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -335,21 +335,16 @@ class TestGptOssResponseFormatHarmonyConversion:
335335
"""Tests for
336336
:meth:`GptOssResponseParser._convert_response_format_to_harmony`."""
337337

338-
@pytest.fixture()
338+
@pytest.fixture(autouse=True)
339339
def _patch_streamable_parser(self, monkeypatch):
340340
monkeypatch.setattr(
341341
openai_harmony_mod,
342342
'StreamableParser',
343343
lambda *args, **kwargs: _FakeStreamableParser({}),
344344
)
345345

346-
def test_response_format_cleared_after_conversion(self, monkeypatch):
346+
def test_response_format_cleared_after_conversion(self):
347347
"""response_format must be None after the parser processes it."""
348-
monkeypatch.setattr(
349-
openai_harmony_mod,
350-
'StreamableParser',
351-
lambda *args, **kwargs: _FakeStreamableParser({}),
352-
)
353348
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
354349

355350
request = ChatCompletionRequest(
@@ -366,14 +361,9 @@ def test_response_format_cleared_after_conversion(self, monkeypatch):
366361
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
367362
assert parser.request.response_format is None
368363

369-
def test_schema_appended_to_existing_system_message(self, monkeypatch):
364+
def test_schema_appended_to_existing_system_message(self):
370365
"""When a system message already exists the schema is appended to
371366
it."""
372-
monkeypatch.setattr(
373-
openai_harmony_mod,
374-
'StreamableParser',
375-
lambda *args, **kwargs: _FakeStreamableParser({}),
376-
)
377367
import json as _json
378368

379369
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
@@ -403,14 +393,9 @@ def test_schema_appended_to_existing_system_message(self, monkeypatch):
403393
# No leading blank lines in the appended section
404394
assert '\n\n# Response Formats' in msgs[0]['content']
405395

406-
def test_schema_inserted_as_new_system_message_when_none_exists(self, monkeypatch):
396+
def test_schema_inserted_as_new_system_message_when_none_exists(self):
407397
"""When no system message exists a new one is inserted at position
408398
0."""
409-
monkeypatch.setattr(
410-
openai_harmony_mod,
411-
'StreamableParser',
412-
lambda *args, **kwargs: _FakeStreamableParser({}),
413-
)
414399
import json as _json
415400

416401
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
@@ -436,13 +421,8 @@ def test_schema_inserted_as_new_system_message_when_none_exists(self, monkeypatc
436421
# The user message is still present after the inserted system message
437422
assert msgs[1]['role'] == 'user'
438423

439-
def test_text_response_format_is_not_converted(self, monkeypatch):
424+
def test_text_response_format_is_not_converted(self):
440425
"""A text-type response_format should be left untouched."""
441-
monkeypatch.setattr(
442-
openai_harmony_mod,
443-
'StreamableParser',
444-
lambda *args, **kwargs: _FakeStreamableParser({}),
445-
)
446426
from lmdeploy.serve.openai.protocol import ResponseFormat
447427

448428
request = ChatCompletionRequest(
@@ -454,17 +434,63 @@ def test_text_response_format_is_not_converted(self, monkeypatch):
454434
assert parser.request.response_format is not None
455435
assert parser.request.response_format.type == 'text'
456436

457-
def test_no_response_format_leaves_request_unchanged(self, monkeypatch):
437+
def test_no_response_format_leaves_request_unchanged(self):
458438
"""When response_format is None the request is not modified."""
459-
monkeypatch.setattr(
460-
openai_harmony_mod,
461-
'StreamableParser',
462-
lambda *args, **kwargs: _FakeStreamableParser({}),
463-
)
464439
request = ChatCompletionRequest(
465440
model='openai/gpt-oss-20b',
466441
messages=[{'role': 'user', 'content': 'hi'}],
467442
)
468443
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
469444
assert parser.request.response_format is None
470445
assert len(parser.request.messages) == 1
446+
447+
def test_str_messages_gets_schema_appended(self):
448+
"""When messages is a string, the schema section is appended to it."""
449+
import json as _json
450+
451+
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
452+
453+
schema_dict = {'type': 'object', 'properties': {'x': {'type': 'integer'}}}
454+
request = ChatCompletionRequest(
455+
model='openai/gpt-oss-20b',
456+
messages='Tell me a joke',
457+
response_format=ResponseFormat(
458+
type='json_schema',
459+
json_schema=JsonSchema(name='test', schema=schema_dict),
460+
),
461+
)
462+
parser = gpt_oss_mod.GptOssResponseParser(request=request, tokenizer=object())
463+
464+
assert parser.request.response_format is None
465+
assert isinstance(parser.request.messages, str)
466+
assert parser.request.messages.startswith('Tell me a joke')
467+
assert '# Response Formats' in parser.request.messages
468+
assert _json.dumps(schema_dict) in parser.request.messages
469+
470+
def test_non_pydantic_request_messages_updated(self):
471+
"""Non-Pydantic sentinel requests also get messages updated."""
472+
import json as _json
473+
474+
from lmdeploy.serve.openai.protocol import JsonSchema, ResponseFormat
475+
476+
schema_dict = {'type': 'object', 'properties': {'y': {'type': 'number'}}}
477+
fmt = ResponseFormat(
478+
type='json_schema',
479+
json_schema=JsonSchema(name='test', schema=schema_dict),
480+
)
481+
482+
# Sentinel must NOT have tools/tool_choice attrs so that __init__
483+
# skips the Pydantic-dependent tool-rendering branch.
484+
class _Sentinel:
485+
messages = [{'role': 'user', 'content': 'hi'}]
486+
response_format = fmt
487+
488+
sentinel = _Sentinel()
489+
parser = gpt_oss_mod.GptOssResponseParser(request=sentinel, tokenizer=object())
490+
491+
assert parser.request.response_format is None
492+
msgs = parser.request.messages
493+
assert isinstance(msgs, list)
494+
assert msgs[0]['role'] == 'system'
495+
assert '# Response Formats' in msgs[0]['content']
496+
assert _json.dumps(schema_dict) in msgs[0]['content']

0 commit comments

Comments (0)