Skip to content

Commit d65b35f

Browse files
committed
fix: refine vLLM structured output implementation
- Make GuidedDecodingParams import conditional (try/except) for backwards compatibility with older vLLM versions
- Remove GBNF grammar fallback — vLLM expects EBNF, not GBNF, so passing LocalAI's GBNF grammar would produce confusing errors
- Pass JSONSchema as string directly instead of parsing to dict (safer across vLLM versions)
- Add GBNF grammar generation for json_schema in completion endpoint so non-vLLM backends (llama.cpp) also get grammar enforcement

Ref: #6857

Signed-off-by: eureka928 <meobius123@gmail.com>
1 parent ea89ee8 commit d65b35f

2 files changed

Lines changed: 23 additions & 15 deletions

File tree

backend/python/vllm/backend.py

Lines changed: 14 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,11 @@
1616
import grpc
1717
from vllm.engine.arg_utils import AsyncEngineArgs
1818
from vllm.engine.async_llm_engine import AsyncLLMEngine
19-
from vllm.sampling_params import SamplingParams, GuidedDecodingParams
19+
from vllm.sampling_params import SamplingParams
20+
try:
21+
from vllm.sampling_params import GuidedDecodingParams
22+
except ImportError:
23+
GuidedDecodingParams = None
2024
from vllm.utils import random_uuid
2125
from vllm.transformers_utils.tokenizer import get_tokenizer
2226
from vllm.multimodal.utils import fetch_image
@@ -231,20 +235,15 @@ async def _predict(self, request, context, streaming=False):
231235
setattr(sampling_params, param_field, value)
232236

233237
# Handle structured output via guided decoding
234-
guided_decoding = None
235-
if hasattr(request, 'JSONSchema') and request.JSONSchema:
236-
try:
237-
schema = json.loads(request.JSONSchema)
238-
guided_decoding = GuidedDecodingParams(json_schema=schema)
239-
except json.JSONDecodeError as e:
240-
print(f"Failed to parse JSONSchema: {e}", file=sys.stderr)
241-
elif hasattr(request, 'ResponseFormat') and request.ResponseFormat == "json_object":
242-
guided_decoding = GuidedDecodingParams(json_object=True)
243-
elif hasattr(request, 'Grammar') and request.Grammar:
244-
guided_decoding = GuidedDecodingParams(grammar=request.Grammar)
245-
246-
if guided_decoding is not None:
247-
sampling_params.guided_decoding = guided_decoding
238+
if GuidedDecodingParams is not None:
239+
guided_decoding = None
240+
if hasattr(request, 'JSONSchema') and request.JSONSchema:
241+
guided_decoding = GuidedDecodingParams(json_schema=request.JSONSchema)
242+
elif hasattr(request, 'ResponseFormat') and request.ResponseFormat == "json_object":
243+
guided_decoding = GuidedDecodingParams(json_object=True)
244+
245+
if guided_decoding is not None:
246+
sampling_params.guided_decoding = guided_decoding
248247

249248
# Extract image paths and process images
250249
prompt = request.Prompt

core/http/endpoints/openai/completion.go

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,15 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
101101
if err == nil {
102102
config.JSONSchema = string(schemaBytes)
103103
}
104+
fs := &functions.JSONFunctionStructure{
105+
AnyOf: []functions.Item{jsr.JsonSchema.Schema},
106+
}
107+
g, err := fs.Grammar(config.FunctionsConfig.GrammarOptions()...)
108+
if err == nil {
109+
input.Grammar = g
110+
} else {
111+
xlog.Error("Failed generating grammar", "error", err)
112+
}
104113
}
105114
}
106115
}

0 commit comments

Comments (0)