Skip to content

Commit d65b35f

Browse files
committed
fix: refine vLLM structured output implementation
- Make GuidedDecodingParams import conditional (try/except) for backwards compatibility with older vLLM versions
- Remove GBNF grammar fallback — vLLM expects EBNF, not GBNF, so passing LocalAI's GBNF grammar would produce confusing errors
- Pass JSONSchema as string directly instead of parsing to dict (safer across vLLM versions)
- Add GBNF grammar generation for json_schema in completion endpoint so non-vLLM backends (llama.cpp) also get grammar enforcement

Ref: #6857

Signed-off-by: eureka928 <meobius123@gmail.com>
1 parent ea89ee8 commit d65b35f

2 files changed

Lines changed: 23 additions & 15 deletions

File tree

backend/python/vllm/backend.py

Lines changed: 14 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,11 @@
1616
import grpc
1717
from vllm.engine.arg_utils import AsyncEngineArgs
1818
from vllm.engine.async_llm_engine import AsyncLLMEngine
19-
from vllm.sampling_params import SamplingParams, GuidedDecodingParams
19+
from vllm.sampling_params import SamplingParams
20+
try:
21+
from vllm.sampling_params import GuidedDecodingParams
22+
except ImportError:
23+
GuidedDecodingParams = None
2024
from vllm.utils import random_uuid
2125
from vllm.transformers_utils.tokenizer import get_tokenizer
2226
from vllm.multimodal.utils import fetch_image
@@ -231,20 +235,15 @@ async def _predict(self, request, context, streaming=False):
231235
setattr(sampling_params, param_field, value)
232236

233237
# Handle structured output via guided decoding
234-
guided_decoding = None
235-
if hasattr(request, 'JSONSchema') and request.JSONSchema:
236-
try:
237-
schema = json.loads(request.JSONSchema)
238-
guided_decoding = GuidedDecodingParams(json_schema=schema)
239-
except json.JSONDecodeError as e:
240-
print(f"Failed to parse JSONSchema: {e}", file=sys.stderr)
241-
elif hasattr(request, 'ResponseFormat') and request.ResponseFormat == "json_object":
242-
guided_decoding = GuidedDecodingParams(json_object=True)
243-
elif hasattr(request, 'Grammar') and request.Grammar:
244-
guided_decoding = GuidedDecodingParams(grammar=request.Grammar)
245-
246-
if guided_decoding is not None:
247-
sampling_params.guided_decoding = guided_decoding
238+
if GuidedDecodingParams is not None:
239+
guided_decoding = None
240+
if hasattr(request, 'JSONSchema') and request.JSONSchema:
241+
guided_decoding = GuidedDecodingParams(json_schema=request.JSONSchema)
242+
elif hasattr(request, 'ResponseFormat') and request.ResponseFormat == "json_object":
243+
guided_decoding = GuidedDecodingParams(json_object=True)
244+
245+
if guided_decoding is not None:
246+
sampling_params.guided_decoding = guided_decoding
248247

249248
# Extract image paths and process images
250249
prompt = request.Prompt

core/http/endpoints/openai/completion.go

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,15 @@ func CompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, eva
101101
if err == nil {
102102
config.JSONSchema = string(schemaBytes)
103103
}
104+
fs := &functions.JSONFunctionStructure{
105+
AnyOf: []functions.Item{jsr.JsonSchema.Schema},
106+
}
107+
g, err := fs.Grammar(config.FunctionsConfig.GrammarOptions()...)
108+
if err == nil {
109+
input.Grammar = g
110+
} else {
111+
xlog.Error("Failed generating grammar", "error", err)
112+
}
104113
}
105114
}
106115
}

0 commit comments

Comments (0)