From 5dfdafc7b6b55b78946dc84b7abaa5d92a6fe104 Mon Sep 17 00:00:00 2001
From: Thang Sian Khawm
Date: Tue, 24 Mar 2026 15:46:09 +0700
Subject: [PATCH 1/2] fix(wren-ai-service): strip json_schema response_format for non-OpenAI models and guard empty SQL diagnosis response

---
 .../src/pipelines/generation/sql_diagnosis.py | 5 ++++-
 wren-ai-service/src/providers/llm/litellm.py  | 9 +++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/wren-ai-service/src/pipelines/generation/sql_diagnosis.py b/wren-ai-service/src/pipelines/generation/sql_diagnosis.py
index 3f22b9d512..642a06c103 100644
--- a/wren-ai-service/src/pipelines/generation/sql_diagnosis.py
+++ b/wren-ai-service/src/pipelines/generation/sql_diagnosis.py
@@ -92,7 +92,10 @@ async def generate_sql_diagnosis(
 async def post_process(
     generate_sql_diagnosis: dict,
 ) -> str:
-    return orjson.loads(generate_sql_diagnosis.get("replies")[0])
+    reply = generate_sql_diagnosis.get("replies", [""])[0]
+    if not reply or not reply.strip():
+        return {"reasoning": "LLM did not return any response."}
+    return orjson.loads(reply)
 
 
 ## End of Pipeline
diff --git a/wren-ai-service/src/providers/llm/litellm.py b/wren-ai-service/src/providers/llm/litellm.py
index e94918b8c5..44235b7772 100644
--- a/wren-ai-service/src/providers/llm/litellm.py
+++ b/wren-ai-service/src/providers/llm/litellm.py
@@ -102,7 +102,16 @@ async def _run(
             generation_kwargs = {
                 **combined_generation_kwargs,
                 **(generation_kwargs or {}),
+
             }
+            # Strip response_format with type=json_schema — only supported by OpenAI
+            # native models. Custom api_base models (e.g. Ollama, LiteLLM proxy with
+            # non-OpenAI backends) return empty responses when this is forwarded.
+            # System prompts already include explicit JSON format instructions.
+            if self._api_base and isinstance(
+                generation_kwargs.get("response_format"), dict
+            ) and generation_kwargs["response_format"].get("type") == "json_schema":
+                generation_kwargs.pop("response_format")
 
             allowed_openai_params = generation_kwargs.get(
                 "allowed_openai_params", []

From 8c5e239bda309a3535709be092a2df8bd4ce7fdf Mon Sep 17 00:00:00 2001
From: Thang Sian Khawm
Date: Tue, 24 Mar 2026 16:54:44 +0700
Subject: [PATCH 2/2] fix(wren-ai-service): address review comments and improve json_schema stripping logic

---
 wren-ai-service/src/pipelines/generation/sql_diagnosis.py | 2 +-
 wren-ai-service/src/providers/llm/litellm.py               | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/wren-ai-service/src/pipelines/generation/sql_diagnosis.py b/wren-ai-service/src/pipelines/generation/sql_diagnosis.py
index 642a06c103..60f3285d29 100644
--- a/wren-ai-service/src/pipelines/generation/sql_diagnosis.py
+++ b/wren-ai-service/src/pipelines/generation/sql_diagnosis.py
@@ -91,7 +91,7 @@ async def generate_sql_diagnosis(
 @observe(capture_input=False)
 async def post_process(
     generate_sql_diagnosis: dict,
-) -> str:
+) -> dict:
     reply = generate_sql_diagnosis.get("replies", [""])[0]
     if not reply or not reply.strip():
         return {"reasoning": "LLM did not return any response."}
diff --git a/wren-ai-service/src/providers/llm/litellm.py b/wren-ai-service/src/providers/llm/litellm.py
index 44235b7772..539d34440a 100644
--- a/wren-ai-service/src/providers/llm/litellm.py
+++ b/wren-ai-service/src/providers/llm/litellm.py
@@ -101,8 +101,7 @@ async def _run(
 
             generation_kwargs = {
                 **combined_generation_kwargs,
-                **(generation_kwargs or {}),
-
+                **(generation_kwargs or {}),
             }
             # Strip response_format with type=json_schema — only supported by OpenAI
             # native models. Custom api_base models (e.g. Ollama, LiteLLM proxy with
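
Illustrative sketch (not part of the patches above): a minimal standalone rendering of the two guards these commits introduce, assuming plain dicts stand in for the provider and pipeline objects. strip_json_schema_response_format is a hypothetical helper name used only for this sketch; post_process mirrors the patched pipeline function.

from typing import Optional

import orjson


def strip_json_schema_response_format(
    generation_kwargs: dict, api_base: Optional[str]
) -> dict:
    # Same condition as the litellm.py change: only when a custom api_base is
    # configured and the caller requested a json_schema response_format is the
    # key dropped, since non-OpenAI backends tend to return empty replies.
    response_format = generation_kwargs.get("response_format")
    if (
        api_base
        and isinstance(response_format, dict)
        and response_format.get("type") == "json_schema"
    ):
        return {k: v for k, v in generation_kwargs.items() if k != "response_format"}
    return generation_kwargs


def post_process(generate_sql_diagnosis: dict) -> dict:
    # Same guard as the sql_diagnosis.py change: an empty or blank reply yields
    # a fallback dict instead of raising inside orjson.loads.
    reply = generate_sql_diagnosis.get("replies", [""])[0]
    if not reply or not reply.strip():
        return {"reasoning": "LLM did not return any response."}
    return orjson.loads(reply)


if __name__ == "__main__":
    kwargs = {"temperature": 0, "response_format": {"type": "json_schema", "json_schema": {}}}
    # With a custom api_base the schema request is stripped; without one it is kept.
    print(strip_json_schema_response_format(dict(kwargs), "http://localhost:11434/v1"))
    print(strip_json_schema_response_format(dict(kwargs), None))
    print(post_process({"replies": [""]}))
    print(post_process({"replies": ['{"reasoning": "missing join condition"}']}))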