Skip to content

Commit 5b7bc7c

Browse files
authored
fix(llm): avoid Groq schema-tool failures during guideline generation (#264)
Groq gpt-oss can report response-schema support through LiteLLM while still failing when the model returns normal text instead of a required tool call. Guideline generation and guideline consolidation now keep Groq on the existing JSON prompt-and-parse path while leaving structured outputs enabled for providers that handle them reliably.

Constraint: Groq openai/gpt-oss-120b may reject schema/tool-backed responses when no tool call is produced
Constraint: Upstream renamed tip generation paths to guideline generation during rebase
Rejected: Add a Groq SDK client | LiteLLM already fronts all generation paths and the failure is limited to schema routing
Rejected: Disable structured outputs globally | OpenAI-compatible providers still benefit from strict schema mode
Confidence: high
Scope-risk: narrow
Directive: Do not re-enable response_format for Groq guideline generation without live testing groq/openai/gpt-oss-120b tool/schema behavior
Tested: uv run pytest tests/unit/test_guidelines.py tests/unit/test_combine_guidelines.py -v
Tested: uv run ruff check altk_evolve/llm/guidelines/guidelines.py altk_evolve/llm/guidelines/clustering.py tests/unit/test_guidelines.py tests/unit/test_combine_guidelines.py
Tested: uv run ruff format --check altk_evolve/llm/guidelines/guidelines.py altk_evolve/llm/guidelines/clustering.py tests/unit/test_guidelines.py tests/unit/test_combine_guidelines.py
1 parent 606e906 commit 5b7bc7c

4 files changed

Lines changed: 79 additions & 5 deletions

File tree

altk_evolve/llm/guidelines/clustering.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ def combine_cluster(entities: list[RecordedEntity]) -> list[Guideline]:
141141
Raises:
142142
EvolveException: If the LLM call fails after 3 attempts.
143143
"""
144+
is_groq = llm_settings.custom_llm_provider == "groq" or llm_settings.guidelines_model.startswith("groq/")
144145
supported_params = get_supported_openai_params(
145146
model=llm_settings.guidelines_model,
146147
custom_llm_provider=llm_settings.custom_llm_provider,
@@ -150,7 +151,7 @@ def combine_cluster(entities: list[RecordedEntity]) -> list[Guideline]:
150151
model=llm_settings.guidelines_model,
151152
custom_llm_provider=llm_settings.custom_llm_provider,
152153
)
153-
constrained_decoding_supported = supports_response_format and response_schema_enabled
154+
constrained_decoding_supported = not is_groq and supports_response_format and response_schema_enabled
154155

155156
# Deduplicate task descriptions
156157
task_descriptions = list(

altk_evolve/llm/guidelines/guidelines.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ def generate_guidelines(messages: list[dict]) -> list[GuidelineGenerationResult]
171171
Returns a list with one GuidelineGenerationResult per subtask (or one for the full
172172
trajectory when segmentation is disabled or produces fewer than 2 subtasks).
173173
"""
174+
is_groq = llm_settings.custom_llm_provider == "groq" or llm_settings.guidelines_model.startswith("groq/")
174175
supported_params = get_supported_openai_params(
175176
model=llm_settings.guidelines_model,
176177
custom_llm_provider=llm_settings.custom_llm_provider,
@@ -180,7 +181,7 @@ def generate_guidelines(messages: list[dict]) -> list[GuidelineGenerationResult]
180181
model=llm_settings.guidelines_model,
181182
custom_llm_provider=llm_settings.custom_llm_provider,
182183
)
183-
constrained_decoding_supported = bool(supports_response_format and response_schema_enabled)
184+
constrained_decoding_supported = bool(not is_groq and supports_response_format and response_schema_enabled)
184185

185186
trajectory_data = parse_openai_agents_trajectory(messages)
186187
task_instruction = trajectory_data["task_instruction"]

tests/unit/test_combine_guidelines.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66

77
import pytest
88

9+
from altk_evolve.llm.guidelines import clustering as clustering_module
910
from altk_evolve.llm.guidelines.clustering import combine_cluster
1011
from altk_evolve.schema.core import RecordedEntity
1112
from altk_evolve.schema.exceptions import EvolveException
12-
from altk_evolve.schema.guidelines import Guideline, ConsolidationResult
13+
from altk_evolve.schema.guidelines import ConsolidationResult, Guideline
1314

1415

1516
def _make_entity(entity_id: str, content: str, task_description: str = "do a task") -> RecordedEntity:
@@ -110,7 +111,9 @@ def test_combine_cluster_raises_after_max_retries(self, _mock_params, _mock_sche
110111
@patch("altk_evolve.llm.guidelines.clustering.completion")
111112
@patch("altk_evolve.llm.guidelines.clustering.supports_response_schema", return_value=True)
112113
@patch("altk_evolve.llm.guidelines.clustering.get_supported_openai_params", return_value=["response_format"])
113-
def test_combine_cluster_uses_structured_output(self, _mock_params, _mock_schema, mock_completion):
114+
def test_combine_cluster_uses_structured_output(self, _mock_params, _mock_schema, mock_completion, monkeypatch):
115+
monkeypatch.setattr(clustering_module.llm_settings, "guidelines_model", "gpt-4o")
116+
monkeypatch.setattr(clustering_module.llm_settings, "custom_llm_provider", "openai")
114117
mock_completion.return_value = _mock_completion_response(SAMPLE_GUIDELINES[:1])
115118

116119
entities = [_make_entity("1", "Guideline A"), _make_entity("2", "Guideline B")]
@@ -121,6 +124,29 @@ def test_combine_cluster_uses_structured_output(self, _mock_params, _mock_schema
121124
_, kwargs = mock_completion.call_args
122125
assert "response_format" in kwargs
123126

127+
@patch("altk_evolve.llm.guidelines.clustering.completion")
128+
@patch("altk_evolve.llm.guidelines.clustering.supports_response_schema", return_value=True)
129+
@patch("altk_evolve.llm.guidelines.clustering.get_supported_openai_params", return_value=["response_format"])
130+
def test_combine_cluster_uses_json_prompt_for_groq_even_when_schema_is_reported(
131+
self,
132+
_mock_params,
133+
_mock_schema,
134+
mock_completion,
135+
monkeypatch,
136+
):
137+
monkeypatch.setattr(clustering_module.llm_settings, "guidelines_model", "groq/openai/gpt-oss-120b")
138+
monkeypatch.setattr(clustering_module.llm_settings, "custom_llm_provider", "groq")
139+
mock_completion.return_value = _mock_completion_response(SAMPLE_GUIDELINES[:1])
140+
141+
entities = [_make_entity("1", "Guideline A"), _make_entity("2", "Guideline B")]
142+
result = combine_cluster(entities)
143+
144+
assert len(result) == 1
145+
_, kwargs = mock_completion.call_args
146+
assert "response_format" not in kwargs
147+
assert kwargs["custom_llm_provider"] == "groq"
148+
assert "Output Format (JSON)" in kwargs["messages"][0]["content"]
149+
124150

125151
# ---------------------------------------------------------------------------
126152
# consolidate_guidelines tests

tests/unit/test_guidelines.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,19 @@
11
"""Tests for guideline generation utilities."""
22

3+
import json
4+
from unittest.mock import MagicMock, patch
5+
36
import pytest
47

5-
from altk_evolve.llm.guidelines.guidelines import parse_openai_agents_trajectory
8+
from altk_evolve.llm.guidelines import guidelines as guidelines_module
9+
from altk_evolve.llm.guidelines.guidelines import generate_guidelines, parse_openai_agents_trajectory
10+
11+
12+
def _mock_completion_response(payload: dict) -> MagicMock:
13+
response = MagicMock()
14+
response.choices = [MagicMock()]
15+
response.choices[0].message.content = json.dumps(payload)
16+
return response
617

718

819
@pytest.mark.unit
@@ -23,3 +34,38 @@ def test_fallback_when_no_user_message(self):
2334
def test_fallback_when_empty_messages(self):
2435
result = parse_openai_agents_trajectory([])
2536
assert result["task_instruction"] == "Task description unknown"
37+
38+
@patch("altk_evolve.llm.guidelines.guidelines.completion")
39+
@patch("altk_evolve.llm.guidelines.guidelines.supports_response_schema", return_value=True)
40+
@patch("altk_evolve.llm.guidelines.guidelines.get_supported_openai_params", return_value=["response_format"])
41+
def test_generate_guidelines_uses_json_prompt_for_groq_even_when_schema_is_reported(
42+
self,
43+
_mock_params,
44+
_mock_schema,
45+
mock_completion,
46+
monkeypatch,
47+
):
48+
monkeypatch.setattr(guidelines_module.llm_settings, "guidelines_model", "groq/openai/gpt-oss-120b")
49+
monkeypatch.setattr(guidelines_module.llm_settings, "custom_llm_provider", "groq")
50+
monkeypatch.setattr(guidelines_module.evolve_config, "segmentation_enabled", False)
51+
mock_completion.return_value = _mock_completion_response(
52+
{
53+
"guidelines": [
54+
{
55+
"content": "Validate files before parsing",
56+
"rationale": "Avoids parser crashes on empty inputs",
57+
"category": "strategy",
58+
"trigger": "Before reading user-provided CSV files",
59+
"implementation_steps": ["Check file size", "Return an empty DataFrame for empty files"],
60+
}
61+
]
62+
}
63+
)
64+
65+
results = generate_guidelines([{"role": "user", "content": "Fix CSV parsing"}])
66+
67+
assert results[0].guidelines[0].content == "Validate files before parsing"
68+
_, kwargs = mock_completion.call_args
69+
assert "response_format" not in kwargs
70+
assert kwargs["custom_llm_provider"] == "groq"
71+
assert "Output Format (JSON)" in kwargs["messages"][0]["content"]

0 commit comments

Comments
 (0)