diff --git a/altk_evolve/llm/guidelines/clustering.py b/altk_evolve/llm/guidelines/clustering.py index 0ce0a210..b8d39205 100644 --- a/altk_evolve/llm/guidelines/clustering.py +++ b/altk_evolve/llm/guidelines/clustering.py @@ -141,6 +141,7 @@ def combine_cluster(entities: list[RecordedEntity]) -> list[Guideline]: Raises: EvolveException: If the LLM call fails after 3 attempts. """ + is_groq = llm_settings.custom_llm_provider == "groq" or llm_settings.guidelines_model.startswith("groq/") supported_params = get_supported_openai_params( model=llm_settings.guidelines_model, custom_llm_provider=llm_settings.custom_llm_provider, @@ -150,7 +151,7 @@ def combine_cluster(entities: list[RecordedEntity]) -> list[Guideline]: model=llm_settings.guidelines_model, custom_llm_provider=llm_settings.custom_llm_provider, ) - constrained_decoding_supported = supports_response_format and response_schema_enabled + constrained_decoding_supported = not is_groq and supports_response_format and response_schema_enabled # Deduplicate task descriptions task_descriptions = list( diff --git a/altk_evolve/llm/guidelines/guidelines.py b/altk_evolve/llm/guidelines/guidelines.py index 7e992c8b..c65f1eb1 100644 --- a/altk_evolve/llm/guidelines/guidelines.py +++ b/altk_evolve/llm/guidelines/guidelines.py @@ -171,6 +171,7 @@ def generate_guidelines(messages: list[dict]) -> list[GuidelineGenerationResult] Returns a list with one GuidelineGenerationResult per subtask (or one for the full trajectory when segmentation is disabled or produces fewer than 2 subtasks). """ + is_groq = llm_settings.custom_llm_provider == "groq" or llm_settings.guidelines_model.startswith("groq/") supported_params = get_supported_openai_params( model=llm_settings.guidelines_model, custom_llm_provider=llm_settings.custom_llm_provider, @@ -180,7 +181,7 @@ def generate_guidelines(messages: list[dict]) -> list[GuidelineGenerationResult] model=llm_settings.guidelines_model, custom_llm_provider=llm_settings.custom_llm_provider, ) - constrained_decoding_supported = bool(supports_response_format and response_schema_enabled) + constrained_decoding_supported = bool(not is_groq and supports_response_format and response_schema_enabled) trajectory_data = parse_openai_agents_trajectory(messages) task_instruction = trajectory_data["task_instruction"] diff --git a/tests/unit/test_combine_guidelines.py b/tests/unit/test_combine_guidelines.py index 97807422..e032839e 100644 --- a/tests/unit/test_combine_guidelines.py +++ b/tests/unit/test_combine_guidelines.py @@ -6,10 +6,11 @@ import pytest +from altk_evolve.llm.guidelines import clustering as clustering_module from altk_evolve.llm.guidelines.clustering import combine_cluster from altk_evolve.schema.core import RecordedEntity from altk_evolve.schema.exceptions import EvolveException -from altk_evolve.schema.guidelines import Guideline, ConsolidationResult +from altk_evolve.schema.guidelines import ConsolidationResult, Guideline def _make_entity(entity_id: str, content: str, task_description: str = "do a task") -> RecordedEntity: @@ -110,7 +111,9 @@ def test_combine_cluster_raises_after_max_retries(self, _mock_params, _mock_sche @patch("altk_evolve.llm.guidelines.clustering.completion") @patch("altk_evolve.llm.guidelines.clustering.supports_response_schema", return_value=True) @patch("altk_evolve.llm.guidelines.clustering.get_supported_openai_params", return_value=["response_format"]) - def test_combine_cluster_uses_structured_output(self, _mock_params, _mock_schema, mock_completion): + def test_combine_cluster_uses_structured_output(self, _mock_params, _mock_schema, mock_completion, monkeypatch): + monkeypatch.setattr(clustering_module.llm_settings, "guidelines_model", "gpt-4o") + monkeypatch.setattr(clustering_module.llm_settings, "custom_llm_provider", "openai") mock_completion.return_value = _mock_completion_response(SAMPLE_GUIDELINES[:1]) entities = [_make_entity("1", "Guideline A"), _make_entity("2", "Guideline B")] @@ -121,6 +124,29 @@ def test_combine_cluster_uses_structured_output(self, _mock_params, _mock_schema _, kwargs = mock_completion.call_args assert "response_format" in kwargs + @patch("altk_evolve.llm.guidelines.clustering.completion") + @patch("altk_evolve.llm.guidelines.clustering.supports_response_schema", return_value=True) + @patch("altk_evolve.llm.guidelines.clustering.get_supported_openai_params", return_value=["response_format"]) + def test_combine_cluster_uses_json_prompt_for_groq_even_when_schema_is_reported( + self, + _mock_params, + _mock_schema, + mock_completion, + monkeypatch, + ): + monkeypatch.setattr(clustering_module.llm_settings, "guidelines_model", "groq/openai/gpt-oss-120b") + monkeypatch.setattr(clustering_module.llm_settings, "custom_llm_provider", "groq") + mock_completion.return_value = _mock_completion_response(SAMPLE_GUIDELINES[:1]) + + entities = [_make_entity("1", "Guideline A"), _make_entity("2", "Guideline B")] + result = combine_cluster(entities) + + assert len(result) == 1 + _, kwargs = mock_completion.call_args + assert "response_format" not in kwargs + assert kwargs["custom_llm_provider"] == "groq" + assert "Output Format (JSON)" in kwargs["messages"][0]["content"] + # --------------------------------------------------------------------------- # consolidate_guidelines tests diff --git a/tests/unit/test_guidelines.py b/tests/unit/test_guidelines.py index d3ace02a..cb89a922 100644 --- a/tests/unit/test_guidelines.py +++ b/tests/unit/test_guidelines.py @@ -1,8 +1,19 @@ """Tests for guideline generation utilities.""" +import json +from unittest.mock import MagicMock, patch + import pytest -from altk_evolve.llm.guidelines.guidelines import parse_openai_agents_trajectory +from altk_evolve.llm.guidelines import guidelines as guidelines_module +from altk_evolve.llm.guidelines.guidelines import generate_guidelines, parse_openai_agents_trajectory + + +def _mock_completion_response(payload: dict) -> MagicMock: + response = MagicMock() + response.choices = [MagicMock()] + response.choices[0].message.content = json.dumps(payload) + return response @pytest.mark.unit @@ -23,3 +34,38 @@ def test_fallback_when_no_user_message(self): def test_fallback_when_empty_messages(self): result = parse_openai_agents_trajectory([]) assert result["task_instruction"] == "Task description unknown" + + @patch("altk_evolve.llm.guidelines.guidelines.completion") + @patch("altk_evolve.llm.guidelines.guidelines.supports_response_schema", return_value=True) + @patch("altk_evolve.llm.guidelines.guidelines.get_supported_openai_params", return_value=["response_format"]) + def test_generate_guidelines_uses_json_prompt_for_groq_even_when_schema_is_reported( + self, + _mock_params, + _mock_schema, + mock_completion, + monkeypatch, + ): + monkeypatch.setattr(guidelines_module.llm_settings, "guidelines_model", "groq/openai/gpt-oss-120b") + monkeypatch.setattr(guidelines_module.llm_settings, "custom_llm_provider", "groq") + monkeypatch.setattr(guidelines_module.evolve_config, "segmentation_enabled", False) + mock_completion.return_value = _mock_completion_response( + { + "guidelines": [ + { + "content": "Validate files before parsing", + "rationale": "Avoids parser crashes on empty inputs", + "category": "strategy", + "trigger": "Before reading user-provided CSV files", + "implementation_steps": ["Check file size", "Return an empty DataFrame for empty files"], + } + ] + } + ) + + results = generate_guidelines([{"role": "user", "content": "Fix CSV parsing"}]) + + assert results[0].guidelines[0].content == "Validate files before parsing" + _, kwargs = mock_completion.call_args + assert "response_format" not in kwargs + assert kwargs["custom_llm_provider"] == "groq" + assert "Output Format (JSON)" in kwargs["messages"][0]["content"]