fix: address remaining PR feedback, expand test coverage

anticomputer · anticomputer · commit 69fc1e3ca76d · 2026-03-20T13:18:09.000-04:00
- cli: TASK_AGENT_DEBUG="0"/"false" no longer enables debug mode
- capi: allow arbitrary API endpoints with graceful fallback
- runner: defer tool result pop until after template rendering
- test: 72 new unit tests for runner, cli, session, prompt parser, capi
- examples: add edge_case_test.yaml for nested JSON repeat_prompt
diff --git a/examples/taskflows/edge_case_test.yaml b/examples/taskflows/edge_case_test.yaml
@@ -0,0 +1,69 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+# Edge-case test taskflow targeting less-exercised code paths:
+#   - shell task producing nested JSON for repeat_prompt
+#   - repeat_prompt over an array of nested objects (dictionary items, not just scalars)
+#   - env variable scoping (task-level env)
+#   - globals CLI override combined with file defaults
+#   - max_steps constraint
+#   - must_complete on a non-tool task
+#   - empty taskflow section handling
+
+seclab-taskflow-agent:
+  version: "1.0"
+  filetype: taskflow
+
+model_config: examples.model_configs.model_config
+
+globals:
+  category: edge-cases
+  default_value: from-file
+
+taskflow:
+  # ---------------------------------------------------------------
+  # Task 1: Shell task with nested JSON structure
+  # Tests: run, must_complete, complex JSON output
+  # ---------------------------------------------------------------
+  - task:
+      name: nested-json-shell
+      must_complete: true
+      run: |
+        echo '[{"id": 1, "data": {"label": "alpha", "score": 0.95}}, {"id": 2, "data": {"label": "beta", "score": 0.87}}]'
+
+  # ---------------------------------------------------------------
+  # Task 2: Repeat over nested structure, sequential (not async)
+  # Tests: repeat_prompt (sequential), nested result access,
+  #        globals reference, inputs, env scoping, max_steps
+  # ---------------------------------------------------------------
+  - task:
+      name: sequential-repeat
+      repeat_prompt: true
+      must_complete: true
+      model: gpt_default
+      max_steps: 5
+      agents:
+        - examples.personalities.fruit_expert
+      inputs:
+        output_format: json
+      env:
+        EDGE_TEST_MODE: "sequential"
+      user_prompt: |
+        Category: {{ globals.category }}, default: {{ globals.default_value }}.
+        Item ID {{ result.id }}: label={{ result.data.label }}, score={{ result.data.score }}.
+        Respond with exactly one sentence summarizing this item in {{ inputs.output_format }} awareness.
+
+  # ---------------------------------------------------------------
+  # Task 3: Simple prompt with no tools (headless, no toolboxes)
+  # Tests: pure LLM task, exclude_from_context, model alias
+  # ---------------------------------------------------------------
+  - task:
+      name: pure-llm-task
+      model: gpt_default
+      exclude_from_context: true
+      agents:
+        - examples.personalities.fruit_expert
+      max_steps: 3
+      user_prompt: |
+        The category is {{ globals.category }}.
+        Say "edge case test passed" and nothing else.
diff --git a/src/seclab_taskflow_agent/capi.py b/src/seclab_taskflow_agent/capi.py
@@ -79,10 +79,8 @@ def list_capi_models(token: str) -> dict[str, dict]:
             case AI_API_ENDPOINT_ENUM.AI_API_OPENAI:
                 models_catalog = "models"
             case _:
-                raise ValueError(
-                    f"Unsupported Model Endpoint: {api_endpoint}\n"
-                    f"Supported endpoints: {[e.to_url() for e in AI_API_ENDPOINT_ENUM]}"
-                )
+                # Unknown endpoint — try the OpenAI-style models catalog
+                models_catalog = "models"
         r = httpx.get(
             httpx.URL(api_endpoint).join(models_catalog),
             headers={
@@ -100,6 +98,10 @@ def list_capi_models(token: str) -> dict[str, dict]:
                 models_list = r.json()
             case AI_API_ENDPOINT_ENUM.AI_API_OPENAI:
                 models_list = r.json().get("data", [])
+            case _:
+                # Unknown endpoint — try OpenAI-style {"data": [...]}, otherwise use the response body as-is
+                body = r.json()
+                models_list = body.get("data", body) if isinstance(body, dict) else body
         for model in models_list:
             models[model.get("id")] = dict(model)
     except httpx.RequestError:
@@ -123,10 +125,9 @@ def supports_tool_calls(model: str, models: dict[str, dict]) -> bool:
         case AI_API_ENDPOINT_ENUM.AI_API_OPENAI:
             return "gpt-" in model.lower()
         case _:
-            raise ValueError(
-                f"Unsupported Model Endpoint: {api_endpoint}\n"
-                f"Supported endpoints: {[e.to_url() for e in AI_API_ENDPOINT_ENUM]}"
-            )
+            # Unknown endpoint — optimistically assume tool-call support
+            # if the model is present in the catalog.
+            return model in models
 
 
 def list_tool_call_models(token: str) -> dict[str, dict]:
diff --git a/src/seclab_taskflow_agent/cli.py b/src/seclab_taskflow_agent/cli.py
@@ -113,7 +113,7 @@ def main(
 ) -> None:
     """Run a taskflow or personality-based agent session."""
     # Debug mode from flag or env var
-    debug = debug or bool(os.getenv("TASK_AGENT_DEBUG"))
+    debug = debug or os.getenv("TASK_AGENT_DEBUG", "").strip().lower() in ("1", "true", "yes")
 
     # Validate mutual exclusivity (resume is standalone)
     if resume and (personality or taskflow or list_models):
diff --git a/src/seclab_taskflow_agent/runner.py b/src/seclab_taskflow_agent/runner.py
@@ -214,9 +214,6 @@ async def _build_prompts_to_run(
             logging.critical("No last MCP tool result available")
             raise
 
-        # Consume only after successful parse
-        last_mcp_tool_results.pop()
-
         if not iterable_result:
             await render_model_output("** 🤖❗MCP tool result iterable is empty!\n")
         else:
@@ -234,6 +231,10 @@ async def _build_prompts_to_run(
                 except jinja2.TemplateError as e:
                     logging.error(f"Error rendering template for result {value}: {e}")
                     raise ValueError(f"Template rendering failed: {e}")
+
+        # Consume only after all prompts rendered successfully so that
+        # the result remains available for retry/resume on failure.
+        last_mcp_tool_results.pop()
     else:
         prompts_to_run.append(task_prompt)
     return prompts_to_run
diff --git a/tests/test_api_endpoint_config.py b/tests/test_api_endpoint_config.py
@@ -62,15 +62,14 @@ def test_to_url_openai(self):
         assert endpoint.to_url() == "https://api.openai.com/v1"
 
     def test_unsupported_endpoint(self, monkeypatch):
-        """Test that unsupported API endpoint raises ValueError."""
+        """Test that unsupported API endpoint falls back gracefully."""
         api_endpoint = "https://unsupported.example.com"
         monkeypatch.setenv("AI_API_ENDPOINT", api_endpoint)
-        with pytest.raises(ValueError) as excinfo:
-            list_capi_models("abc")
-        msg = str(excinfo.value)
-        assert "Unsupported Model Endpoint" in msg
-        assert "https://models.github.ai/inference" in msg
-        assert "https://api.githubcopilot.com" in msg
+        # Unknown endpoints should not raise; they try the OpenAI-style catalog
+        # and return an empty dict on connection failure.
+        result = list_capi_models("abc")
+        assert isinstance(result, dict)
+        assert result == {}
 
 
 if __name__ == "__main__":
diff --git a/tests/test_capi_extended.py b/tests/test_capi_extended.py
@@ -0,0 +1,97 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Extended tests for capi module."""
+
+from __future__ import annotations
+
+from seclab_taskflow_agent.capi import AI_API_ENDPOINT_ENUM, supports_tool_calls
+
+
+class TestSupportsToolCalls:
+    """Tests for supports_tool_calls across known and unknown endpoints."""
+
+    def test_unknown_endpoint_known_model(self, monkeypatch):
+        """Unknown endpoint returns True when model is in the catalog."""
+        monkeypatch.setenv("AI_API_ENDPOINT", "https://custom.api.example.com/v1")
+        models = {"my-model": {"id": "my-model"}}
+        assert supports_tool_calls("my-model", models) is True
+
+    def test_unknown_endpoint_unknown_model(self, monkeypatch):
+        """Unknown endpoint returns False when model is NOT in the catalog."""
+        monkeypatch.setenv("AI_API_ENDPOINT", "https://custom.api.example.com/v1")
+        models = {"other-model": {"id": "other-model"}}
+        assert supports_tool_calls("missing-model", models) is False
+
+    def test_copilot_endpoint_with_capabilities(self, monkeypatch):
+        """Copilot endpoint checks capabilities.supports.tool_calls."""
+        monkeypatch.setenv("AI_API_ENDPOINT", "https://api.githubcopilot.com")
+        models = {
+            "gpt-4o": {
+                "id": "gpt-4o",
+                "capabilities": {"supports": {"tool_calls": True}},
+            }
+        }
+        assert supports_tool_calls("gpt-4o", models) is True
+
+    def test_copilot_endpoint_without_capabilities(self, monkeypatch):
+        """Copilot endpoint returns False when tool_calls not in capabilities."""
+        monkeypatch.setenv("AI_API_ENDPOINT", "https://api.githubcopilot.com")
+        models = {
+            "text-only": {
+                "id": "text-only",
+                "capabilities": {"supports": {}},
+            }
+        }
+        assert supports_tool_calls("text-only", models) is False
+
+    def test_models_github_endpoint(self, monkeypatch):
+        """models.github.ai checks for 'tool-calling' in capabilities list."""
+        monkeypatch.setenv("AI_API_ENDPOINT", "https://models.github.ai/inference")
+        models = {
+            "openai/gpt-4o": {
+                "id": "openai/gpt-4o",
+                "capabilities": ["tool-calling", "chat"],
+            }
+        }
+        assert supports_tool_calls("openai/gpt-4o", models) is True
+
+    def test_models_github_endpoint_no_tool_calling(self, monkeypatch):
+        """models.github.ai returns False when 'tool-calling' not in list."""
+        monkeypatch.setenv("AI_API_ENDPOINT", "https://models.github.ai/inference")
+        models = {
+            "some-model": {
+                "id": "some-model",
+                "capabilities": ["chat"],
+            }
+        }
+        assert supports_tool_calls("some-model", models) is False
+
+    def test_openai_endpoint_gpt_model(self, monkeypatch):
+        """OpenAI endpoint returns True for models containing 'gpt-'."""
+        monkeypatch.setenv("AI_API_ENDPOINT", "https://api.openai.com/v1")
+        assert supports_tool_calls("gpt-4o", {}) is True
+
+    def test_openai_endpoint_non_gpt_model(self, monkeypatch):
+        """OpenAI endpoint returns False for non-GPT models."""
+        monkeypatch.setenv("AI_API_ENDPOINT", "https://api.openai.com/v1")
+        assert supports_tool_calls("claude-3-opus", {}) is False
+
+
+class TestAIAPIEndpointEnum:
+    """Tests for the AI_API_ENDPOINT_ENUM StrEnum."""
+
+    def test_enum_values(self):
+        """All expected endpoint values exist."""
+        assert AI_API_ENDPOINT_ENUM.AI_API_MODELS_GITHUB == "models.github.ai"
+        assert AI_API_ENDPOINT_ENUM.AI_API_GITHUBCOPILOT == "api.githubcopilot.com"
+        assert AI_API_ENDPOINT_ENUM.AI_API_OPENAI == "api.openai.com"
+
+    def test_to_url_models_github(self):
+        assert AI_API_ENDPOINT_ENUM.AI_API_MODELS_GITHUB.to_url() == "https://models.github.ai/inference"
+
+    def test_to_url_copilot(self):
+        assert AI_API_ENDPOINT_ENUM.AI_API_GITHUBCOPILOT.to_url() == "https://api.githubcopilot.com"
+
+    def test_to_url_openai(self):
+        assert AI_API_ENDPOINT_ENUM.AI_API_OPENAI.to_url() == "https://api.openai.com/v1"
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -0,0 +1,84 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Unit tests for the Typer CLI module."""
+
+from __future__ import annotations
+
+import pytest
+import typer
+
+from seclab_taskflow_agent.cli import _parse_global
+
+
+class TestParseGlobal:
+    """Tests for _parse_global KEY=VALUE parsing."""
+
+    def test_valid_key_value(self):
+        """Standard KEY=VALUE is parsed correctly."""
+        assert _parse_global("fruit=apple") == ("fruit", "apple")
+
+    def test_missing_equals_raises(self):
+        """A string without '=' raises BadParameter."""
+        with pytest.raises(typer.BadParameter, match="Expected KEY=VALUE"):
+            _parse_global("no_equals_here")
+
+    def test_value_with_equals_sign(self):
+        """Only the first '=' is used as the delimiter."""
+        key, val = _parse_global("url=https://example.com?foo=bar")
+        assert key == "url"
+        assert val == "https://example.com?foo=bar"
+
+    def test_whitespace_stripped(self):
+        """Leading/trailing whitespace in key and value is stripped."""
+        key, val = _parse_global("  key  =  value  ")
+        assert key == "key"
+        assert val == "value"
+
+    def test_empty_value(self):
+        """An empty value after '=' is allowed."""
+        key, val = _parse_global("key=")
+        assert key == "key"
+        assert val == ""
+
+    def test_empty_key(self):
+        """An empty key before '=' is technically allowed by the parser."""
+        key, val = _parse_global("=value")
+        assert key == ""
+        assert val == "value"
+
+
+class TestDebugEnvParsing:
+    """Tests for the TASK_AGENT_DEBUG environment variable expression."""
+
+    @staticmethod
+    def _is_debug(env_value: str) -> bool:
+        """Reproduce the debug expression from cli.py."""
+        return env_value.strip().lower() in ("1", "true", "yes")
+
+    def test_zero_is_false(self):
+        assert self._is_debug("0") is False
+
+    def test_one_is_true(self):
+        assert self._is_debug("1") is True
+
+    def test_true_string_is_true(self):
+        assert self._is_debug("true") is True
+
+    def test_TRUE_string_is_true(self):
+        assert self._is_debug("TRUE") is True
+
+    def test_yes_string_is_true(self):
+        assert self._is_debug("yes") is True
+
+    def test_empty_string_is_false(self):
+        assert self._is_debug("") is False
+
+    def test_false_string_is_false(self):
+        assert self._is_debug("false") is False
+
+    def test_whitespace_trimmed(self):
+        assert self._is_debug("  1  ") is True
+
+    def test_random_text_is_false(self):
+        assert self._is_debug("enabled") is False
diff --git a/tests/test_prompt_parser_edge.py b/tests/test_prompt_parser_edge.py
diff --git a/tests/test_runner.py b/tests/test_runner.py
diff --git a/tests/test_session_edge.py b/tests/test_session_edge.py