Refactor Llama Stack utilities and update E2E tests

jrobertboos · jrobertboos · commit bcd351fbf9ed · 2026-03-15T10:12:53.000-04:00
- Renamed `llama_stack_shields.py` to `llama_stack_utils.py` and expanded its functionality to manage both toolgroups and shields.
- Removed the deprecated `llama_stack_tools.py` file.
- Updated E2E test scenarios to utilize the new utility functions for unregistering toolgroups and clearing Llama Stack storage.
- Enhanced feature files to include comments indicating pending fixes for skipped scenarios.
diff --git a/docs/e2e_testing.md b/docs/e2e_testing.md
@@ -58,7 +58,7 @@ tests/e2e/
 ├── utils/
 │   ├── utils.py                 # restart_container, switch_config, wait_for_container_health, etc.
 │   ├── prow_utils.py            # Prow/OpenShift helpers (restore_llama_stack_pod, etc.)
-│   └── llama_stack_shields.py   # Shield unregister/register (server mode, optional)
+│   └── llama_stack_utils.py     # MCP toolgroup unregister + shield unregister/register (server mode, optional)
 ├── mock_mcp_server/             # Mock MCP server for MCP tests
 └── rag/                         # RAG test data (e.g. for FAISS)
 ```
diff --git a/tests/e2e/features/environment.py b/tests/e2e/features/environment.py
@@ -16,12 +16,13 @@
 from tests.e2e.utils.prow_utils import restore_llama_stack_pod
 from behave.runner import Context
 
-from tests.e2e.utils.llama_stack_tools import unregister_mcp_toolgroups
-from tests.e2e.utils.llama_stack_shields import (
+from tests.e2e.utils.llama_stack_utils import (
     register_shield,
+    unregister_mcp_toolgroups,
     unregister_shield,
 )
 from tests.e2e.utils.utils import (
+    clear_llama_stack_storage,
     create_config_backup,
     is_prow_environment,
     remove_config_backup,
@@ -239,6 +240,8 @@ def before_scenario(context: Context, scenario: Scenario) -> None:
     if config_name is not None:
         if not context.is_library_mode:
             unregister_mcp_toolgroups()
+        else:
+            clear_llama_stack_storage()
         context.scenario_config = _get_config_path(config_name, mode_dir)
         switch_config(context.scenario_config)
         restart_container("lightspeed-stack")
diff --git a/tests/e2e/features/mcp.feature b/tests/e2e/features/mcp.feature
@@ -7,7 +7,7 @@ Feature: MCP tests
 
 
 # File-based
-  @skip
+  @skip  # will be fixed by LCORE-1461
   @MCPFileAuthConfig
   Scenario: Check if tools endpoint succeeds when MCP file-based auth token is passed 
     Given The system is in default state
@@ -46,7 +46,7 @@ Feature: MCP tests
         | Hello                     |
     And The token metrics should have increased
 
-  @skip
+  @skip  # will be fixed by LCORE-1461
   @InvalidMCPFileAuthConfig
   Scenario: Check if tools endpoint reports error when MCP file-based invalid auth token is passed 
     Given The system is in default state
@@ -62,7 +62,7 @@ Feature: MCP tests
         }
     """
 
-  @skip
+  @skip  # will be fixed by LCORE-1463
   @InvalidMCPFileAuthConfig
   Scenario: Check if query endpoint reports error when MCP file-based invalid auth token is passed 
     Given The system is in default state
@@ -81,7 +81,7 @@ Feature: MCP tests
         }
     """
 
-  @skip
+  @skip  # will be fixed by LCORE-1463
   @InvalidMCPFileAuthConfig
   Scenario: Check if streaming_query endpoint reports error when MCP file-based invalid auth token is passed 
     Given The system is in default state
@@ -101,7 +101,7 @@ Feature: MCP tests
     """
 
 # Kubernetes
-  @skip
+  @skip  # will be fixed by LCORE-1461
   @MCPKubernetesAuthConfig
   Scenario: Check if tools endpoint succeeds when MCP kubernetes auth token is passed 
     Given The system is in default state
@@ -143,7 +143,7 @@ Feature: MCP tests
         | Hello                     |
     And The token metrics should have increased
 
-  @skip
+  @skip  # will be fixed by LCORE-1461
   @MCPKubernetesAuthConfig
   Scenario: Check if tools endpoint reports error when MCP kubernetes invalid auth token is passed 
     Given The system is in default state
@@ -160,7 +160,7 @@ Feature: MCP tests
         }
     """
 
-  @skip
+  @skip  # will be fixed by LCORE-1463
   @MCPKubernetesAuthConfig
   Scenario: Check if query endpoint reports error when MCP kubernetes invalid auth token is passed
     Given The system is in default state
@@ -180,7 +180,7 @@ Feature: MCP tests
         }
     """
 
-  @skip
+  @skip  # will be fixed by LCORE-1463
   @MCPKubernetesAuthConfig
   Scenario: Check if streaming_query endpoint reports error when MCP kubernetes invalid auth token is passed 
     Given The system is in default state
@@ -201,7 +201,7 @@ Feature: MCP tests
     """
 
 # Client-provided
-  @skip
+  @skip  # will be fixed by LCORE-1462
   @MCPClientAuthConfig
   Scenario: Check if tools endpoint succeeds by skipping when MCP client-provided auth token is omitted
     Given The system is in default state
diff --git a/tests/e2e/utils/llama_stack_tools.py b/tests/e2e/utils/llama_stack_tools.py
diff --git a/tests/e2e/utils/llama_stack_utils.py b/tests/e2e/utils/llama_stack_utils.py
@@ -1,9 +1,12 @@
-"""E2E helpers to unregister and re-register Llama Stack shields via the client API.
+"""E2E test utilities for Llama Stack (toolgroups and shields).
 
-Used by the @disable-shields tag: before the scenario we call client.shields.delete()
-to unregister the shield; after the scenario we call client.shields.register()
-to restore it. Only applies in server mode (Llama Stack as a separate service).
-Requires E2E_LLAMA_STACK_URL or E2E_LLAMA_HOSTNAME/E2E_LLAMA_PORT.
+This module provides functions to manage MCP toolgroups and shields on a running
+Llama Stack instance during end-to-end tests: unregister MCP toolgroups when
+switching configurations or testing MCP auth, and unregister/re-register shields
+(e.g. for the @disable-shields tag).
+
+Only applies when running Llama Stack as a separate service (server mode).
+Requires E2E_LLAMA_STACK_URL or E2E_LLAMA_HOSTNAME and E2E_LLAMA_PORT.
 """
 
 import asyncio
@@ -29,6 +32,54 @@ def _get_llama_stack_client() -> AsyncLlamaStackClient:
     return AsyncLlamaStackClient(base_url=base_url, api_key=api_key, timeout=timeout)
 
 
+# -----------------------------------------------------------------------------
+# Toolgroups
+# -----------------------------------------------------------------------------
+
+
+async def _unregister_toolgroup_async(identifier: str) -> None:
+    """Unregister a toolgroup by identifier; a 400 "not found" response is treated as success."""
+    client = _get_llama_stack_client()
+    try:
+        await client.toolgroups.unregister(identifier)
+    except APIConnectionError:
+        raise
+    except APIStatusError as e:
+        # 400 "not found": toolgroup already absent, scenario can proceed
+        if e.status_code == 400 and "not found" in str(e).lower():
+            return None
+        raise
+    finally:
+        await client.close()
+
+
+async def _unregister_mcp_toolgroups_async() -> None:
+    """Unregister each listed toolgroup that has an identifier and provider_id "model-context-protocol"."""
+    client = _get_llama_stack_client()
+    try:
+        toolgroups = await client.toolgroups.list()
+        for toolgroup in toolgroups:
+            if (
+                toolgroup.identifier
+                and toolgroup.provider_id == "model-context-protocol"
+            ):
+                await _unregister_toolgroup_async(toolgroup.identifier)
+    except APIConnectionError:
+        raise
+    finally:
+        await client.close()
+
+
+def unregister_mcp_toolgroups() -> None:
+    """Unregister all MCP toolgroups; blocking wrapper that runs the async helper via asyncio.run."""
+    asyncio.run(_unregister_mcp_toolgroups_async())
+
+
+# -----------------------------------------------------------------------------
+# Shields
+# -----------------------------------------------------------------------------
+
+
 async def _unregister_shield_async(identifier: str) -> Optional[tuple[str, str]]:
     """Unregister a shield by identifier; return (provider_id, provider_shield_id) for restore."""
     client = _get_llama_stack_client()
diff --git a/tests/e2e/utils/utils.py b/tests/e2e/utils/utils.py
@@ -246,6 +246,45 @@ def remove_config_backup(backup_path: str) -> None:
             print(f"Warning: Could not remove backup file {backup_path}: {e}")
 
 
+# Llama Stack storage paths inside the lightspeed-stack container/pod (library mode).
+# Used when clearing embedded Llama Stack storage before MCP config scenarios.
+_LLAMA_STORAGE_ROOT = "/opt/app-root/src/.llama/storage"
+
+
+def clear_llama_stack_storage(container_name: str = "lightspeed-stack") -> None:
+    """Clear Llama Stack storage in library mode (embedded Llama Stack).
+
+    Removes SQLite/KV store databases and file storage contents so that
+    toolgroups and other persisted state are reset. Used before MCP config
+    scenarios when not running in server mode (no separate Llama Stack to
+    unregister toolgroups from). Each removal runs through ``docker exec``.
+
+    Parameters:
+        container_name (str): Docker container name (default "lightspeed-stack").
+
+    Returns:
+        None. Non-zero exits are ignored; a timeout prints a warning and stops early.
+    """
+    # NOTE(review): no Prow check in this function — confirm callers skip it there.
+    storage_root = _LLAMA_STORAGE_ROOT
+    commands = [
+        f"rm -f {storage_root}/rag/kv_store.db",
+        f"rm -f {storage_root}/sql_store.db",
+        f"rm -rf {storage_root}/files/*",
+    ]
+    try:
+        for cmd in commands:
+            subprocess.run(
+                ["docker", "exec", container_name, "sh", "-c", cmd],
+                capture_output=True,
+                text=True,
+                timeout=10,
+                check=False,
+            )
+    except subprocess.TimeoutExpired as e:
+        print(f"Warning: Could not clear Llama Stack storage: {e}")
+
+
 def restart_container(container_name: str) -> None:
     """Restart a Docker container by name and wait until it is healthy.