Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions .github/workflows/e2e_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ jobs:
matrix:
mode: ["server", "library"]
environment: ["ci"]

name: "E2E: ${{ matrix.mode }} mode / ${{ matrix.environment }}"
e2e_group: [1, 2, 3]

name: "E2E: ${{ matrix.mode }} mode / ${{ matrix.environment }} / group ${{ matrix.e2e_group }}"

env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
Expand Down Expand Up @@ -103,6 +104,7 @@ jobs:
echo "=== Configuration Summary ==="
echo "Deployment mode: ${{ matrix.mode }}"
echo "Environment: ${{ matrix.environment }}"
echo "E2E shard (Makefile test-e2e-tagged): @e2e_group_${{ matrix.e2e_group }} (with not @skip)"
echo "Source config: tests/e2e/configs/run-${{ matrix.environment }}.yaml"
echo ""
echo "=== Configuration Preview ==="
Expand Down Expand Up @@ -187,13 +189,15 @@ jobs:
TERM: xterm-256color
FORCE_COLOR: 1
E2E_DEPLOYMENT_MODE: ${{ matrix.mode }}
# Matches Makefile test-e2e-tagged / E2E_BEHAVE_TAG_EXPR (one @e2e_group_* per job).
E2E_BEHAVE_TAG_EXPR: "not @skip and @e2e_group_${{ matrix.e2e_group }}"
run: |
echo "Installing test dependencies..."
pip install uv
uv sync

echo "Running comprehensive e2e test suite..."
make test-e2e
echo "Running e2e tests (E2E_BEHAVE_TAG_EXPR=${E2E_BEHAVE_TAG_EXPR})..."
make test-e2e-tagged

- name: Show logs on failure
if: failure()
Expand Down
12 changes: 11 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,19 @@ test-integration: ## Run integration tests tests
test-e2e: ## Run end to end tests for the service
script -q -e -c "uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt"

test-e2e-local: ## Run end to end tests for the service
test-e2e-local: ## Run end to end tests for the service (no script wrapper)
uv run behave --color --format pretty --tags=-skip -D dump_errors=true @tests/e2e/test_list.txt

# Tag-based subsets (@e2e_group_* tags on feature files). The default selects every group;
# override the expression to run a single shard, e.g.
#   E2E_BEHAVE_TAG_EXPR='not @skip and @e2e_group_2' make test-e2e-tagged-local
# Tags are spelled with the leading "@" here, matching the override example above.
E2E_BEHAVE_TAG_EXPR ?= not @skip and (@e2e_group_1 or @e2e_group_2 or @e2e_group_3)

test-e2e-tagged: ## Run e2e tests with E2E_BEHAVE_TAG_EXPR (default: all @e2e_group_*)
script -q -e -c "uv run behave --color --format pretty --tags=\"$(E2E_BEHAVE_TAG_EXPR)\" -D dump_errors=true @tests/e2e/test_list.txt"

test-e2e-tagged-local: ## Same as test-e2e-tagged without script wrapper
uv run behave --color --format pretty --tags="$(E2E_BEHAVE_TAG_EXPR)" -D dump_errors=true @tests/e2e/test_list.txt

benchmarks: ## Run benchmarks
uv run python -m pytest -vv tests/benchmarks/

Expand Down
1 change: 1 addition & 0 deletions tests/e2e/features/authorized_noop.feature
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@e2e_group_1
Feature: Authorized endpoint API tests for the noop authentication module

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/authorized_noop_token.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized
@e2e_group_2 @Authorized
Feature: Authorized endpoint API tests for the noop-with-token authentication module

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/authorized_rh_identity.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@RHIdentity
@e2e_group_3 @RHIdentity
Feature: Authorized endpoint API tests for the rh-identity authentication module

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/conversation_cache_v2.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized
@e2e_group_2 @Authorized
Feature: Conversation Cache V2 API tests

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/conversations.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized
@e2e_group_2 @Authorized
Feature: conversations endpoint API tests

Background:
Expand Down
23 changes: 23 additions & 0 deletions tests/e2e/features/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
FALLBACK_MODEL = "gpt-4o-mini"
FALLBACK_PROVIDER = "openai"

# ``time.perf_counter()`` reading taken when each feature starts (stored on the ``Feature`` object; survives Behave context resets).
_E2E_FEATURE_PERF_START_ATTR = "_lightspeed_e2e_feature_perf_start"


def _fetch_models_from_service() -> dict:
"""Query /v1/models endpoint and return first LLM model.
Expand Down Expand Up @@ -349,7 +352,11 @@ def before_feature(context: Context, feature: Feature) -> None:

Per-feature setup that is not expressed in Gherkin (e.g. feedback cleanup state).
Lightspeed YAML is applied in feature Backgrounds via ``configure_service``.

Records monotonic start time on ``feature`` for duration logging in
``after_feature`` (includes scenarios and feature teardown).
"""
setattr(feature, _E2E_FEATURE_PERF_START_ATTR, time.perf_counter())
reset_active_lightspeed_stack_config_basename()
context.active_lightspeed_stack_config_basename = None
# One real Llama disruption per feature (module-level flag; survives context resets)
Expand Down Expand Up @@ -398,3 +405,19 @@ def after_feature(context: Context, feature: Feature) -> None:

_stop_proxy(context, "tunnel_proxy", "proxy_loop")
_stop_proxy(context, "interception_proxy", "interception_proxy_loop")

start = getattr(feature, _E2E_FEATURE_PERF_START_ATTR, None)
if start is not None:
elapsed_s = time.perf_counter() - start
try:
delattr(feature, _E2E_FEATURE_PERF_START_ATTR)
except AttributeError:
pass
feat_path = getattr(feature, "filename", "") or ""
label = os.path.basename(feat_path) if feat_path else feature.name
print(f"[e2e feature timing] {elapsed_s:.2f}s {label}", flush=True)


# Behave captures hook stdout by default; output is only shown in some failure paths.
# Disable capture so feature timing lines always appear on the real console/CI log.
after_feature.capture = False # type: ignore[attr-defined]
2 changes: 1 addition & 1 deletion tests/e2e/features/faiss.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized
@e2e_group_1 @Authorized
Feature: FAISS support tests

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/feedback.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Feedback
@e2e_group_3 @Feedback
Feature: feedback endpoint API tests


Expand Down
1 change: 1 addition & 0 deletions tests/e2e/features/health.feature
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@e2e_group_2
Feature: REST API tests


Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/http_401_unauthorized.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized @Feedback @RHIdentity @RBAC
@e2e_group_3 @Authorized @Feedback @RHIdentity @RBAC
Feature: HTTP 401 Unauthorized

Aggregates end-to-end scenarios that assert a 401 response when authentication
Expand Down
1 change: 1 addition & 0 deletions tests/e2e/features/info.feature
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@e2e_group_3
Feature: Info tests


Expand Down
1 change: 1 addition & 0 deletions tests/e2e/features/inline_rag.feature
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@e2e_group_3
Feature: Inline RAG (BYOK) support tests

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/llama_stack_disrupted.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@skip-in-library-mode @Authorized
@e2e_group_3 @skip-in-library-mode @Authorized
Feature: Llama Stack connection disrupted

End-to-end scenarios that stop the Llama Stack container (or simulate disconnect) and
Expand Down
1 change: 1 addition & 0 deletions tests/e2e/features/mcp.feature
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@e2e_group_2
Feature: MCP tests

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/mcp_servers_api.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@MCP
@e2e_group_3 @MCP
Feature: MCP Server Management API tests

Tests for the dynamic MCP server management endpoints:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/mcp_servers_api_auth.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@MCPServerAPIAuth
@e2e_group_1 @MCPServerAPIAuth
Feature: MCP Server Management API authentication tests

Tests that the MCP server management endpoints enforce authentication
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/mcp_servers_api_no_config.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@MCPNoConfig
@e2e_group_1 @MCPNoConfig
Feature: MCP Server API tests without configured MCP servers

Tests that the MCP server management endpoints work correctly
Expand Down
1 change: 1 addition & 0 deletions tests/e2e/features/models.feature
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@e2e_group_2
Feature: Models endpoint tests


Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/proxy.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@skip-in-library-mode
@e2e_group_3 @skip-in-library-mode
Feature: Proxy and TLS networking tests for Llama Stack providers

Verify that the Lightspeed Stack works correctly when Llama Stack's
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/query.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized
@e2e_group_3 @Authorized
Feature: Query endpoint API tests

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/rbac.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@RBAC
@e2e_group_2 @RBAC
Feature: Role-Based Access Control (RBAC)

Comprehensive tests for role-based access control to ensure
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/responses.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized
@e2e_group_1 @Authorized
Feature: Responses endpoint API tests

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/responses_streaming.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized
@e2e_group_1 @Authorized
Feature: Responses endpoint streaming API tests

# Same coverage as ``responses.feature`` with ``stream=true`` (SSE for success paths;
Expand Down
1 change: 1 addition & 0 deletions tests/e2e/features/rest_api.feature
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@e2e_group_1
Feature: REST API tests


Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/rlsapi_v1.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized
@e2e_group_2 @Authorized
Feature: rlsapi v1 /infer endpoint API tests

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/rlsapi_v1_errors.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@RBAC
@e2e_group_1 @RBAC
Feature: rlsapi v1 /infer endpoint error response tests

Tests for error conditions on the rlsapi v1 /infer endpoint including
Expand Down
1 change: 1 addition & 0 deletions tests/e2e/features/smoketests.feature
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@e2e_group_3
Feature: Smoke tests


Expand Down
10 changes: 5 additions & 5 deletions tests/e2e/features/steps/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
The proxy sits between Llama Stack and the LLM provider (e.g., OpenAI).

Config switching uses the same pattern as other e2e tests: overwrite the
host-mounted run.yaml and restart Docker containers. Cleanup is handled
by a Background step that restores the backup before each scenario.
host-mounted run.yaml and restart Docker containers. Restarts are not
triggered from ``The original Llama Stack config is restored if modified``;
list ``Llama Stack is restarted`` / ``Lightspeed Stack is restarted`` in the
feature file so readers see every restart. Cleanup restores the backup file
(and stops proxy servers) before each scenario.
"""

import asyncio
Expand Down Expand Up @@ -192,9 +195,6 @@ def restore_if_modified(context: Context) -> None:
f"Restoring original Llama Stack config from {_LLAMA_STACK_CONFIG_BACKUP}..."
)
shutil.move(_LLAMA_STACK_CONFIG_BACKUP, _LLAMA_STACK_CONFIG)
restart_container("llama-stack")
restart_container("lightspeed-stack")
wait_for_lightspeed_stack_http_ready()


# --- Service Restart Steps ---
Expand Down
5 changes: 2 additions & 3 deletions tests/e2e/features/steps/tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,8 @@ def _configure_tls(tls_config: dict[str, Any], base_url: Optional[str] = None) -


# --- Background Steps ---
# Restart steps ("The original Llama Stack config is restored if modified",
# "Llama Stack is restarted", "Lightspeed Stack is restarted") are defined in
# proxy.py and shared across features by behave.
# ``The original Llama Stack config is restored if modified`` only restores
# run.yaml (see proxy.py). Restart steps are listed in tls.feature / proxy.feature.


# --- TLS Configuration Steps ---
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/streaming_query.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Authorized
@e2e_group_2 @Authorized
Feature: streaming_query endpoint API tests

Background:
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/features/tls.feature
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@skip-in-library-mode
@e2e_group_1 @skip-in-library-mode
Feature: TLS configuration for remote inference providers
Validate that Llama Stack's NetworkConfig.tls settings are applied correctly
when connecting to a remote inference provider over HTTPS.
Expand Down
Loading