diff --git a/.github/workflows/gateway-integration-test-postgres.yml b/.github/workflows/gateway-integration-test-postgres.yml index 60ad61c9cb..b872cd3ff8 100644 --- a/.github/workflows/gateway-integration-test-postgres.yml +++ b/.github/workflows/gateway-integration-test-postgres.yml @@ -79,7 +79,7 @@ jobs: - name: Run integration tests run: | cd gateway - COMPOSE_FILE=docker-compose.test.postgres.yaml make test-integration + COMPOSE_FILE=docker-compose.test.postgres.yaml IT_GATEWAY_CONTROLLER_HA=true make test-integration - name: Upload coverage report uses: actions/upload-artifact@v4 diff --git a/gateway/it/Makefile b/gateway/it/Makefile index 6691969aa7..eda872fc1c 100644 --- a/gateway/it/Makefile +++ b/gateway/it/Makefile @@ -56,9 +56,13 @@ build-coverage: # Build and run tests in one command test-all: build-coverage test -# Run integration tests against Postgres-backed controller +# Run integration tests against Postgres-backed controller. +# This compose runs two gateway-controllers: the management one (REST) and +# gateway-controller-xds (feeds xDS to gateway-runtime). IT_GATEWAY_CONTROLLER_HA +# tells the suite to probe the runtime controller (host port 9093) for the +# policy-chain xDS-sync version, since that is the version the policy engine echoes. test-postgres: - COMPOSE_FILE=docker-compose.test.postgres.yaml $(MAKE) test + COMPOSE_FILE=docker-compose.test.postgres.yaml IT_GATEWAY_CONTROLLER_HA=true $(MAKE) test # Run vhost integration tests with single-domain gateway vhost config test-vhosts-single: diff --git a/gateway/it/db_helpers.go b/gateway/it/db_helpers.go index cb78e5f1a0..925ce01a92 100644 --- a/gateway/it/db_helpers.go +++ b/gateway/it/db_helpers.go @@ -21,6 +21,7 @@ package it import ( "context" "fmt" + "os" "os/exec" "regexp" "strings" @@ -114,6 +115,24 @@ func containerRunning(ctx context.Context, name string) bool { return strings.TrimSpace(string(out)) == "true" } +// envRuntimeControllerXDS is set (via the Makefile, see test-postgres) for the +// two-controller Postgres topology, where gateway-runtime is fed xDS by +// gateway-controller-xds rather than the management controller. +const envRuntimeControllerXDS = "IT_GATEWAY_CONTROLLER_HA" + +// policySnapshotControllerAdminURL returns the admin base URL the policy-chain +// xDS-sync probe should target, or "" to use the default management controller. +// When IT_GATEWAY_CONTROLLER_HA=true it points at the runtime-facing +// controller (host port 9093), whose policy-chain version the policy engine +// echoes. Returning "" lets waitForPolicySnapshotSync fall back to the +// management controller (single-controller topologies and unit tests). +func policySnapshotControllerAdminURL() string { + if os.Getenv(envRuntimeControllerXDS) == "true" { + return fmt.Sprintf("http://localhost:%s%s", GatewayControllerRuntimeAdminPort, GatewayAdminAPIBasePath) + } + return "" +} + // queryStoredConfiguration runs a SELECT against one of the per-resource-type // tables (rest_apis / websub_apis / etc.) joined with artifacts to look the row // up by handle. Returns the raw configuration JSON blob (the unrendered diff --git a/gateway/it/docker-compose.test.postgres.yaml b/gateway/it/docker-compose.test.postgres.yaml index 9abfdddcd3..1200d6cba8 100644 --- a/gateway/it/docker-compose.test.postgres.yaml +++ b/gateway/it/docker-compose.test.postgres.yaml @@ -20,7 +20,9 @@ # Uses the gateway-runtime container (Router + Policy Engine combined) # Build coverage images with: make build-gateway-runtime-coverage -# NOTE: gateway-controller and gateway-runtime are duplicated in docker-compose.test.yaml — keep in sync. +# NOTE: gateway-controller and gateway-runtime are duplicated in docker-compose.test.yaml +# for the single-controller baseline. This Postgres compose intentionally adds +# gateway-controller-xds to exercise Postgres/EventHub replica sync. services: # Mock platform-api for subscription-validation IT (mimics platform-api WebSocket events) @@ -70,8 +72,8 @@ services: gateway-controller: container_name: it-gateway-controller image: ghcr.io/wso2/api-platform/gateway-controller-coverage:test - mem_limit: 1000m - mem_reservation: 1000m + mem_limit: 500m + mem_reservation: 500m cpus: 0.5 command: ["-config", "/etc/gateway-controller/config.toml"] ports: @@ -117,11 +119,57 @@ services: networks: - it-gateway-runtime-network + # Runtime-facing gateway-controller replica. The integration tests continue to + # send management REST calls to gateway-controller above; this controller only + # serves xDS to gateway-runtime after synchronizing through Postgres/EventHub. + gateway-controller-xds: + container_name: it-gateway-controller-xds + image: ghcr.io/wso2/api-platform/gateway-controller-coverage:test + mem_limit: 500m + mem_reservation: 500m + cpus: 0.5 + command: ["-config", "/etc/gateway-controller/config.toml"] + ports: + # Admin API only. The IT sends management REST calls to gateway-controller + # above; this port is exposed solely so the policy-snapshot xDS-sync probe + # can query the version of the controller that actually feeds gateway-runtime. + - "9093:9092" + environment: + - APIP_GW_CONTROLLER_STORAGE_TYPE=postgres + - APIP_GW_CONTROLLER_STORAGE_POSTGRES_HOST=postgres + - APIP_GW_CONTROLLER_STORAGE_POSTGRES_PORT=5432 + - APIP_GW_CONTROLLER_STORAGE_POSTGRES_DATABASE=gateway_test + - APIP_GW_CONTROLLER_STORAGE_POSTGRES_USER=gateway + - APIP_GW_CONTROLLER_STORAGE_POSTGRES_PASSWORD=gateway + - APIP_GW_CONTROLLER_STORAGE_POSTGRES_SSLMODE=disable + - APIP_GW_CONTROLLER_LOGGING_LEVEL=debug + - GOCOVERDIR=/coverage + # Used by template-functions IT to verify {{ env "..." }} resolution in spec fields + - IT_TEMPLATE_PATH=/anything + volumes: + - controller-data-tests:/app/data + - ./it-aesgcm-keys/default-aesgcm256-v1.bin:/app/data/aesgcm-keys/default-aesgcm256-v1.bin:ro + - ./test-config.toml:/etc/gateway-controller/config.toml:ro + - ../gateway-controller/certificates:/app/certificates + - ../gateway-controller/listener-certs:/app/listener-certs:ro + - ./coverage/gateway-controller:/coverage + depends_on: + gateway-controller: + condition: service_healthy + healthcheck: + test: ["CMD-SHELL", "wget -q -O /dev/null http://localhost:9092/api/admin/v0.9/health || exit 1"] + interval: 5s + timeout: 3s + retries: 10 + start_period: 10s + networks: + - it-gateway-runtime-network + gateway-runtime: container_name: it-gateway-runtime image: ghcr.io/wso2/api-platform/gateway-runtime-coverage:test mem_limit: 2000m - mem_reservation: 2000m + mem_reservation: 1500m cpus: 1 command: ["--pol.config", "/etc/policy-engine/config.toml"] ports: @@ -133,7 +181,7 @@ services: - "9002:9002" # Admin API - "9003:9003" # Metrics environment: - - GATEWAY_CONTROLLER_HOST=it-gateway-controller + - GATEWAY_CONTROLLER_HOST=it-gateway-controller-xds - LOG_LEVEL=info # Override AWS Bedrock Runtime endpoint for testing with mock service - AWS_ENDPOINT_URL_BEDROCK_RUNTIME=http://mock-aws-bedrock-guardrail:8080 @@ -142,7 +190,7 @@ services: - ./coverage/gateway-runtime:/coverage - ./test-config.toml:/etc/policy-engine/config.toml:ro depends_on: - gateway-controller: + gateway-controller-xds: condition: service_healthy mock-openapi: condition: service_healthy diff --git a/gateway/it/setup.go b/gateway/it/setup.go index a2fbe46f5f..65de8e1c4b 100644 --- a/gateway/it/setup.go +++ b/gateway/it/setup.go @@ -51,6 +51,12 @@ const ( // GatewayControllerAdminPort is the controller admin HTTP port GatewayControllerAdminPort = "9092" + // GatewayControllerRuntimeAdminPort is the host port mapped to the + // runtime-facing controller's admin HTTP port (container 9092) in the + // two-controller Postgres topology (docker-compose.test.postgres.yaml). + // It is queried only for the policy-snapshot xDS-sync probe. + GatewayControllerRuntimeAdminPort = "9093" + // RouterPort is the HTTP traffic port for the router RouterPort = "8080" @@ -425,6 +431,7 @@ func CheckPortsAvailable() error { ports := []string{ GatewayControllerPort, // 9090 GatewayControllerAdminPort, + GatewayControllerRuntimeAdminPort, // 9093 RouterPort, // 8080 "8443", // HTTPS EnvoyAdminPort, // 9901 diff --git a/gateway/it/state.go b/gateway/it/state.go index 91152bc390..f6b9d11fd8 100644 --- a/gateway/it/state.go +++ b/gateway/it/state.go @@ -34,20 +34,27 @@ type AuthUser struct { // Config holds configuration for the test suite type Config struct { - GatewayControllerURL string - GatewayControllerAdminURL string - RouterURL string - PolicyEngineURL string - SampleBackendURL string - EchoBackendURL string - MockJWKSURL string - MockAzureContentSafetyURL string - MockAWSBedrockGuardrailURL string - MockEmbeddingProviderURL string - MockPlatformAPIURL string - RedisURL string - HTTPTimeout time.Duration - Users map[string]AuthUser + GatewayControllerURL string + GatewayControllerAdminURL string + // PolicySnapshotControllerAdminURL is the admin base URL probed for the + // policy-chain xDS-sync version. It targets the controller that feeds xDS to + // gateway-runtime, which in the two-controller Postgres topology is + // gateway-controller-xds rather than the management controller. When + // empty, waitForPolicySnapshotSync falls back to GatewayControllerAdminURL + // (single-controller topologies and unit tests). + PolicySnapshotControllerAdminURL string + RouterURL string + PolicyEngineURL string + SampleBackendURL string + EchoBackendURL string + MockJWKSURL string + MockAzureContentSafetyURL string + MockAWSBedrockGuardrailURL string + MockEmbeddingProviderURL string + MockPlatformAPIURL string + RedisURL string + HTTPTimeout time.Duration + Users map[string]AuthUser } // MockPlatformAPIPort is the port for mock-platform-api inject endpoint diff --git a/gateway/it/steps_health.go b/gateway/it/steps_health.go index 5e0d308a81..06064df9c6 100644 --- a/gateway/it/steps_health.go +++ b/gateway/it/steps_health.go @@ -275,7 +275,16 @@ func (h *HealthSteps) waitForPolicySnapshotSync() error { maxAttempts := 50 attemptInterval := 300 * time.Millisecond - controllerURL := fmt.Sprintf("%s/xds_sync_status", h.state.Config.GatewayControllerAdminURL) + // Probe the controller that actually feeds xDS to gateway-runtime, whose + // policy-chain version the policy engine echoes. In the two-controller + // Postgres topology the suite sets PolicySnapshotControllerAdminURL to + // gateway-controller-xds (port 9093); otherwise (single-controller + // topologies, unit tests) we fall back to the management controller. + adminBase := h.state.Config.PolicySnapshotControllerAdminURL + if adminBase == "" { + adminBase = h.state.Config.GatewayControllerAdminURL + } + controllerURL := fmt.Sprintf("%s/xds_sync_status", adminBase) policyEngineURL := fmt.Sprintf("%s/xds_sync_status", h.state.Config.PolicyEngineURL) lastControllerVersion := "" lastRuntimeVersion := "" diff --git a/gateway/it/suite_test.go b/gateway/it/suite_test.go index 7eaaf13948..bef384d115 100644 --- a/gateway/it/suite_test.go +++ b/gateway/it/suite_test.go @@ -213,6 +213,17 @@ func InitializeTestSuite(ctx *godog.TestSuiteContext) { // Initialize global test state testState = NewTestState() + // In the two-controller Postgres topology, gateway-runtime is fed xDS by + // gateway-controller-xds, so the policy-snapshot version probe must + // target that controller's admin API (host port 9093) rather than the + // management controller. The Makefile sets IT_GATEWAY_CONTROLLER_HA + // for that topology; otherwise this is empty and waitForPolicySnapshotSync + // falls back to the management controller (single-controller, unit tests). + if url := policySnapshotControllerAdminURL(); url != "" { + testState.Config.PolicySnapshotControllerAdminURL = url + log.Printf("%s=true; policy-snapshot probe target: %s", envRuntimeControllerXDS, url) + } + // Initialize common step handlers httpSteps = steps.NewHTTPSteps(testState.HTTPClient, map[string]string{ "gateway-controller": testState.Config.GatewayControllerURL,