Skip to content

Commit a99b916

Browse files
feat(gateway): URL-stable identity-based cluster naming for API-level upstreams
Derive API-level main/sandbox cluster names from sha256(apiID) instead of the backend URL, in both the Envoy xDS translator and the RuntimeDeployConfig transform path. The final name shape is main_<16hex> / sandbox_<16hex>; an API's main and sandbox clusters share the hash fragment (the env prefix provides the distinction), so an operator can pair an API's clusters at a glance in stats and config dumps. Because the name no longer encodes the URL, a host, port, or scheme edit is delivered to Envoy as an update to the same named cluster, which Envoy warms and swaps, instead of removing one cluster name and adding another, and a path-only edit touches just the route rewrite: routes never repoint, name-keyed stats stay continuous, and no request failures were observed while updating under continuous traffic. ClusterKey and EnvoyClusterName are unified to the same string so the policy engine's default upstream cluster always resolves to a real Envoy cluster, and the same API's main and sandbox upstreams can no longer collide into one cluster when they share a URL. Cross-API name uniqueness rests on the 64-bit hash truncation (collisions between distinct API IDs are highly unlikely, not impossible). The integration test asserts cluster identity through the Envoy admin endpoint: identity-named clusters present, URL-derived names absent, and the exact cluster-name set unchanged across host-changing URL updates (a new capture/compare step pair), so the scenarios fail under the previous naming scheme. Rollout note: on upgrade, each existing API's main/sandbox clusters are renamed once on their next translation, so per-cluster connection pools and upstream_cluster metric labels churn once; dashboards keyed on the old cluster_<scheme>_<host> names need a one-time update. The data-model spec doc is updated to describe the new naming.
1 parent cb13299 commit a99b916

12 files changed

Lines changed: 706 additions & 96 deletions

File tree

gateway/gateway-controller/cmd/controller/main.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ import (
3838
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/policyxds"
3939
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/service/restapi"
4040
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/storage"
41-
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/webhooksecretxds"
4241
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/transform"
4342
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/utils"
4443
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/version"
44+
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/webhooksecretxds"
4545
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/xds"
4646
)
4747

@@ -407,6 +407,9 @@ func main() {
407407
llmTransformer := transform.NewLLMTransformer(configStore, db, &cfg.Router, cfg, policyDefinitions, policyVersionResolver)
408408
transformerRegistry := transform.NewRegistry(restTransformer, llmTransformer)
409409
policyManager.SetTransformers(transformerRegistry)
410+
// In this controller wiring, only policy xDS receives the transformer
411+
// registry. Main Envoy xDS still translates RestAPI configs directly, so
412+
// both paths must keep cluster-name derivation in sync.
410413

411414
// Load runtime configs from existing API configurations on startup.
412415
// We write directly to runtimeStore to avoid triggering N separate snapshot updates;

gateway/gateway-controller/pkg/policyxds/policyxds_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ func TestTranslator_TranslateRuntimeConfigs(t *testing.T) {
117117
OperationPath: "/users",
118118
Vhost: "localhost",
119119
Upstream: models.RouteUpstream{
120-
ClusterKey: "upstream_main_localhost_8080",
120+
ClusterKey: "main_fixture",
121121
},
122122
},
123123
},
@@ -129,7 +129,7 @@ func TestTranslator_TranslateRuntimeConfigs(t *testing.T) {
129129
},
130130
},
131131
UpstreamClusters: map[string]*models.UpstreamCluster{
132-
"upstream_main_localhost_8080": {
132+
"main_fixture": {
133133
BasePath: "/",
134134
Endpoints: []models.Endpoint{{Host: "localhost", Port: 8080}},
135135
},

gateway/gateway-controller/pkg/transform/restapi.go

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/config"
3232
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/models"
3333
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/utils"
34+
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/utils/clusterkey"
3435
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/xds"
3536
policyv1alpha "github.com/wso2/api-platform/sdk/core/policy/v1alpha2"
3637
policyenginev1 "github.com/wso2/api-platform/sdk/core/policyengine"
@@ -298,9 +299,10 @@ func (t *RestAPITransformer) buildPolicyChain(
298299
type upstreamClusterResult struct {
299300
// ClusterKey is the internal key used in rdc.UpstreamClusters.
300301
ClusterKey string
301-
// EnvoyClusterName is the Envoy cluster name matching pkg/xds/translator.go's
302-
// sanitizeClusterName format ("cluster_<scheme>_<sanitized_host>").
303-
// This is the value Envoy knows the cluster by, so PE must use it for x-target-upstream.
302+
// EnvoyClusterName is the Envoy cluster name. For API-level upstreams it is
303+
// the URL-stable hashed name "<env>_<16-hex>" (matching ClusterKey). This is
304+
// the value Envoy knows the cluster by, so the policy engine must use it for
305+
// the x-target-upstream header.
304306
EnvoyClusterName string
305307
// BasePath is the URL path component of the upstream (e.g. "/anything/foo").
306308
BasePath string
@@ -337,7 +339,12 @@ func (t *RestAPITransformer) addUpstreamCluster(
337339
basePath = "/"
338340
}
339341

340-
clusterKey := fmt.Sprintf("upstream_%s_%s_%d", upstreamName, parsedURL.Hostname(), port)
342+
// URL-stable cluster naming: "<env>_<sha256(apiID) fragment>" so a URL edit
343+
// updates the same named cluster instead of renaming it (routes and stats
344+
// keys stay continuous). ClusterKey and EnvoyClusterName are intentionally
345+
// the same string so the policy engine's `default_upstream_cluster` metadata
346+
// points at the actual Envoy cluster.
347+
clusterKey := upstreamName + "_" + clusterkey.APILevel(rdc.Metadata.UUID)
341348

342349
rdc.UpstreamClusters[clusterKey] = &models.UpstreamCluster{
343350
BasePath: basePath,
@@ -348,21 +355,17 @@ func (t *RestAPITransformer) addUpstreamCluster(
348355
TLS: &models.UpstreamTLS{Enabled: parsedURL.Scheme == "https"},
349356
}
350357

358+
// ClusterKey and EnvoyClusterName must be the same string. If they differ,
359+
// the default_upstream_cluster metadata written by the policy engine will
360+
// not match the Envoy cluster name, producing 503 NoRoute when the default
361+
// upstream path is taken.
351362
return &upstreamClusterResult{
352363
ClusterKey: clusterKey,
353-
EnvoyClusterName: sanitizeEnvoyClusterName(parsedURL.Host, parsedURL.Scheme),
364+
EnvoyClusterName: clusterKey,
354365
BasePath: basePath,
355366
}, nil
356367
}
357368

358-
// sanitizeEnvoyClusterName computes the Envoy cluster name from a URL host and scheme,
359-
// matching the sanitizeClusterName logic in pkg/xds/translator.go.
360-
func sanitizeEnvoyClusterName(host, scheme string) string {
361-
name := strings.ReplaceAll(host, ".", "_")
362-
name = strings.ReplaceAll(name, ":", "_")
363-
return "cluster_" + scheme + "_" + name
364-
}
365-
366369
// resolveUpstreamURL resolves the URL from an upstream (direct URL or ref). For a ref it
367370
// also returns the referenced definition's base path (from basePath, never the URL); for a
368371
// direct URL the returned base-path pointer is nil, signalling the caller to use the URL path.

gateway/gateway-controller/pkg/transform/restapi_test.go

Lines changed: 136 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,15 @@ package transform
2020

2121
import (
2222
"net/url"
23+
"strings"
2324
"testing"
2425

2526
"github.com/stretchr/testify/assert"
2627
"github.com/stretchr/testify/require"
2728
api "github.com/wso2/api-platform/gateway/gateway-controller/pkg/api/management"
2829
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/config"
2930
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/models"
31+
"github.com/wso2/api-platform/gateway/gateway-controller/pkg/utils/clusterkey"
3032
)
3133

3234
// ptrStr is a helper to get a pointer to a string literal.
@@ -353,7 +355,6 @@ func TestRestAPITransformer_SandboxRouteClusterHeader(t *testing.T) {
353355
defs := map[string]models.PolicyDefinition{}
354356
const sandboxURL = "http://sandbox-backend:9080/sandbox"
355357
const sandboxRouteKey = "GET|/test/hello|sandbox.local"
356-
expectedSandboxCluster := sanitizeEnvoyClusterName("sandbox-backend:9080", "http")
357358

358359
t.Run("without upstreamDefinitions the sandbox route is static", func(t *testing.T) {
359360
transformer := NewRestAPITransformer(testRouterCfg(), &config.Config{}, defs)
@@ -383,7 +384,139 @@ func TestRestAPITransformer_SandboxRouteClusterHeader(t *testing.T) {
383384
r, exists := rdc.Routes[sandboxRouteKey]
384385
require.True(t, exists, "sandbox route should exist")
385386
assert.True(t, r.Upstream.UseClusterHeader)
386-
assert.Equal(t, expectedSandboxCluster, r.Upstream.DefaultCluster,
387-
"sandbox route must default to the sandbox cluster, not main")
387+
assert.True(t, strings.HasPrefix(r.Upstream.DefaultCluster, "sandbox_"),
388+
"sandbox route must default to the URL-stable sandbox cluster (sandbox_<hash>), not main; got %q", r.Upstream.DefaultCluster)
388389
})
389390
}
391+
392+
// makeRestAPIWithOps builds a RestAPI StoredConfig with caller-supplied operations
393+
// and both API-level main and sandbox upstreams configured.
394+
func makeRestAPIWithOps(ops []api.Operation) *models.StoredConfig {
395+
apiData := api.APIConfigData{
396+
DisplayName: "Test API",
397+
Context: "/test",
398+
Version: "1.0.0",
399+
Operations: ops,
400+
Upstream: struct {
401+
Main api.Upstream `json:"main" yaml:"main"`
402+
Sandbox *api.Upstream `json:"sandbox,omitempty" yaml:"sandbox,omitempty"`
403+
}{
404+
Main: api.Upstream{Url: ptrStr("http://api-main:8080")},
405+
Sandbox: &api.Upstream{Url: ptrStr("http://api-sandbox:8080")},
406+
},
407+
}
408+
restAPI := api.RestAPI{
409+
Kind: api.RestAPIKindRestApi,
410+
Metadata: api.Metadata{Name: "test-api"},
411+
Spec: apiData,
412+
}
413+
return &models.StoredConfig{
414+
UUID: "test-api",
415+
Kind: string(api.RestAPIKindRestApi),
416+
Configuration: restAPI,
417+
}
418+
}
419+
420+
// TestRestAPITransformer_APILevelClusterNameShape asserts the URL-stable cluster
421+
// naming contract for API-level main and sandbox upstreams:
422+
// - cluster names are "<env>_<16-hex>" derived from sha256(apiID), shared by main and sandbox
423+
// - ClusterKey and EnvoyClusterName are the SAME string (so the policy engine's
424+
// default_upstream_cluster metadata resolves to a real Envoy cluster)
425+
func TestRestAPITransformer_APILevelClusterNameShape(t *testing.T) {
426+
transformer := NewRestAPITransformer(testRouterCfg(), &config.Config{}, map[string]models.PolicyDefinition{})
427+
cfg := makeRestAPIWithOps([]api.Operation{
428+
{Method: "GET", Path: "/users"},
429+
})
430+
431+
rdc, err := transformer.Transform(cfg)
432+
require.NoError(t, err)
433+
434+
expectedMain := "main_" + clusterkey.APILevel(cfg.UUID)
435+
expectedSandbox := "sandbox_" + clusterkey.APILevel(cfg.UUID)
436+
437+
mainRoute := rdc.Routes["GET|/test/users|main.local"]
438+
require.NotNil(t, mainRoute, "main route must exist")
439+
assert.Equal(t, expectedMain, mainRoute.Upstream.ClusterKey,
440+
"main cluster name should be <env>_<hash> derived from sha256(apiID)")
441+
442+
sandboxRoute := rdc.Routes["GET|/test/users|sandbox.local"]
443+
require.NotNil(t, sandboxRoute, "sandbox route must exist")
444+
assert.Equal(t, expectedSandbox, sandboxRoute.Upstream.ClusterKey,
445+
"sandbox cluster name should be <env>_<hash> derived from sha256(apiID)")
446+
447+
_, mainExists := rdc.UpstreamClusters[expectedMain]
448+
require.True(t, mainExists, "main cluster %q must be registered in UpstreamClusters", expectedMain)
449+
_, sandboxExists := rdc.UpstreamClusters[expectedSandbox]
450+
require.True(t, sandboxExists, "sandbox cluster %q must be registered in UpstreamClusters", expectedSandbox)
451+
}
452+
453+
// TestRestAPITransformer_APILevelDefaultClusterMatchesRealCluster verifies that
454+
// route.Upstream.DefaultCluster matches a cluster registered in
455+
// rdc.UpstreamClusters whenever UseClusterHeader is enabled. The policy engine
456+
// writes DefaultCluster into the x-target-upstream header and Envoy looks up
457+
// the cluster by that value; if the name does not match a registered cluster,
458+
// Envoy returns 503 NoRoute.
459+
func TestRestAPITransformer_APILevelDefaultClusterMatchesRealCluster(t *testing.T) {
460+
transformer := NewRestAPITransformer(testRouterCfg(), &config.Config{}, map[string]models.PolicyDefinition{})
461+
cfg := makeRestAPIWithOps([]api.Operation{
462+
{Method: "GET", Path: "/users"},
463+
})
464+
// Add an upstreamDefinition so UseClusterHeader becomes true and
465+
// DefaultCluster is actually populated.
466+
spec := cfg.Configuration.(api.RestAPI)
467+
spec.Spec.UpstreamDefinitions = &[]api.UpstreamDefinition{
468+
{
469+
Name: "stub-def",
470+
Upstreams: []struct {
471+
Url string `json:"url" yaml:"url"`
472+
Weight *int `json:"weight,omitempty" yaml:"weight,omitempty"`
473+
}{
474+
{Url: "http://stub-def-svc:8080"},
475+
},
476+
},
477+
}
478+
cfg.Configuration = spec
479+
480+
rdc, err := transformer.Transform(cfg)
481+
require.NoError(t, err)
482+
483+
mainRoute := rdc.Routes["GET|/test/users|main.local"]
484+
require.NotNil(t, mainRoute)
485+
require.True(t, mainRoute.Upstream.UseClusterHeader,
486+
"upstreamDefinitions present, UseClusterHeader should be true so DefaultCluster is meaningful")
487+
require.NotEmpty(t, mainRoute.Upstream.DefaultCluster,
488+
"DefaultCluster must be populated when UseClusterHeader is true")
489+
490+
_, exists := rdc.UpstreamClusters[mainRoute.Upstream.DefaultCluster]
491+
assert.True(t, exists,
492+
"DefaultCluster %q must reference a real registered cluster in UpstreamClusters "+
493+
"(prevents 503 NoRoute when policy engine writes x-target-upstream)",
494+
mainRoute.Upstream.DefaultCluster)
495+
assert.Equal(t, mainRoute.Upstream.ClusterKey, mainRoute.Upstream.DefaultCluster,
496+
"DefaultCluster and ClusterKey must be the same string")
497+
}
498+
499+
// TestRestAPITransformer_APILevelURLStableAcrossURLEdit asserts that editing the
500+
// API-level main upstream URL does NOT change the cluster name. This is the
501+
// URL-stable contract: the route keeps pointing at the same named cluster and
502+
// name-keyed stats stay continuous across URL edits.
503+
func TestRestAPITransformer_APILevelURLStableAcrossURLEdit(t *testing.T) {
504+
transformer := NewRestAPITransformer(testRouterCfg(), &config.Config{}, map[string]models.PolicyDefinition{})
505+
506+
cfgA := makeRestAPIWithOps([]api.Operation{{Method: "GET", Path: "/users"}})
507+
rdcA, err := transformer.Transform(cfgA)
508+
require.NoError(t, err)
509+
510+
cfgB := makeRestAPIWithOps([]api.Operation{{Method: "GET", Path: "/users"}})
511+
specB := cfgB.Configuration.(api.RestAPI)
512+
specB.Spec.Upstream.Main.Url = ptrStr("http://api-main-v2:9090")
513+
cfgB.Configuration = specB
514+
rdcB, err := transformer.Transform(cfgB)
515+
require.NoError(t, err)
516+
517+
nameA := rdcA.Routes["GET|/test/users|main.local"].Upstream.ClusterKey
518+
nameB := rdcB.Routes["GET|/test/users|main.local"].Upstream.ClusterKey
519+
assert.Equal(t, nameA, nameB,
520+
"API-level main cluster name must not depend on URL "+
521+
"(URL-stable contract: the name must survive URL edits)")
522+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com).
3+
*
4+
* WSO2 LLC. licenses this file to you under the Apache License,
5+
* Version 2.0 (the "License"); you may not use this file except
6+
* in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing,
12+
* software distributed under the License is distributed on an
13+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
* KIND, either express or implied. See the License for the
15+
* specific language governing permissions and limitations
16+
* under the License.
17+
*/
18+
19+
// Package clusterkey produces deterministic, hex-encoded cluster-key fragments
20+
// used by the gateway-controller to name Envoy clusters. It is a leaf package
21+
// (stdlib imports only) so both xDS builders (pkg/xds and pkg/transform) can
22+
// share one naming source without import cycles.
23+
package clusterkey
24+
25+
import (
26+
"crypto/sha256"
27+
"encoding/hex"
28+
)
29+
30+
// APILevel derives the cluster-key fragment for an API-level upstream cluster
// from the API's identifier.
//
// The fragment is the lowercase hex encoding of the first 8 bytes (64 bits) of
// SHA-256(apiID), so it is always 16 characters long and identical for
// identical input. Nothing but apiID feeds the hash: the backend URL is not an
// input, so the fragment does not change when an upstream's host, port,
// scheme, or path is edited, and an API's main and sandbox clusters get the
// same fragment. Callers prepend the environment prefix ("main_" or
// "sandbox_") to tell the two apart.
//
// Because the digest is truncated to 64 bits, two distinct apiIDs can in
// principle map to the same fragment; such collisions are very unlikely, not
// impossible. An empty apiID is hashed like any other string and therefore
// yields one fixed fragment.
func APILevel(apiID string) string {
	// fragmentBytes is how much of the digest is kept; hex encoding doubles
	// it to the 16 characters that appear in the cluster name.
	const fragmentBytes = 8

	digest := sha256.Sum256([]byte(apiID))
	return hex.EncodeToString(digest[:fragmentBytes])
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright (c) 2026, WSO2 LLC. (https://www.wso2.com).
3+
*
4+
* WSO2 LLC. licenses this file to you under the Apache License,
5+
* Version 2.0 (the "License"); you may not use this file except
6+
* in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing,
12+
* software distributed under the License is distributed on an
13+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
* KIND, either express or implied. See the License for the
15+
* specific language governing permissions and limitations
16+
* under the License.
17+
*/
18+
19+
package clusterkey
20+
21+
import (
22+
"regexp"
23+
"testing"
24+
25+
"github.com/stretchr/testify/assert"
26+
)
27+
28+
// hexShape16 matches exactly 16 lowercase hex characters - the cluster-key
29+
// fragment shape produced by APILevel.
30+
var hexShape16 = regexp.MustCompile("^[a-f0-9]{16}$")
31+
32+
// TestAPILevel validates the contract for API-level cluster naming:
33+
// - deterministic for identical input
34+
// - distinct for distinct apiIDs
35+
// - 16 hex chars from SHA-256[:8]
36+
//
37+
// The fragment is shared by an API's main and sandbox clusters by design (the
38+
// env prefix the caller prepends provides the distinction), so an operator
39+
// can pair an API's clusters at a glance.
40+
func TestAPILevel(t *testing.T) {
41+
t.Run("deterministic for identical input", func(t *testing.T) {
42+
a := APILevel("api-1")
43+
b := APILevel("api-1")
44+
assert.Equal(t, a, b, "same input must produce same hash")
45+
assert.Regexp(t, hexShape16, a, "hash must be exactly 16 lowercase hex characters")
46+
})
47+
48+
t.Run("different apiID produces different hash", func(t *testing.T) {
49+
a := APILevel("api-1")
50+
b := APILevel("api-2")
51+
assert.NotEqual(t, a, b)
52+
})
53+
}

0 commit comments

Comments
 (0)