Skip to content

Commit bb8fd87

Browse files
committed
Add api gateway peering unit test cases. (#23366)
* add unit test cases for api gateway supporting consul peering * add compiled xds config golden files after running api-gw golden testcases * fix(proxyCfg): propogate meshGatewayConfig to upstreams of API gateway (#23369) * fix(proxyCfg): propogate meshGatewayConfig to upstreams of API gateway - Fixed handleRouteConfigUpdate to properly propagate meshGatewayConfig to API gateway upstreams, which is required during XDS endpoint and cluster config generation. - Added TestStateChangedAPIGateway test cases in state_test.go to validate API gateway update handling. - Added API gateway-specific logging prefix (similar to mesh gateway) to help in debugging. * fix(xds): correct endpoint config generation for API gateway in peered setups (#23370) * fix(xds): correct endpoint config generation for API gateway in peered setups - Previously, API gateway XDS endpoint generation incorrectly relied on cfgSnap.ConnectProxy config (instead of cgfSnap.APIGateway), which caused wrong/no endpoint configuration for peered environments. - Changes made: - Updated makeUpstreamLoadAssignmentForPeerService to fetch localGatewayEndpoint based on cfgSnap kind instead of always using cfgSnap.ConnectProxy. - Updated endpointsFromDiscoveryChain to derive meshGatewayMode based on cfgSnap kind instead of always using cfgSnap.ConnectProxy. - Recompiled golden test file to reflect fix. * removed comment * fix(xds): correct cluster config generation for API gateway in peered setups (#23371) * fix(xds): correct cluster config generation for API gateway in peered setups - Updated makeUpstreamClustersForDiscoveryChain to generate cluster config based on upstream endpoint type. Before this fix, it always generated cluster configs without endpoints, which is incorrect when the upstream endpoint type is hostname and mesh-gateway mode is remote; in such cases, endpoints must also be included in the cluster config. - Added recompiled golden test file to reflect the fix. * fix lint error
1 parent 9cdab16 commit bb8fd87

28 files changed

Lines changed: 1304 additions & 35 deletions

File tree

agent/proxycfg/api_gateway.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,19 @@ func (h *handlerAPIGateway) handleRouteConfigUpdate(ctx context.Context, u Updat
367367
for _, rule := range route.Rules {
368368
for _, service := range rule.Services {
369369
effectiveLimits := apiGatewayEffectiveUpstreamLimits(defaultLimits, service.Limits)
370+
371+
// Retrieving the meshGatewayConfig from handlerAPIGateway instance.
372+
// `handlerAPIGateway` embeds `handlerState`, which exposes `serviceInstance.proxyCfg`.
373+
// serviceInstance.proxyCfg.MeshGateway is replicated from NodeService during state setup/update.
374+
// and NodeService populated for all gateway's during service resistration `AgentRegisterService`.
375+
//
376+
// So, Whenever any change happens in NodeService, proxyCfg manager will recreate
377+
// the state where it copies NodeService to serviceInstance and
378+
// then calls this api_gateway handleUpdates method.
379+
// which will update the Mesh-Gateway config to api_gateway upstreams (below).
380+
// h.service = <name of api-gateway>
381+
meshGatewayConfig := h.proxyCfg.MeshGateway
382+
370383
for _, listener := range snap.APIGateway.Listeners {
371384
shouldBind := false
372385
for _, parent := range route.Parents {
@@ -393,6 +406,11 @@ func (h *handlerAPIGateway) handleRouteConfigUpdate(ctx context.Context, u Updat
393406
// Pass the protocol that was configured on the listener in order
394407
// to force that protocol on the Envoy listener.
395408
Config: upstreamCfg,
409+
410+
// Propogate the meshGatewayConfig in api gateway upstreams
411+
// so that meshGatewayMode can be used in XDS for
412+
// endpoints and cluster config generation.
413+
MeshGateway: meshGatewayConfig,
396414
}
397415

398416
listenerKey := APIGatewayListenerKeyFromListener(listener)
@@ -425,6 +443,7 @@ func (h *handlerAPIGateway) handleRouteConfigUpdate(ctx context.Context, u Updat
425443

426444
for _, service := range route.Services {
427445
effectiveLimits := apiGatewayEffectiveUpstreamLimits(defaultLimits, service.Limits)
446+
meshGatewayConfig := h.proxyCfg.MeshGateway
428447
upstreamID := NewUpstreamIDFromServiceName(service.ServiceName())
429448
seenUpstreamIDs.add(upstreamID)
430449

@@ -454,7 +473,8 @@ func (h *handlerAPIGateway) handleRouteConfigUpdate(ctx context.Context, u Updat
454473
LocalBindPort: listener.Port,
455474
// Pass the protocol that was configured on the ingress listener in order
456475
// to force that protocol on the Envoy listener.
457-
Config: upstreamCfg,
476+
Config: upstreamCfg,
477+
MeshGateway: meshGatewayConfig,
458478
}
459479

460480
listenerKey := APIGatewayListenerKeyFromListener(listener)

agent/proxycfg/state.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ func newKindHandler(config stateConfig, s serviceInstance, ch chan UpdateEvent)
226226
case structs.ServiceKindIngressGateway:
227227
handler = &handlerIngressGateway{handlerState: h}
228228
case structs.ServiceKindAPIGateway:
229+
h.logger = config.logger.Named(logging.APIGateway)
229230
handler = &handlerAPIGateway{handlerState: h}
230231
default:
231232
return nil, errors.New("not a connect-proxy, terminating-gateway, mesh-gateway, or ingress-gateway")

agent/proxycfg/state_test.go

Lines changed: 126 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import (
2626
"github.com/hashicorp/consul/sdk/testutil"
2727
)
2828

29-
func TestStateChanged(t *testing.T) {
29+
func TestStateChangedConnectProxy(t *testing.T) {
3030
tests := []struct {
3131
name string
3232
ns *structs.NodeService
@@ -108,6 +108,131 @@ func TestStateChanged(t *testing.T) {
108108
},
109109
want: true,
110110
},
111+
{
112+
name: "different proxy mesh gateway mode",
113+
ns: structs.TestNodeServiceAPIGateway(t),
114+
token: "foo",
115+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
116+
ns.Proxy.MeshGateway.Mode = structs.MeshGatewayModeLocal
117+
return &ns, token
118+
},
119+
want: true,
120+
},
121+
}
122+
123+
for _, tt := range tests {
124+
t.Run(tt.name, func(t *testing.T) {
125+
proxyID := ProxyID{ServiceID: tt.ns.CompoundServiceID()}
126+
state, err := newState(proxyID, tt.ns, testSource, tt.token, stateConfig{logger: hclog.New(nil)}, rate.NewLimiter(rate.Inf, 1))
127+
require.NoError(t, err)
128+
otherNS, otherToken := tt.mutate(*tt.ns, tt.token)
129+
require.Equal(t, tt.want, state.Changed(otherNS, otherToken))
130+
})
131+
}
132+
}
133+
134+
func TestStateChangedAPIGateway(t *testing.T) {
135+
tests := []struct {
136+
name string
137+
ns *structs.NodeService
138+
token string
139+
mutate func(ns structs.NodeService, token string) (*structs.NodeService, string)
140+
want bool
141+
}{
142+
{
143+
name: "nil node service",
144+
ns: structs.TestNodeServiceAPIGateway(t),
145+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
146+
return nil, token
147+
},
148+
want: true,
149+
},
150+
{
151+
name: "same service",
152+
ns: structs.TestNodeServiceAPIGateway(t),
153+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
154+
return &ns, token
155+
}, want: false,
156+
},
157+
{
158+
name: "same service, different token",
159+
ns: structs.TestNodeServiceAPIGateway(t),
160+
token: "foo",
161+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
162+
return &ns, "bar"
163+
},
164+
want: true,
165+
},
166+
{
167+
name: "different address",
168+
ns: structs.TestNodeServiceAPIGateway(t),
169+
token: "foo",
170+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
171+
ns.Address = "10.10.10.10"
172+
return &ns, token
173+
},
174+
want: true,
175+
},
176+
{
177+
name: "different port",
178+
ns: structs.TestNodeServiceAPIGateway(t),
179+
token: "foo",
180+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
181+
ns.Port = 12345
182+
return &ns, token
183+
},
184+
want: true,
185+
},
186+
{
187+
name: "different service kind",
188+
ns: structs.TestNodeServiceAPIGateway(t),
189+
token: "foo",
190+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
191+
ns.Kind = ""
192+
return &ns, token
193+
},
194+
want: true,
195+
},
196+
{
197+
name: "different proxy target",
198+
ns: structs.TestNodeServiceAPIGateway(t),
199+
token: "foo",
200+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
201+
ns.Proxy.DestinationServiceName = "badger"
202+
return &ns, token
203+
},
204+
want: true,
205+
},
206+
{
207+
name: "different proxy upstreams",
208+
ns: structs.TestNodeServiceAPIGateway(t),
209+
token: "foo",
210+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
211+
ns.Proxy.Upstreams = nil
212+
return &ns, token
213+
},
214+
want: true,
215+
},
216+
{
217+
name: "different mesh gateway mode (local)",
218+
ns: structs.TestNodeServiceAPIGateway(t),
219+
token: "foo",
220+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
221+
ns.Proxy.MeshGateway.Mode = structs.MeshGatewayModeLocal
222+
return &ns, token
223+
},
224+
want: true,
225+
},
226+
{
227+
name: "different mesh gateway mode (remote)",
228+
ns: structs.TestNodeServiceAPIGateway(t),
229+
token: "foo",
230+
mutate: func(ns structs.NodeService, token string) (*structs.NodeService, string) {
231+
ns.Proxy.MeshGateway.Mode = structs.MeshGatewayModeRemote
232+
return &ns, token
233+
},
234+
want: true,
235+
},
111236
}
112237

113238
for _, tt := range tests {

agent/structs/testing_catalog.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,10 +176,35 @@ func TestNodeServiceMeshGateway(t testing.T) *NodeService {
176176
}
177177

178178
func TestNodeServiceAPIGateway(t testing.T) *NodeService {
179+
entMeta := DefaultEnterpriseMetaInPartition("")
179180
return &NodeService{
180181
Kind: ServiceKindAPIGateway,
181182
Service: "api-gateway",
182183
Address: "1.1.1.1",
184+
185+
// ---------------------------------------
186+
// Adding TestConnectProxyConfig to the proxy field here within TestNodeServiceAPIGateway
187+
// to test whether APIGateway is able to handle state changes within ConnectProxyConfig (TestStateChangedAPIGateway).
188+
// Please note:
189+
// The naming may suggest that ConnectProxyConfig should only be used for ConnectProxy,
190+
// but "APIGateway state" uses serviceInstance, which embeds ConnectProxyConfig as part of its state,
191+
// so any changes to ConnectProxyConfig will also impact APIGateway, such as change in "mesh gateway mode".
192+
//
193+
// For example, let's say a user updates the mesh gateway mode of an API gateway,
194+
// First NodeService.Proxy will be updated and then proxyCfg manager detects change in config
195+
// and it recreates the state for api_gateway which would copy the
196+
// NodeService.Proxy.MeshGateway to serviceInstance.proxyCfg.MeshGateway in `newServiceInstanceFromNodeService` (serviceInstance is part of state)
197+
// and then proxyCfg manager calls the api_gateway handleUpdates method which would
198+
// update the api_gateway upstreams with new meshGateway config.
199+
//
200+
// Now, this serviceInstance.proxyCfg and NodeService.Proxy
201+
// refers to same proxy configuration, which is of type ConnectProxyConfig.
202+
203+
// So, we need to test it as well.
204+
// ---------------------------------------
205+
206+
Proxy: TestConnectProxyConfig(t),
207+
EnterpriseMeta: *entMeta,
183208
}
184209
}
185210

agent/xds/clusters.go

Lines changed: 116 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1656,25 +1656,47 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain(
16561656
for _, groupedTarget := range targetGroups {
16571657
s.Logger.Debug("generating cluster for", "cluster", groupedTarget.ClusterName)
16581658

1659+
// Now this makeUpstreamClusterForDiscoveryChain, is a generic method
1660+
// and used by connect proxy, ingress gateway and api gateway.
1661+
//
1662+
// Issue: This method always make cluster (without endpoints).
1663+
// `ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS},`
1664+
//
1665+
// Envoy Exception:
1666+
// As we know that any service whose upstream endpoint is of type hostname,
1667+
// envoy cannot resolve hostname as EDS.
1668+
// So, we need to use CDS to send endpoints as well along with cluster configs.
1669+
//
1670+
// Context:
1671+
// When we have 2 consul DC peered with mesh gw ON AWS and
1672+
// we have API gw on DC1 which need to access a service X which exist on DC2.
1673+
// Also, API gateway is configured to in mesh-gw remote mode.
1674+
// In this case, API gateway upstream would be DC2's mesh-gateway and
1675+
// we configure API GW envoy upstream endpoint to it.
1676+
// The problem is envoy exception and AWS mesh-gw lb type.
1677+
// AWS generates hostname based endpoints for mesh-gw lb endpoint and
1678+
// when we have hostname based endpoints envoy cannot resolve it via EDS,
1679+
// So we configure that endpoint via CDS.
1680+
//
1681+
// If not fixed, whenever any service (gateway) whose upstream endpoint is of hostname type,
1682+
// cluster endpoints will be empty and envoy will fail to route traffic to that cluster.
1683+
//
1684+
// Fix: Add logic to check if we should create a cluster config
1685+
// without upstream endpoint or with upstream endpoint (hostnames)
1686+
// based on upstream endpoint type.
1687+
//
1688+
// You can refer makeUpstreamClusterForPeerService - used by connect proxy for similar logic.
1689+
// or makeGatewayCluster - used by mesh gw for peering services.
1690+
16591691
c := &envoy_cluster_v3.Cluster{
1660-
Name: groupedTarget.ClusterName,
1661-
AltStatName: groupedTarget.ClusterName,
1662-
ConnectTimeout: durationpb.New(node.Resolver.ConnectTimeout),
1663-
ClusterDiscoveryType: &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS},
1692+
Name: groupedTarget.ClusterName,
1693+
AltStatName: groupedTarget.ClusterName,
1694+
ConnectTimeout: durationpb.New(node.Resolver.ConnectTimeout),
16641695
CommonLbConfig: &envoy_cluster_v3.Cluster_CommonLbConfig{
16651696
HealthyPanicThreshold: &envoy_type_v3.Percent{
16661697
Value: 0, // disable panic threshold
16671698
},
16681699
},
1669-
EdsClusterConfig: &envoy_cluster_v3.Cluster_EdsClusterConfig{
1670-
EdsConfig: &envoy_core_v3.ConfigSource{
1671-
InitialFetchTimeout: cfgSnap.GetXDSCommonConfig(s.Logger).GetXDSFetchTimeout(),
1672-
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
1673-
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
1674-
Ads: &envoy_core_v3.AggregatedConfigSource{},
1675-
},
1676-
},
1677-
},
16781700
// TODO(peering): make circuit breakers or outlier detection work?
16791701
CircuitBreakers: &envoy_cluster_v3.CircuitBreakers{
16801702
Thresholds: makeThresholdsIfNeeded(upstreamConfig.Limits),
@@ -1704,13 +1726,67 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain(
17041726
default:
17051727
return nil, fmt.Errorf("cannot have more than one target")
17061728
}
1729+
1730+
targetInfo := groupedTarget.Targets[0]
1731+
targetUID := proxycfg.NewUpstreamIDFromTargetID(targetInfo.TargetID)
1732+
1733+
meshGatewayMode, err := s.getMeshGatewayMode(cfgSnap, upstream, targetUID, groupedTarget.ClusterName)
1734+
if err != nil {
1735+
s.Logger.Error(err.Error(), "cluster", groupedTarget.ClusterName)
1736+
}
1737+
1738+
// Check if cluster need to be configured with hostnames or not.
1739+
useEDS := true
1740+
if targetUID.Peer != "" {
1741+
if _, ok := upstreamsSnapshot.PeerUpstreamEndpointsUseHostnames[targetUID]; ok {
1742+
// If we're using local mesh gw, the fact that upstreams use hostnames doesn't matter.
1743+
// If we're not using local mesh gw, then resort to CDS/DNS.
1744+
if meshGatewayMode != structs.MeshGatewayModeLocal {
1745+
useEDS = false
1746+
}
1747+
}
1748+
}
1749+
1750+
if useEDS {
1751+
c.ClusterDiscoveryType = &envoy_cluster_v3.Cluster_Type{Type: envoy_cluster_v3.Cluster_EDS}
1752+
c.EdsClusterConfig = &envoy_cluster_v3.Cluster_EdsClusterConfig{
1753+
EdsConfig: &envoy_core_v3.ConfigSource{
1754+
InitialFetchTimeout: cfgSnap.GetXDSCommonConfig(s.Logger).GetXDSFetchTimeout(),
1755+
ResourceApiVersion: envoy_core_v3.ApiVersion_V3,
1756+
ConfigSourceSpecifier: &envoy_core_v3.ConfigSource_Ads{
1757+
Ads: &envoy_core_v3.AggregatedConfigSource{},
1758+
},
1759+
},
1760+
}
1761+
} else {
1762+
hostnameEndpoints, ok := upstreamsSnapshot.PeerUpstreamEndpoints.Get(targetUID)
1763+
if !ok || len(hostnameEndpoints) == 0 {
1764+
// The upstream snapshot should deliver hostname endpoints soon; skip this cluster until then.
1765+
s.Logger.Debug("peer hostname endpoints not ready for discovery chain target",
1766+
"target", targetInfo.TargetID,
1767+
"upstream", targetUID,
1768+
"cluster", groupedTarget.ClusterName,
1769+
)
1770+
continue
1771+
}
1772+
c.EdsClusterConfig = nil
1773+
configureClusterWithHostnames(
1774+
s.Logger,
1775+
c,
1776+
"", /*TODO: should make configurable ? */
1777+
hostnameEndpoints,
1778+
true, /*isRemote*/
1779+
false, /*onlyPassing*/
1780+
)
1781+
}
1782+
17071783
var handled bool
17081784
out, handled, err = s.appendEntDiscoveryChainTargetClusters(out, cfgSnap, upstreamsSnapshot, uid, chain, groupedTarget, c, forMeshGateway)
17091785
if err != nil {
17101786
return nil, err
17111787
}
1712-
if !handled {
17131788

1789+
if !handled {
17141790
if targetInfo := groupedTarget.Targets[0]; targetInfo.TLSContext != nil {
17151791
transportSocket, err := makeUpstreamTLSTransportSocket(targetInfo.TLSContext)
17161792
if err != nil {
@@ -1739,6 +1815,32 @@ func (s *ResourceGenerator) makeUpstreamClustersForDiscoveryChain(
17391815
return out, nil
17401816
}
17411817

1818+
func (s *ResourceGenerator) getMeshGatewayMode(
1819+
cfgSnap *proxycfg.ConfigSnapshot, upstream *structs.Upstream,
1820+
targetUID proxycfg.UpstreamID, clusterName string,
1821+
) (structs.MeshGatewayMode, error) {
1822+
defaultMode := structs.MeshGatewayModeDefault
1823+
switch cfgSnap.Kind {
1824+
case structs.ServiceKindConnectProxy:
1825+
upstreamConfig, _ := cfgSnap.ConnectProxy.
1826+
GetUpstream(targetUID, &cfgSnap.ProxyID.EnterpriseMeta)
1827+
if upstreamConfig != nil {
1828+
return upstreamConfig.MeshGateway.Mode, nil
1829+
}
1830+
return defaultMode, nil
1831+
case structs.ServiceKindAPIGateway,
1832+
structs.ServiceKindIngressGateway:
1833+
if upstream != nil {
1834+
return upstream.MeshGateway.Mode, nil
1835+
}
1836+
return defaultMode, nil
1837+
case structs.ServiceKindMeshGateway:
1838+
// Mesh Gateway mesh mode will always be remote.
1839+
return structs.MeshGatewayModeRemote, nil
1840+
}
1841+
return structs.MeshGatewayModeDefault, fmt.Errorf("unexpected service kind %q when determining mesh gateway mode for cluster %q", cfgSnap.Kind, clusterName)
1842+
}
1843+
17421844
func (s *ResourceGenerator) makeExportedUpstreamClustersForMeshGateway(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
17431845
// NOTE: Despite the mesh gateway already having one cluster per service
17441846
// (and subset) in the local datacenter we cannot reliably use those to

0 commit comments

Comments
 (0)