Skip to content

Commit 685b648

Browse files
committed
new: add a cache to avoid conntrack resolution at each iteration
Signed-off-by: Andrea Terzolo <andreaterzolo3@gmail.com>
1 parent 1b15efe commit 685b648

1 file changed

Lines changed: 31 additions & 3 deletions

File tree

checks/net_pod_correlation.go

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/StackVista/stackstate-process-agent/pkg/kube"
1818
"github.com/StackVista/stackstate-process-agent/pkg/telemetry"
1919
log "github.com/cihub/seelog"
20+
lru "github.com/hashicorp/golang-lru/v2"
2021
"github.com/prometheus/client_golang/prometheus"
2122
"go.opentelemetry.io/obi/pkg/kubecache/meta"
2223
"go.opentelemetry.io/otel/attribute"
@@ -62,6 +63,11 @@ const (
6263
RemoteLabelsKey = "remote.pod.label"
6364
)
6465

66+
const (
67+
// at the moment this is not configurable by the user
68+
clusterIPCacheDim = 10000
69+
)
70+
6571
var (
6672
// Keep this updated with all the possible keys
6773
AllAttributeKeys = []string{
@@ -103,6 +109,8 @@ type podCorrelationInfo struct {
103109
rootNSIno uint32
104110
attributesKeys []string
105111
rootCtrk *netlink.Conntrack
112+
// we cannot cache clusterIP -> Pod directly because the same clusterIP can resolve to different pods depending on the connection
113+
clusterIPResolution *lru.Cache[network.ConnectionTuple, *kube.PodInfo]
106114
}
107115

108116
func validateAttributeKeys(keys []string) ([]string, error) {
@@ -177,6 +185,13 @@ func newPodCorrelationInfo(cfg *config.PodCorrelationConfig) (*podCorrelationInf
177185
if podCorrelationInfo.metrics, err = telemetry.NewMetricsExporter(cfg.Exporter); err != nil {
178186
return nil, err
179187
}
188+
189+
// LRU cache to avoid querying the conntrack table for every connection
190+
podCorrelationInfo.clusterIPResolution, err = lru.New[network.ConnectionTuple, *kube.PodInfo](clusterIPCacheDim)
191+
if err != nil {
192+
return nil, fmt.Errorf("could not create cluster IP cache: %v", err)
193+
}
194+
180195
}
181196

182197
return podCorrelationInfo, nil
@@ -330,33 +345,46 @@ func (pi *podCorrelationInfo) tryClusterIpResolution(conn *network.ConnectionSta
330345
}
331346

332347
// dstPodInfo is still nil...
348+
// todo!: possible optimization for the future: we could fetch the service CIDR from the k8s API and avoid querying the conntrack table if the dst IP is not in the service CIDR!
349+
// at the moment we are querying the conntrack table for every outgoing connection that we are not able to resolve, for example `Pod -> ExternalIP` or `Pod -> Host`; in these cases we will never find a resolution.
333350

334351
// if the conntrack ebpf cache is expired, we try to query the conntrack table in the root netns if present. At the moment we don't define it for tests
335352
if pi.rootCtrk == nil {
336353
return nil
337354
}
338355

356+
// Before using the conntrack table we try to use a cache to avoid querying the conntrack table for every connection
357+
dstPodInfo, ok := pi.clusterIPResolution.Get(conn.ConnectionTuple)
358+
if ok {
359+
return dstPodInfo
360+
}
361+
339362
// we convert the tuple into the conntrack format
340363
// we need only the origin field.
341364
origin := netlink.Con{Origin: conntrackOriginTCP(&conn.ConnectionTuple)}
342365
reply, err := (*pi.rootCtrk).Get(&origin)
343366
// if there is no entry, the lookup does not return an error; it returns an empty reply
344367
if err != nil {
345368
log.Warnf("Failed to query conntrack table in root netns for connection %v: %v", conn, err)
346-
return nil
369+
return dstPodInfo
347370
}
348371

372+
// We store the result in the cache even if it is nil (we don't want to query the conntrack table again for the same connection)
373+
defer func() {
374+
pi.clusterIPResolution.Add(conn.ConnectionTuple, dstPodInfo)
375+
}()
376+
349377
// if there are no entries we obtain an empty netlink.Con{}
350378
// we check if the port is 0 to understand if it is empty
351379
if reply.Reply.Src.Port() == 0 {
352-
return nil
380+
return dstPodInfo
353381
}
354382

355383
// Example of a conntrack entry that converts a ClusterIP to Pod IP:
356384
// key: src(10.42.0.10:35926) -> dst(10.43.168.100:5432) --- value: src(10.42.0.9:5432) -> dst(10.42.0.10:35926)
357385
// if there is no NAT (the destination address was not translated), we are not interested
358386
if conn.ConnectionTuple.Dest.Addr == reply.Reply.Src.Addr() {
359-
return nil
387+
return dstPodInfo
360388
}
361389

362390
// it is possible that we have the translated IP in the reply src field

0 commit comments

Comments
 (0)