Skip to content

Commit 101a48d

Browse files
committed
new: implement short lived conn filter
Signed-off-by: Andrea Terzolo <andreaterzolo3@gmail.com>
1 parent 910effa commit 101a48d

5 files changed

Lines changed: 41 additions & 8 deletions

File tree

checks/net_pod_correlation.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ func (pi *podCorrelationInfo) startKubernetesInformer(cfg *config.PodCorrelation
173173
return fmt.Errorf("failed to create kubernetes informer: %w", err)
174174
}
175175
// if needed we can configure the refresh interval for the deleted pods cache
176-
pi.observer, err = kube.NewObserver(prometheus.DefaultRegisterer, kube.WithPodDebugEndpoint())
176+
pi.observer, err = kube.NewObserver(prometheus.DefaultRegisterer, kube.WithPodDebugEndpoint(), kube.WithShortLivedConnectionsInterval(cfg.ShortLivedConnectionsInterval))
177177
if err != nil {
178178
return fmt.Errorf("failed to create kubernetes observer: %w", err)
179179
}
@@ -336,6 +336,11 @@ func (pi *podCorrelationInfo) processConnections(conns []network.ConnectionStats
336336
continue
337337
}
338338

339+
// skip short lived connections
340+
if pi.observer.FilterShortLivedConnection(conn.Duration) {
341+
continue
342+
}
343+
339344
// Possible cases
340345
// 1. Pod -> Pod
341346
// 2. Pod -> Pod HostNetwork == Pod -> Host

config/config.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ type PodCorrelationConfig struct {
7373
Exporter ExporterConfig
7474
// List of attribute keys to include in the correlation
7575
AttributesKeys []string
76+
// Connections younger than this interval are considered short-lived and are skipped by pod correlation (0 disables the filter)
77+
ShortLivedConnectionsInterval time.Duration
7678
}
7779

7880
// NetworkTracerConfig contains some[1] of the network tracer configuration options
@@ -288,7 +290,8 @@ func NewDefaultAgentConfig() *AgentConfig {
288290
Endpoint: "",
289291
Interval: 0,
290292
},
291-
AttributesKeys: []string{},
293+
AttributesKeys: []string{},
294+
ShortLivedConnectionsInterval: 0,
292295
},
293296
},
294297

@@ -707,6 +710,7 @@ func mergeEnvironmentVariables(c *AgentConfig) *AgentConfig {
707710
c.NetworkTracer.PodCorrelation.Exporter.Type = translateExporterType(os.Getenv("STS_POD_CORRELATION_EXPORTER_TYPE"))
708711
c.NetworkTracer.PodCorrelation.Exporter.Endpoint = os.Getenv("STS_POD_CORRELATION_EXPORTER_OTLP_ENDPOINT")
709712
c.NetworkTracer.PodCorrelation.Exporter.Interval, _ = time.ParseDuration(os.Getenv("STS_POD_CORRELATION_EXPORTER_INTERVAL"))
713+
c.NetworkTracer.PodCorrelation.ShortLivedConnectionsInterval, _ = time.ParseDuration(os.Getenv("STS_POD_CORRELATION_SHORT_LIVED_CONNECTIONS_INTERVAL"))
710714
}
711715

712716
return c

config/yaml_config.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ type YamlAgentConfig struct {
168168
Endpoint string `yaml:"endpoint"`
169169
Interval int `yaml:"interval"`
170170
} `yaml:"exporter"`
171-
Attributes []string `yaml:"attributes"`
171+
Attributes []string `yaml:"attributes"`
172+
ShortLivedConnectionsInterval int `yaml:"short_lived_connections_interval"`
172173
} `yaml:"pod_correlation"`
173174
} `yaml:"network_tracer_config"`
174175
TransactionManager struct {
@@ -456,4 +457,5 @@ func validatePodCorrelationConfig(agentConf *AgentConfig, networkConf *YamlAgent
456457
agentConf.NetworkTracer.PodCorrelation.Exporter.Type = translateExporterType(networkConf.Network.PodCorrelation.Exporter.Type)
457458
agentConf.NetworkTracer.PodCorrelation.Exporter.Interval = time.Duration(networkConf.Network.PodCorrelation.Exporter.Interval) * time.Second
458459
agentConf.NetworkTracer.PodCorrelation.Exporter.Endpoint = networkConf.Network.PodCorrelation.Exporter.Endpoint
460+
agentConf.NetworkTracer.PodCorrelation.ShortLivedConnectionsInterval = time.Duration(networkConf.Network.PodCorrelation.ShortLivedConnectionsInterval) * time.Second
459461
}

k8s-deployment/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ SETUP_AGENT_HELM_CHART_DIR ?= ~/personal/helm-charts/stable/suse-observability-a
55
# SETUP_TYPE can be 'local' or 'remote' (default: local)
66
SETUP_TYPE ?= local
77
SETUP_API_KEY ?= null
8-
SETUP_CLUSTER_NAME ?= minikube-local-setup
8+
SETUP_CLUSTER_NAME ?= local-setup
99
SETUP_RECEIVER_ENDPOINT ?= http://test-server-service:7077/stsAgent
1010
# To be set only in remote mode
1111
SETUP_OTEL_ENDPOINT ?= null

pkg/kube/observer.go

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ import (
1717
)
1818

1919
const (
20-
defaultDeletePodsAfter = 2 * time.Minute
21-
defaultCleanCacheInterval = 10 * time.Minute
22-
defaultMaxEstimatedCPLatency = 5 * time.Second
20+
defaultDeletePodsAfter = 2 * time.Minute
21+
defaultCleanCacheInterval = 10 * time.Minute
22+
defaultMaxEstimatedCPLatency = 5 * time.Second
23+
defaultShortLivedConnInterval = 0
2324

2425
prometheusNamespace = "stackstate_process_agent"
2526
prometheusSubsystem = "observer"
@@ -54,6 +55,8 @@ type Observer struct {
5455
maxControlPlaneLatency int64
5556
// just used for testing
5657
nowFunc func() time.Time
58+
// if the connection is younger than this interval, we consider it a short lived connection and we don't try to correlate it.
59+
shortLivedConnInterval time.Duration
5760

5861
// Metrics
5962
controlPlaneLatency prometheus.Histogram
@@ -114,6 +117,12 @@ func WithPodDebugEndpoint() ObserverOption {
114117
}
115118
}
116119

120+
func WithShortLivedConnectionsInterval(interval time.Duration) ObserverOption {
121+
return func(o *Observer) {
122+
o.shortLivedConnInterval = interval
123+
}
124+
}
125+
117126
// NewObserver creates a new Observer instance.
118127
func NewObserver(reg prometheus.Registerer, opts ...ObserverOption) (*Observer, error) {
119128
// we need the boot time because everything we receive from eBPF is expressed as a time in nanoseconds since boot
@@ -131,6 +140,7 @@ func NewObserver(reg prometheus.Registerer, opts ...ObserverOption) (*Observer,
131140
cleanCacheInterval: defaultCleanCacheInterval,
132141
deletePodsAfter: defaultDeletePodsAfter,
133142
nowFunc: time.Now,
143+
shortLivedConnInterval: defaultShortLivedConnInterval,
134144

135145
controlPlaneLatency: prometheus.NewHistogram(prometheus.HistogramOpts{
136146
Namespace: prometheusNamespace,
@@ -198,7 +208,7 @@ func NewObserver(reg prometheus.Registerer, opts ...ObserverOption) (*Observer,
198208
// update it after we apply the options, since nowFunc can be overridden
199209
obs.lastCacheClean = obs.nowFunc()
200210

201-
log.Infof("Observer created with clean cache interval: %s, delete pods after: %s", obs.cleanCacheInterval, obs.deletePodsAfter)
211+
log.Infof("Observer created (clean cache interval: %s), (delete pods after: %s), (short lived connections interval: %s)", obs.cleanCacheInterval, obs.deletePodsAfter, obs.shortLivedConnInterval)
202212
return obs, nil
203213
}
204214

@@ -352,6 +362,18 @@ func (o *Observer) ConnectionNeedsRetry(nsFromBoot time.Duration) bool {
352362
return false
353363
}
354364

365+
// FilterShortLivedConnection returns true if the connection is short lived and should be ignored.
366+
func (o *Observer) FilterShortLivedConnection(nsFromBoot time.Duration) bool {
367+
// if the interval is not configured we return false
368+
if o.shortLivedConnInterval == 0 {
369+
return false
370+
}
371+
372+
connCreationTime := o.bootTime + int64(nsFromBoot.Seconds())
373+
// if the connection is younger than the configured interval we consider it a short lived connection
374+
return connCreationTime > (o.nowFunc().Unix() - int64(o.shortLivedConnInterval.Seconds()))
375+
}
376+
355377
// ResolvePodsByIPs resolves the pods by their IPs, returning the PodInfo for each IP.
356378
func (o *Observer) ResolvePodsByIPs(srcIP, dstIP util.Address, nsFromBoot time.Duration) (*PodInfo, *PodInfo) {
357379
o.access.RLock()

0 commit comments

Comments
 (0)