Skip to content

Commit d0f3be5

Browse files
authored
Use patched node-agent (#123)
1 parent 0894138 commit d0f3be5

3 files changed

Lines changed: 190 additions & 3 deletions

File tree

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
# CI for the patch set carried on top of upstream coroot-node-agent.
# Clones the upstream tag pinned in ebpf/Dockerfile, applies every patch from
# ebpf/patches/node-agent/, then runs gofmt, goimports, go vet and go test
# against the patched tree.
name: Node Agent Patch CI

on:
  pull_request:
    paths:
      - 'ebpf/patches/node-agent/**'
      - 'ebpf/Dockerfile'
  push:
    branches:
      - main
    paths:
      - 'ebpf/patches/node-agent/**'
      - 'ebpf/Dockerfile'

jobs:
  test-node-agent-patch:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4

      - name: Clone node-agent and apply patches
        run: |
          # The upstream tag is taken from the first NODE_AGENT_VERSION= line of the Dockerfile.
          NODE_AGENT_VERSION=$(grep 'NODE_AGENT_VERSION=' ebpf/Dockerfile | head -1 | sed 's/.*=//')
          # Fail early with a clear message; an empty value would otherwise make
          # `git clone --branch` consume the repository URL as the branch name.
          if [[ -z "${NODE_AGENT_VERSION}" ]]; then
            echo "NODE_AGENT_VERSION not found in ebpf/Dockerfile" >&2
            exit 1
          fi
          git clone --depth 1 --branch "${NODE_AGENT_VERSION}" \
            https://github.com/coroot/coroot-node-agent.git /tmp/node-agent
          cd /tmp/node-agent
          git apply ${{ github.workspace }}/ebpf/patches/node-agent/*.patch

      # NOTE(review): ebpf/Dockerfile builds the agent with GO_VERSION=1.24.9;
      # confirm that testing with a different toolchain here is intended.
      - uses: actions/setup-go@v5
        with:
          go-version: '1.23.8'
          cache: false

      - uses: actions/cache@v4
        with:
          path: |
            ~/go/pkg/mod
            ~/.cache/go-build
          # hashFiles() can only include files inside GITHUB_WORKSPACE, so
          # /tmp/node-agent/go.sum is never hashed and the key would be the
          # constant "node-agent-go-". Key on the in-repo inputs that determine
          # the patched tree (pinned upstream version + patches) instead.
          key: node-agent-go-${{ hashFiles('ebpf/Dockerfile', 'ebpf/patches/node-agent/**') }}
          restore-keys: node-agent-go-

      # Refresh the package index first; the runner image's index may be stale.
      - run: sudo apt-get update && sudo apt-get install -y libsystemd-dev

      - name: go mod download
        working-directory: /tmp/node-agent
        run: go mod download

      - name: gofmt -l .
        working-directory: /tmp/node-agent
        run: files=$(gofmt -l .); if [[ -n "$files" ]]; then echo "$files"; exit 1; fi

      - name: goimports -l .
        working-directory: /tmp/node-agent
        run: |
          go install golang.org/x/tools/cmd/goimports@latest
          files=$(goimports -l .); if [[ -n "$files" ]]; then echo "$files"; exit 1; fi

      - name: go vet
        working-directory: /tmp/node-agent
        run: go vet ./...

      - name: go test
        working-directory: /tmp/node-agent
        run: go test ./...

ebpf/Dockerfile

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,21 @@
1-
# Add Node Agent to the image
2-
FROM ghcr.io/coroot/coroot-node-agent:1.28.3 AS node-agent
# Build Node Agent from source with patches
FROM debian:bullseye AS node-agent-builder
# Build prerequisites for the CGO_ENABLED=1 build below. ca-certificates is
# listed explicitly because --no-install-recommends no longer pulls it in and
# both curl and git need it for HTTPS. The apt lists are removed in the same
# layer so they are not kept in the build cache.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates curl git build-essential pkg-config libsystemd-dev && \
    rm -rf /var/lib/apt/lists/*
# Go toolchain is downloaded from go.dev for the architecture dpkg reports.
ARG GO_VERSION=1.24.9
RUN curl -fsSL https://go.dev/dl/go${GO_VERSION}.linux-$(dpkg --print-architecture).tar.gz -o go.tar.gz && \
    tar -C /usr/local -xzf go.tar.gz && rm go.tar.gz
ENV PATH="/usr/local/go/bin:${PATH}"
# Suffix appended to the upstream version in the binary's reported version string.
ARG CUSTOMIZATION_TAG="betterstack"
# Upstream tag to build. The patch CI workflow reads this value by grepping for
# the first `NODE_AGENT_VERSION=` line in this file, so keep the ARG in this form.
ARG NODE_AGENT_VERSION=v1.28.3
RUN git clone --depth 1 --branch ${NODE_AGENT_VERSION} \
    https://github.com/coroot/coroot-node-agent.git /tmp/node-agent
WORKDIR /tmp/node-agent
# Patches are copied into the checkout root and applied in glob order.
COPY ebpf/patches/node-agent/*.patch ./
RUN git apply *.patch
# -X stamps flags.Version with "<upstream version>-<customization tag>".
RUN CGO_ENABLED=1 go build -mod=readonly \
    -ldflags "-extldflags='-Wl,-z,lazy' -X 'github.com/coroot/coroot-node-agent/flags.Version=${NODE_AGENT_VERSION}-${CUSTOMIZATION_TAG}'" \
    -o /usr/bin/coroot-node-agent .
319

420
# Add Cluster Agent to the image
521
FROM ghcr.io/coroot/coroot-cluster-agent:1.2.4 AS cluster-agent
@@ -48,7 +64,7 @@ COPY --from=obi-source --chmod=755 /tmp/obi /usr/local/bin/ebpf-instrument
4864
COPY --from=obi-source /tmp/LICENSE /tmp/NOTICE /usr/share/doc/ebpf-instrument/
4965

5066
# Copy Node Agent
51-
COPY --from=node-agent --chmod=755 /usr/bin/coroot-node-agent /usr/local/bin/node-agent
67+
COPY --from=node-agent-builder --chmod=755 /usr/bin/coroot-node-agent /usr/local/bin/node-agent
5268

5369
# Copy Cluster Agent
5470
COPY --from=cluster-agent --chmod=755 /usr/bin/coroot-cluster-agent /usr/local/bin/cluster-agent
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
diff --git a/containers/container.go b/containers/container.go
2+
index abb2cfb..5ab58ce 100644
3+
--- a/containers/container.go
4+
+++ b/containers/container.go
5+
@@ -26,7 +26,7 @@ import (
6+
)
7+
8+
var (
9+
- gcInterval = 10 * time.Minute
10+
+ gcInterval = *flags.GCInterval
11+
pingTimeout = 300 * time.Millisecond
12+
multilineCollectorTimeout = time.Second
13+
gpuStatsWindow = 15 * time.Second
14+
diff --git a/containers/registry.go b/containers/registry.go
15+
index b5f6a00..549d532 100644
16+
--- a/containers/registry.go
17+
+++ b/containers/registry.go
18+
@@ -116,7 +116,7 @@ func NewRegistry(reg prometheus.Registerer, processInfoCh chan<- ProcessInfo, gp
19+
20+
processInfoCh: processInfoCh,
21+
22+
- tracer: ebpftracer.NewTracer(hostNetNs, selfNetNs, *flags.DisableL7Tracing),
23+
+ tracer: ebpftracer.NewTracer(hostNetNs, selfNetNs, *flags.DisableL7Tracing, *flags.MaxConnections, *flags.MaxL7Requests),
24+
25+
trafficStatsUpdateCh: make(chan *TrafficStatsUpdate),
26+
nodejsStatsUpdateCh: make(chan *NodejsStatsUpdate),
27+
diff --git a/ebpftracer/tracer.go b/ebpftracer/tracer.go
28+
index f79b23e..0d3f445 100644
29+
--- a/ebpftracer/tracer.go
30+
+++ b/ebpftracer/tracer.go
31+
@@ -82,6 +82,8 @@ type Tracer struct {
32+
disableL7Tracing bool
33+
hostNetNs netns.NsHandle
34+
selfNetNs netns.NsHandle
35+
+ maxConnections int
36+
+ maxL7Requests int
37+
38+
collection *ebpf.Collection
39+
readers map[string]*perf.Reader
40+
@@ -89,7 +91,7 @@ type Tracer struct {
41+
uprobes map[string]*ebpf.Program
42+
}
43+
44+
-func NewTracer(hostNetNs, selfNetNs netns.NsHandle, disableL7Tracing bool) *Tracer {
45+
+func NewTracer(hostNetNs, selfNetNs netns.NsHandle, disableL7Tracing bool, maxConnections, maxL7Requests int) *Tracer {
46+
if disableL7Tracing {
47+
klog.Infoln("L7 tracing is disabled")
48+
}
49+
@@ -97,6 +99,8 @@ func NewTracer(hostNetNs, selfNetNs netns.NsHandle, disableL7Tracing bool) *Trac
50+
disableL7Tracing: disableL7Tracing,
51+
hostNetNs: hostNetNs,
52+
selfNetNs: selfNetNs,
53+
+ maxConnections: maxConnections,
54+
+ maxL7Requests: maxL7Requests,
55+
56+
readers: map[string]*perf.Reader{},
57+
uprobes: map[string]*ebpf.Program{},
58+
@@ -217,6 +221,20 @@ func (t *Tracer) ebpf(ch chan<- Event) error {
59+
if err != nil {
60+
return fmt.Errorf("failed to load collection spec: %w", err)
61+
}
62+
+ if t.maxConnections > 0 {
63+
+ for _, name := range []string{"active_connections", "connection_id_by_socket"} {
64+
+ if m := collectionSpec.Maps[name]; m != nil {
65+
+ klog.Infof("overriding %s max_entries: %d -> %d", name, m.MaxEntries, t.maxConnections)
66+
+ m.MaxEntries = uint32(t.maxConnections)
67+
+ }
68+
+ }
69+
+ }
70+
+ if t.maxL7Requests > 0 {
71+
+ if m := collectionSpec.Maps["active_l7_requests"]; m != nil {
72+
+ klog.Infof("overriding active_l7_requests max_entries: %d -> %d", m.MaxEntries, t.maxL7Requests)
73+
+ m.MaxEntries = uint32(t.maxL7Requests)
74+
+ }
75+
+ }
76+
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, &unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY})
77+
c, err := ebpf.NewCollectionWithOptions(collectionSpec, ebpf.CollectionOptions{
78+
//Programs: ebpf.ProgramOptions{LogLevel: 2, LogSize: 20 * 1024 * 1024},
79+
diff --git a/ebpftracer/tracer_test.go b/ebpftracer/tracer_test.go
80+
index c8cf7cc..9fe98de 100644
81+
--- a/ebpftracer/tracer_test.go
82+
+++ b/ebpftracer/tracer_test.go
83+
@@ -327,7 +327,7 @@ func runTracer(t *testing.T, verbose bool) (func() *Event, func()) {
84+
assert.NoError(t, common.SetKernelVersion(string(bytes.Split(uname.Release[:], []byte{0})[0])))
85+
86+
go func() {
87+
- tt := NewTracer(0, 0, false)
88+
+ tt := NewTracer(0, 0, false, 0, 0)
89+
err := tt.Run(events)
90+
require.NoError(t, err)
91+
<-done
92+
diff --git a/flags/flags.go b/flags/flags.go
93+
index c80323f..ff89896 100644
94+
--- a/flags/flags.go
95+
+++ b/flags/flags.go
96+
@@ -16,6 +16,10 @@ var (
97+
DisableL7Tracing = kingpin.Flag("disable-l7-tracing", "Disable L7 tracing").Default("false").Envar("DISABLE_L7_TRACING").Bool()
98+
DisableGPUMonitoring = kingpin.Flag("disable-gpu-monitoring", "Disable GPU monitoring (NVML)").Default("false").Envar("DISABLE_GPU_MONITORING").Bool()
99+
100+
+ MaxConnections = kingpin.Flag("max-connections", "Maximum number of tracked TCP connections in eBPF maps").Default("1000000").Envar("MAX_CONNECTIONS").Int()
101+
+ MaxL7Requests = kingpin.Flag("max-l7-requests", "Maximum number of in-flight L7 requests in eBPF maps").Default("32768").Envar("MAX_L7_REQUESTS").Int()
102+
+ GCInterval = kingpin.Flag("gc-interval", "How often to run container garbage collection").Default("10m").Envar("GC_INTERVAL").Duration()
103+
+
104+
ContainerAllowlist = kingpin.Flag("container-allowlist", "List of allowed containers (regex patterns)").Envar("CONTAINER_ALLOWLIST").Strings()
105+
ContainerDenylist = kingpin.Flag("container-denylist", "List of denied containers (regex patterns)").Envar("CONTAINER_DENYLIST").Strings()
106+

0 commit comments

Comments
 (0)