Skip to content

Commit 2b2a08b

Browse files
committed
Support local and remote deployment
Signed-off-by: Andrea Terzolo <andreaterzolo3@gmail.com>
1 parent 5fd0161 commit 2b2a08b

7 files changed

Lines changed: 406 additions & 77 deletions

File tree

k8s-deployment/Makefile

Lines changed: 128 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,136 @@
11

2-
NAMESPACE := default
3-
DST_DIR := /suse
2+
# User-tunable settings. Every value is assigned with ?=, so a value already
# provided by the environment or the make command line takes precedence.
# Timeout handed to `helm upgrade --timeout` and `kubectl wait --timeout`.
SETUP_TIMEOUT ?= 120s
3+
# Chart location passed to `helm upgrade --install` by the deploy-agent target.
SETUP_AGENT_HELM_CHART_DIR ?= ~/personal/helm-charts/stable/suse-observability-agent
4+
5+
# SETUP_TYPE can be 'local' or 'remote' (default: local)
6+
SETUP_TYPE ?= local
7+
# Passed to the agent chart as stackstate.apiKey; in remote mode it is also
# stored in the open-telemetry-collector secret by deploy-otel-collector.
SETUP_API_KEY ?= null
8+
# Passed to the agent chart as stackstate.cluster.name; in remote mode it is
# also stored in the open-telemetry-collector-config configmap.
SETUP_CLUSTER_NAME ?= minikube-local-setup
9+
# Passed to the agent chart as stackstate.url.
SETUP_RECEIVER_ENDPOINT ?= http://test-server-service:7077/stsAgent
10+
# To be set only in remote mode
# (stored in the open-telemetry-collector-config configmap by deploy-otel-collector).
11+
SETUP_OTEL_ENDPOINT ?= null
12+
13+
.PHONY: create-cluster
14+
# Starts a minikube cluster with the kvm2 driver, 6 CPUs, 12g of memory and a
# 30g disk.
create-cluster:
15+
@echo "🚀 Creating Kubernetes cluster..."
16+
minikube start --driver=kvm2 --cpus=6 --memory=12g --disk-size='30g'
17+
18+
.PHONY: delete-cluster
19+
# Tears the cluster down by running `minikube delete`.
delete-cluster:
20+
@echo "🧹 Deleting Kubernetes cluster..."
21+
minikube delete
22+
23+
.PHONY: load-agent-image
24+
# Builds the agent with `rake local_build`, builds the registry/agent:latest
# image from ../Dockerfile (build context: parent directory) and loads it into
# minikube. Before loading, `minikube image rm` is run for the same tag; its
# stderr is discarded and its failure is ignored (`2>/dev/null || true`).
load-agent-image:
25+
@echo "🚀 Loading agent image into the cluster..."
26+
# we should also build again the ebpf probes but at the moment we skip it since we suppose we are in the middle of the development
27+
rake local_build
28+
docker build --tag registry/agent:latest -f ../Dockerfile ..
29+
minikube image rm registry/agent:latest 2>/dev/null || true
30+
minikube image load registry/agent:latest
31+
32+
.PHONY: load-test-server-image
33+
# Compiles ../test-server with `go build` (output path ../test-server), builds
# the test-server:latest image from ../test-server/Dockerfile and loads it into
# minikube. Before loading, `minikube image rm` is run for the same tag; its
# stderr is discarded and its failure is ignored (`2>/dev/null || true`).
load-test-server-image:
34+
@echo "🚀 Loading test server image into the cluster..."
35+
go build -o ../test-server ../test-server
36+
docker build --tag test-server:latest -f ../test-server/Dockerfile ../test-server
37+
minikube image rm test-server:latest 2>/dev/null || true
38+
minikube image load test-server:latest
39+
40+
.PHONY: deploy-prometheus
41+
# Registers the prometheus-community Helm repository and installs/upgrades the
# `prometheus` release (chart version pinned to 27.32.0) in the open-telemetry
# namespace, creating the namespace if needed. Values come from
# ./yaml/prometheus-config.yaml; helm waits up to $(SETUP_TIMEOUT).
deploy-prometheus:
42+
@echo "🚀 Deploying Prometheus..."
43+
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
44+
helm upgrade --install --wait --timeout $(SETUP_TIMEOUT) prometheus prometheus-community/prometheus --version 27.32.0 \
45+
--values ./yaml/prometheus-config.yaml \
46+
--namespace open-telemetry --create-namespace
47+
48+
.PHONY: deploy-test-server
49+
# Applies ./yaml/test-server.yaml in the monitoring namespace and waits up to
# $(SETUP_TIMEOUT) for pods labelled app=test-server to become Ready.
# Depends on load-test-server-image, so the image is rebuilt and loaded first.
# The namespace creation is best-effort: its failure is ignored (`|| true`).
# NOTE(review): `kubectl wait` runs immediately after `kubectl apply`; confirm
# the matching pods already exist at that point, otherwise the wait may find
# no resources to wait on.
deploy-test-server: load-test-server-image
50+
@echo "🚀 Deploying the test server in monitoring namespace..."
51+
kubectl create namespace monitoring || true
52+
kubectl apply -f ./yaml/test-server.yaml -n monitoring
53+
@echo "🕐 Waiting for test-server..."
54+
kubectl wait --for=condition=Ready -n monitoring pod -l app=test-server --timeout=$(SETUP_TIMEOUT)
55+
56+
.PHONY: deploy-otel-collector
57+
# Installs/upgrades the opentelemetry-collector Helm release (chart version
# pinned to 0.132.0) in the open-telemetry namespace, using the values file
# ./yaml/otel-collector-config-$(SETUP_TYPE).yaml and waiting up to
# $(SETUP_TIMEOUT).
#
# In remote mode the collector configuration needs a Secret (SETUP_API_KEY) and
# a ConfigMap (SETUP_CLUSTER_NAME, SETUP_OTEL_ENDPOINT), so they are published
# before the chart is installed. Both objects are rendered with
# `--dry-run=client -o yaml` and piped into `kubectl apply`, which creates them
# on the first run and updates them on later runs; a plain `kubectl create`
# fails with AlreadyExists as soon as the target is executed twice.
# The commands are chained with `&&` so a failure while publishing the Secret
# is not masked by the ConfigMap step, and the whole block is silenced with `@`
# so the API key is not echoed to the terminal.
deploy-otel-collector:
	@echo "🚀 Deploying OTEL collector ($(SETUP_TYPE))..."
	@if [ "$(SETUP_TYPE)" = "remote" ]; then \
		kubectl create namespace open-telemetry || true; \
		kubectl create secret generic open-telemetry-collector --namespace open-telemetry \
			--from-literal=SETUP_API_KEY="$(SETUP_API_KEY)" \
			--dry-run=client -o yaml | kubectl apply --namespace open-telemetry -f - && \
		kubectl create configmap open-telemetry-collector-config --namespace open-telemetry \
			--from-literal=SETUP_CLUSTER_NAME="$(SETUP_CLUSTER_NAME)" \
			--from-literal=SETUP_OTEL_ENDPOINT="$(SETUP_OTEL_ENDPOINT)" \
			--dry-run=client -o yaml | kubectl apply --namespace open-telemetry -f -; \
	fi
	helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
	helm upgrade --install --wait --timeout $(SETUP_TIMEOUT) opentelemetry-collector open-telemetry/opentelemetry-collector --version 0.132.0 \
		--values ./yaml/otel-collector-config-$(SETUP_TYPE).yaml \
		--namespace open-telemetry --create-namespace
71+
72+
.PHONY: deploy-agent
73+
# Installs/upgrades the suse-observability-agent Helm release in the monitoring
# namespace from the chart at $(SETUP_AGENT_HELM_CHART_DIR), after rebuilding
# and loading the agent image (load-agent-image prerequisite).
#
# The release is configured with SETUP_API_KEY, SETUP_CLUSTER_NAME and
# SETUP_RECEIVER_ENDPOINT, disables the logs, cluster, checks and
# kubernetes-rbac agents, enables pod correlation with an OTLP exporter
# pointing at the collector service in the open-telemetry namespace, and runs
# the locally loaded registry/agent:latest image (pullPolicy=Never).
#
# Every continued line keeps a space before its trailing backslash: the shell
# joins continued lines, so a missing space can fuse `--devel` with the release
# name that follows it into a single bogus argument.
# $(SETUP_AGENT_HELM_CHART_DIR) is intentionally left unquoted so that a
# leading `~` in the default value is still expanded by the shell.
deploy-agent: load-agent-image
	@echo "🚀 Deploying the process agent ($(SETUP_TYPE))..."
	helm upgrade --install --devel \
		suse-observability-agent \
		$(SETUP_AGENT_HELM_CHART_DIR) \
		--set-string 'stackstate.apiKey'='$(SETUP_API_KEY)' \
		--set-string 'stackstate.cluster.name'='$(SETUP_CLUSTER_NAME)' \
		--set-string 'stackstate.url'='$(SETUP_RECEIVER_ENDPOINT)' \
		--set logsAgent.enabled=false \
		--set clusterAgent.enabled=false \
		--set checksAgent.enabled=false \
		--set kubernetes-rbac-agent.enabled=false \
		--set processAgent.podCorrelation.enabled=true \
		--set processAgent.podCorrelation.protocolMetrics=true \
		--set nodeAgent.httpTracing.enabled=false \
		--set-string processAgent.podCorrelation.exporter.type=otlp \
		--set-string processAgent.podCorrelation.exporter.endpoint=opentelemetry-collector.open-telemetry.svc.cluster.local:4317 \
		--namespace monitoring \
		--create-namespace \
		--set-string 'nodeAgent.skipKubeletTLSVerify'='true' \
		--set-string 'nodeAgent.containers.processAgent.image.pullPolicy'='Never' \
		--set-string 'nodeAgent.containers.processAgent.image.tag'='latest' \
		--set-string 'nodeAgent.containers.processAgent.image.registry'='registry' \
		--set-string 'nodeAgent.containers.processAgent.image.repository'='agent'
497

598
.PHONY: deploy
6-
deploy:
7-
@echo "🚀 Deploying the process-agent and basic components in namespace '$(NAMESPACE)'..."
8-
kubectl apply -f ./yaml/deployment.yaml -n $(NAMESPACE)
9-
@echo "🚀 Deploying the Postgres service in namespace '$(NAMESPACE)'..."
10-
kubectl apply -f ./yaml/postgres.yaml -n $(NAMESPACE)
11-
@echo "🕐 Waiting for necessary components to be up..."
12-
kubectl wait --for=condition=Ready pod -l app=loader-agent --timeout=180s
13-
kubectl wait --for=condition=Ready pod -l app=opentelemetry --timeout=180s
14-
kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=prometheus --timeout=180s
15-
@echo "🕐 Waiting for postgres service to be up..."
16-
kubectl wait --for=condition=Ready pod -l app=postgres-client --timeout=180s
17-
kubectl wait --for=condition=Ready pod -l app=postgres-server --timeout=180s
18-
19-
.PHONY: update
20-
# Substitute all the binaries and config files inside the dockers
21-
# NOTE: kubectl cp will overwrite the previous files.
22-
update:
23-
@echo "🔄 Updating binaries inside pods..."
24-
for loader_pod in $$(kubectl get pods -n ${NAMESPACE} -l app=loader-agent -o custom-columns=:metadata.name --no-headers); do \
25-
echo "=> Considering pod: $${loader_pod}"; \
26-
echo " [1/6] Copying process-agent binary..."; \
27-
kubectl cp "../process-agent" ${NAMESPACE}/$${loader_pod}:"${DST_DIR}/process-agent"; \
28-
echo " [2/6] Copying process-agent config..."; \
29-
kubectl cp "./process-agent-config.yaml" ${NAMESPACE}/$${loader_pod}:"${DST_DIR}/config.yaml"; \
30-
echo " [3/6] Creating directory for 'ebpf-object-files'..."; \
31-
kubectl exec -n ${NAMESPACE} $${loader_pod} -- mkdir -p "${DST_DIR}/ebpf-object-files/"; \
32-
echo " [4/6] Copying 'ebpf-object-files/x86_64'..."; \
33-
kubectl cp "../ebpf-object-files/x86_64" ${NAMESPACE}/$${loader_pod}:"${DST_DIR}/ebpf-object-files/"; \
34-
echo " [5/6] Copying test-server binary..."; \
35-
kubectl cp "../test-server/test-server" ${NAMESPACE}/$${loader_pod}:"${DST_DIR}/test-server"; \
36-
echo " [5/6] Copying test-server config..."; \
37-
kubectl cp "./test-server-config.json" ${NAMESPACE}/$${loader_pod}:"${DST_DIR}/config.json"; \
38-
done; \
39-
echo "🧹 Deleting pods..."; \
40-
kubectl delete pods -n ${NAMESPACE} -l app=process-agent --force
41-
kubectl delete pods -n ${NAMESPACE} -l app=test-server --force
42-
@echo "🕐 Waiting for process-agent to be up..."
43-
kubectl wait --for=condition=Ready pod -l app=process-agent --timeout=180s
44-
@echo "🕐 Waiting for test-server to be up..."
45-
kubectl wait --for=condition=Ready pod -l app=test-server --timeout=180s
46-
47-
48-
.PHONY: e2e-tests
49-
e2e-tests:
50-
@echo "🚀 Running end-to-end tests..."
51-
go test -tags 'docker kubelet kubeapiserver linux cri containerd linux_bpf k8s_e2e' ./tests -v
99+
# Full deployment entry point: runs deploy-prometheus, deploy-otel-collector
# and deploy-agent as prerequisites. The test server is deployed only when
# SETUP_TYPE is 'local'; it is invoked through $(MAKE) from the recipe because
# it is conditional on SETUP_TYPE and therefore not listed as a prerequisite.
deploy: deploy-prometheus deploy-otel-collector deploy-agent
100+
@if [ "$(SETUP_TYPE)" = "local" ]; then $(MAKE) deploy-test-server; fi
101+
@echo "✅ Completed deploy (mode=$(SETUP_TYPE))"
102+
103+
.PHONY: redeploy-agent
104+
# Uninstalls the suse-observability-agent release from the monitoring namespace
# and then re-runs deploy-agent through $(MAKE). The uninstall is best-effort:
# its failure is ignored (`|| true`).
redeploy-agent:
104+
@echo "🚀 Removing old agent chart..."
105+
helm uninstall suse-observability-agent -n monitoring || true
106+
@echo "🚀 Redeploying the process-agent ($(SETUP_TYPE))..."
107+
$(MAKE) deploy-agent
108+
@echo "✅ Completed re-deploy (mode=$(SETUP_TYPE))"
52110

53111
# Obtain a local copy of the json output from the test server pod
54112
# useful only with legacy metrics
55113
.PHONY: take-output
56114
take-output:
57-
@echo "📄 Dump output file..."; \
58-
kubectl cp $$(kubectl get pods -l app=test-server -o custom-columns=:metadata.name --no-headers):output.json ./output.json \
59-
60-
.PHONY: clean
61-
clean:
62-
echo "🧹 Cleaning up..."; \
63-
kubectl delete -f ./yaml/deployment.yaml -n $(NAMESPACE)
64-
kubectl delete -f ./yaml/postgres.yaml -n $(NAMESPACE)
115+
# Copies output.json from the test-server pod (monitoring namespace) to
# ./output.json. In remote mode there is nothing to copy, because `deploy`
# only deploys the test server when SETUP_TYPE is 'local'.
# Each recipe line runs in its own shell, so a guard placed on a separate line
# cannot skip the lines after it (and `return` is not valid outside a shell
# function). The guard and the commands it protects therefore form one
# logical line.
	@if [ "$(SETUP_TYPE)" = "remote" ]; then \
		echo "⏭️  Skipping output dump: the test server is not deployed in remote mode"; \
	else \
		echo "📄 Dump output file..."; \
		kubectl cp -n monitoring $$(kubectl get -n monitoring pods -l app=test-server -o custom-columns=:metadata.name --no-headers):output.json ./output.json; \
	fi
118+
119+
############################
120+
# End-to-end tests targets #
121+
############################
122+
.PHONY: deploy-e2e-tests
123+
# Applies ./yaml/postgres.yaml and waits up to $(SETUP_TIMEOUT) for the pods
# labelled app.kubernetes.io/name=postgres-server and
# app.kubernetes.io/name=postgres-client to become Ready.
# No namespace is passed, so kubectl uses the one from the current context.
deploy-e2e-tests:
124+
@echo "🚀 Deploying Postgres service..."
125+
kubectl apply -f ./yaml/postgres.yaml
126+
kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=postgres-server --timeout=$(SETUP_TIMEOUT)
127+
kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=postgres-client --timeout=$(SETUP_TIMEOUT)
128+
129+
.PHONY: run-e2e-tests
130+
# Runs the Go tests in ./tests verbosely with the build tags listed below.
# -count=1 disables Go's test result caching, so the tests are executed on
# every invocation.
run-e2e-tests:
131+
@echo "🚀 Running end-to-end tests..."
132+
go test -count=1 -tags 'docker kubelet kubeapiserver linux cri containerd linux_bpf k8s_e2e' ./tests -v
133+
134+
.PHONY: clean-e2e-tests
135+
# Deletes the resources defined in ./yaml/postgres.yaml (the manifest applied
# by deploy-e2e-tests).
clean-e2e-tests:
136+
kubectl delete -f ./yaml/postgres.yaml

k8s-deployment/README.md

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,45 @@
22

33
## Goals of this folder
44

5-
1. Quickly iterate on the development of the process agent. No need of rebuilding docker images for every change, it is enough to restart existing pods with the new agent binaries.
6-
2. Run integration tests against the process agent. At the moment we have integration tests only for the agent exposing OTEL metrics.
5+
1. Quickly iterate on the development of the process agent by deploying it in a local k8s cluster.
6+
2. Run integration tests against the process agent. At the moment we have integration tests only for OTEL metrics.
77

88
## 1. Deploy local agent binary into a k8s cluster
99

1010
These are the steps required to have a process agent up and running in a local Kubernetes cluster.
1111

1212
```bash
13-
# 1. Create a k8s cluster (kind, minikube, k3s, ecc...)
14-
minikube start --driver=kvm2
13+
# 1. Create a minikube k8s cluster
14+
make create-cluster
15+
16+
# 2. Deploy the setup
17+
# Here we have 2 options:
18+
# 1. Option "local":
19+
# - the agent will send legacy metrics to a local test server.
20+
# - the agent will send OTEL metrics to a local OTEL collector and then prometheus will scrape the OTEL collector
21+
# In this case, environment variables are not strictly necessary since they have default values in the Makefile, you can define them just to be sure you have a clean setup
22+
export SETUP_AGENT_HELM_CHART_DIR=<path_to_your_local_agent_helm_chart>
23+
export SETUP_TYPE=local
24+
export SETUP_RECEIVER_ENDPOINT=http://test-server-service:7077/stsAgent
25+
make deploy
1526

16-
# 2. Deploy initial docker images (process-agent, loader-agent, test-server, OTEL collector, prometheus)
27+
# 2. Option "remote":
28+
# - the agent will send legacy metrics to the platform receiver.
29+
# - the agent will send OTEL metrics to a local OTEL collector and then this collector will forward them to the platform OTEL endpoint.
30+
export SETUP_AGENT_HELM_CHART_DIR=<path_to_your_local_agent_helm_chart>
31+
export SETUP_TYPE=remote
32+
export SETUP_API_KEY=<your_stackstate_platform_api_key>
33+
export SETUP_CLUSTER_NAME=<your_cluster_name>
34+
export SETUP_RECEIVER_ENDPOINT=<your_stackstate_platform_endpoint>
export SETUP_OTEL_ENDPOINT=<your_stackstate_platform_otel_endpoint>
1735
make deploy
1836

19-
# 3. Build locally the process-agent and the test-server (or update configs)
20-
rake local_build
21-
# see how to build the process agent and ebpf artifacts in the project README.md
37+
# 3. If you need to push new changes to the agent binary, you can run redeploy-agent
38+
make redeploy-agent
2239

23-
# 4. Push the new binaries/configs inside pods
24-
make update
40+
# 4. delete the cluster
41+
make delete-cluster
2542
```
2643

27-
Everytime you do some local changes to the process-agent and test-server code, or you change a config, you should rerun `make update`
28-
2944
### Legacy Metrics
3045

3146
If you want to obtain a local copy of the json output from the test server pod you can run:
@@ -39,15 +54,16 @@ make take-output
3954
You can inspect OTEL metrics using Prometheus. The Prometheus UI can be accessed through the NodePort service:
4055

4156
```bash
42-
PROMETHEUS_PORT=$(kubectl get services -A -o json | jq -r '.items[] | select(.metadata.name == "prometheus-service") | .spec.ports[] | select(.name == "web") | .nodePort')
43-
PROMETHEUS_ADDR=$(kubectl get nodes -A -o json | jq -r '.items[] | select(.metadata.name == "minikube") | .status.addresses[] | select(.type == "InternalIP") | .address')
44-
echo "Prometheus is available at: http://${PROMETHEUS_ADDR}:${PROMETHEUS_PORT}"
57+
PROMETHEUS_PORT=$(kubectl get --namespace open-telemetry -o jsonpath="{.spec.ports[0].nodePort}" services prometheus-server)
58+
PROMETHEUS_ADDR=$(kubectl get nodes --namespace open-telemetry -o jsonpath="{.items[0].status.addresses[0].address}")
59+
echo http://$PROMETHEUS_ADDR:$PROMETHEUS_PORT
4560
```
4661

4762
## 2. Test OTEL metrics
4863

4964
In order to run the e2e tests you need the cluster up and running with the agent already deployed:
5065

5166
```bash
67+
kubectl apply -f ./yaml/postgres.yaml
5268
make run-e2e-tests
5369
```
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
mode: deployment
2+
image:
3+
# we need the contrib image for the prometheus exporter
4+
repository: "ghcr.io/open-telemetry/opentelemetry-collector-releases/opentelemetry-collector-contrib"
5+
ports:
6+
# These are internal metrics of the collector
7+
metrics:
8+
enabled: true
9+
servicePort: 8888
10+
# This is the port for the prometheus exporter we configure in the collector
11+
prom-metrics:
12+
enabled: true
13+
containerPort: 9464
14+
servicePort: 9464
15+
protocol: TCP
16+
17+
presets:
18+
kubernetesAttributes:
19+
enabled: true
20+
extractAllPodLabels: true
21+
22+
# This is the config file for the collector:
23+
config:
24+
receivers:
25+
otlp:
26+
protocols:
27+
grpc:
28+
endpoint: 0.0.0.0:4317
29+
http:
30+
endpoint: 0.0.0.0:4318
31+
processors:
32+
batch: {}
33+
resource:
34+
attributes:
35+
- key: service.instance.id
36+
from_attribute: k8s.pod.uid
37+
action: insert
38+
# Use the k8s namespace also as the open telemetry namespace
39+
- key: service.namespace
40+
from_attribute: k8s.namespace.name
41+
action: insert
42+
exporters:
43+
debug:
44+
verbosity: normal
45+
prometheus:
46+
endpoint: 0.0.0.0:9464
47+
nop: {}
48+
service:
49+
pipelines:
50+
traces:
51+
receivers: [otlp]
52+
processors: []
53+
exporters: [nop]
54+
metrics:
55+
receivers: [otlp]
56+
processors: [resource, batch]
57+
exporters: [debug, prometheus]
58+
logs:
59+
receivers: [otlp]
60+
processors: []
61+
exporters: [nop]

0 commit comments

Comments
 (0)