Skip to content

Commit d66ae37

Browse files
committed
feat: observability with otel and default grafana dashboard
Signed-off-by: Attila Mészáros <a_meszaros@apple.com>
1 parent 4f1d011 commit d66ae37

File tree

1 file changed

+240
-0
lines changed

1 file changed

+240
-0
lines changed
Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
#!/bin/bash
#
# Installs a demo observability stack with helm and kubectl:
# cert-manager, the OpenTelemetry Operator, kube-prometheus-stack
# (Prometheus + Grafana) and an OpenTelemetry Collector instance.

# Strict mode:
#   -e          abort on the first failing command
#   -u          treat expansion of an unset variable as an error
#   -o pipefail a pipeline fails when any stage fails, not only the last one
#               (this script pipes manifests into `kubectl apply`)
set -euo pipefail

# Colors for output. The values are literal backslash sequences; they are
# turned into real escape codes by `echo -e` at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN}Installing Observability Stack${NC}"
echo -e "${GREEN}OpenTelemetry + Prometheus + Grafana${NC}"
echo -e "${GREEN}========================================${NC}"
15+
16+
# Check that the required command-line tools are installed.
# Both helm and kubectl are invoked further down, so fail early with a clear
# message (on stderr) instead of a "command not found" halfway through.
echo -e "\n${YELLOW}Checking helm installation...${NC}"
if ! command -v helm &> /dev/null; then
  echo -e "${RED}Error: helm is not installed${NC}" >&2
  echo "Please install helm: https://helm.sh/docs/intro/install/" >&2
  exit 1
fi
echo -e "${GREEN}✓ helm is installed${NC}"

if ! command -v kubectl &> /dev/null; then
  echo -e "${RED}Error: kubectl is not installed${NC}" >&2
  echo "Please install kubectl: https://kubernetes.io/docs/tasks/tools/" >&2
  exit 1
fi
echo -e "${GREEN}✓ kubectl is installed${NC}"
24+
25+
# Register the chart repositories used by the installs below, then refresh
# the local chart index. Each entry is "<repo name>=<repo URL>".
echo -e "\n${YELLOW}Adding Helm repositories...${NC}"
chart_repos=(
  "jetstack=https://charts.jetstack.io"
  "open-telemetry=https://open-telemetry.github.io/opentelemetry-helm-charts"
  "prometheus-community=https://prometheus-community.github.io/helm-charts"
)
for repo_entry in "${chart_repos[@]}"; do
  # Split on the first '=': name before it, URL after it.
  helm repo add "${repo_entry%%=*}" "${repo_entry#*=}"
done
helm repo update
echo -e "${GREEN}✓ Helm repositories added${NC}"
32+
33+
# Install cert-manager (required for OpenTelemetry Operator)
#
# "Already installed" is decided by looking for Deployments in the
# cert-manager namespace rather than for the namespace itself: a previous run
# that created the namespace but failed during `helm install` would otherwise
# make every later run skip cert-manager for good.
echo -e "\n${YELLOW}Installing cert-manager...${NC}"
# `|| true`: a lookup failure is treated as "not installed"; the helm command
# below then surfaces the real error.
existing_cert_manager="$(kubectl get deployments -n cert-manager -o name 2> /dev/null || true)"
if [[ -n "${existing_cert_manager}" ]]; then
  echo -e "${YELLOW}cert-manager already installed, skipping...${NC}"
else
  # upgrade --install is safe to re-run; --create-namespace tolerates a
  # namespace left behind by an earlier attempt.
  helm upgrade --install cert-manager jetstack/cert-manager \
    --namespace cert-manager \
    --create-namespace \
    --set crds.enabled=true \
    --wait
  echo -e "${GREEN}✓ cert-manager installed${NC}"
fi
45+
46+
# Make sure the "observability" namespace exists. The manifest is rendered
# client-side and fed to `kubectl apply`, so the step can be repeated on a
# cluster where the namespace is already present.
printf '%b\n' "\n${YELLOW}Creating observability namespace...${NC}"
kubectl create namespace observability --dry-run=client --output=yaml \
  | kubectl apply --filename=-
printf '%b\n' "${GREEN}✓ observability namespace ready${NC}"
50+
51+
# Install (or upgrade) the OpenTelemetry Operator.
#
# `helm upgrade --install` replaces the former `helm list | grep -q` probe:
# the grep was a substring match over the whole table, did not see releases
# in a pending state, and forced the chart flags to be kept in sync across
# two near-identical install/upgrade branches.
echo -e "\n${YELLOW}Installing OpenTelemetry Operator...${NC}"
helm upgrade --install opentelemetry-operator open-telemetry/opentelemetry-operator \
  --namespace observability \
  --set "manager.collectorImage.repository=otel/opentelemetry-collector-contrib" \
  --wait
echo -e "${GREEN}✓ OpenTelemetry Operator installed${NC}"
66+
67+
# Install (or upgrade) kube-prometheus-stack (includes Prometheus + Grafana).
#
# A single `helm upgrade --install` covers both the first install and later
# re-runs, so the flag list exists only once and no fragile
# `helm list | grep` release detection is needed.
#
# The two *SelectorNilUsesHelmValues=false settings stop the chart from
# restricting Prometheus to its own release's ServiceMonitors/PodMonitors.
# NOTE: grafana.adminPassword=admin is a demo-only credential; it matches the
# login printed in the summary at the end of this script.
echo -e "\n${YELLOW}Installing Prometheus and Grafana stack...${NC}"
helm upgrade --install kube-prometheus-stack prometheus-community/kube-prometheus-stack \
  --namespace observability \
  --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false \
  --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false \
  --set grafana.adminPassword=admin \
  --wait
echo -e "${GREEN}✓ Prometheus and Grafana installed${NC}"
86+
87+
# Create OpenTelemetry Collector instance
#
# Applies an OpenTelemetryCollector custom resource (handled by the operator
# installed above). Pipelines defined here:
#   metrics: otlp + prometheus receivers -> memory_limiter, batch -> prometheus + debug exporters
#   traces:  otlp receiver               -> memory_limiter, batch -> debug exporter
# The heredoc delimiter is quoted so the shell never expands anything inside
# the manifest.
echo -e "\n${YELLOW}Creating OpenTelemetry Collector...${NC}"
kubectl apply -f - <<'EOF'
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: otel-collector
  namespace: observability
spec:
  mode: deployment
  config:
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
      prometheus:
        config:
          scrape_configs:
            - job_name: 'otel-collector'
              scrape_interval: 10s
              static_configs:
                - targets: ['0.0.0.0:8888']

    processors:
      batch:
        timeout: 10s
      memory_limiter:
        check_interval: 1s
        limit_percentage: 75
        spike_limit_percentage: 15

    exporters:
      prometheus:
        endpoint: "0.0.0.0:8889"
        namespace: "otel"
        send_timestamps: true
        metric_expiration: 5m
      debug:
        verbosity: detailed
        sampling_initial: 5
        sampling_thereafter: 200

    service:
      pipelines:
        metrics:
          receivers: [otlp, prometheus]
          processors: [memory_limiter, batch]
          exporters: [prometheus, debug]
        traces:
          receivers: [otlp]
          processors: [memory_limiter, batch]
          exporters: [debug]
EOF
echo -e "${GREEN}✓ OpenTelemetry Collector created${NC}"
144+
145+
# Create a Service and a ServiceMonitor for the OpenTelemetry Collector
#
# The Service exposes the collector's prometheus exporter port (8889); the
# ServiceMonitor selects that Service through its `app: otel-collector` label
# so Prometheus scrapes it every 30s.
#
# The Service selector must match the pods the operator creates. The operator
# labels them app.kubernetes.io/name=<collector name>-collector, i.e.
# "otel-collector-collector" for the collector named "otel-collector"; with
# the bare collector name the Service would select no pods.
# NOTE(review): confirm with
#   kubectl get pods -n observability --show-labels
echo -e "\n${YELLOW}Creating ServiceMonitor for OpenTelemetry...${NC}"
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Service
metadata:
  name: otel-collector-prometheus
  namespace: observability
  labels:
    app: otel-collector
spec:
  ports:
    - name: prometheus
      port: 8889
      targetPort: 8889
      protocol: TCP
  selector:
    app.kubernetes.io/name: otel-collector-collector
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: otel-collector
  namespace: observability
  labels:
    app: otel-collector
spec:
  selector:
    matchLabels:
      app: otel-collector
  endpoints:
    - port: prometheus
      interval: 30s
EOF
echo -e "${GREEN}✓ ServiceMonitor created${NC}"
180+
181+
# Wait for all pods to be ready
#
# `kubectl wait ... --all` only considers pods that exist when it is called.
# The collector Deployment is created by the operator some time after the
# custom resource is applied, so first poll (up to ~60s) until the Deployment
# shows up and wait for its rollout; otherwise "all pods ready" could be
# reported before the collector pod even exists.
echo -e "\n${YELLOW}Waiting for all pods to be ready...${NC}"
collector_deployment="otel-collector-collector"
for ((attempt = 0; attempt < 30; attempt++)); do
  if kubectl get deployment "${collector_deployment}" -n observability > /dev/null 2>&1; then
    break
  fi
  sleep 2
done
# Fails (and aborts the script under `set -e`) if the Deployment never appeared.
kubectl rollout status "deployment/${collector_deployment}" -n observability --timeout=300s
kubectl wait --for=condition=ready pod --all -n observability --timeout=300s
echo -e "${GREEN}✓ All pods are ready${NC}"
185+
186+
# Final report: pod status, access instructions, operator configuration
# hints, dashboard pointers and uninstall commands.
#
# Every line goes through printf '%b\n', which expands the backslash escapes
# held in the color variables (and the leading "\n" of section titles) and
# appends a newline per argument.

summary_rule="========================================"

# Prints a green three-line banner (rule, title, rule) preceded by a blank line.
#   $1 - banner title
summary_banner() {
  printf '%b\n' \
    "\n${GREEN}${summary_rule}${NC}" \
    "${GREEN}$1${NC}" \
    "${GREEN}${summary_rule}${NC}"
}

summary_banner "Installation Complete!"

printf '%b\n' "\n${YELLOW}Pod Status:${NC}"
kubectl get pods -n observability

summary_banner "Access Information"

printf '%b\n' \
  "\n${YELLOW}Grafana:${NC}" \
  " Username: ${GREEN}admin${NC}" \
  " Password: ${GREEN}admin${NC}" \
  " Access with: ${GREEN}kubectl port-forward -n observability svc/kube-prometheus-stack-grafana 3000:80${NC}" \
  " Then open: ${GREEN}http://localhost:3000${NC}"

printf '%b\n' \
  "\n${YELLOW}Prometheus:${NC}" \
  " Access with: ${GREEN}kubectl port-forward -n observability svc/kube-prometheus-stack-prometheus 9090:9090${NC}" \
  " Then open: ${GREEN}http://localhost:9090${NC}"

printf '%b\n' \
  "\n${YELLOW}OpenTelemetry Collector:${NC}" \
  " OTLP gRPC endpoint: ${GREEN}otel-collector-collector.observability.svc.cluster.local:4317${NC}" \
  " OTLP HTTP endpoint: ${GREEN}otel-collector-collector.observability.svc.cluster.local:4318${NC}" \
  " Prometheus metrics: ${GREEN}http://otel-collector-prometheus.observability.svc.cluster.local:8889/metrics${NC}"

printf '%b\n' \
  "\n${YELLOW}Configure your Java Operator to use OpenTelemetry:${NC}" \
  " Add dependency: ${GREEN}io.javaoperatorsdk:operator-framework-opentelemetry-support${NC}" \
  " Set environment variables:" \
  " ${GREEN}OTEL_SERVICE_NAME=your-operator-name${NC}" \
  " ${GREEN}OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector-collector.observability.svc.cluster.local:4318${NC}" \
  " ${GREEN}OTEL_METRICS_EXPORTER=otlp${NC}" \
  " ${GREEN}OTEL_TRACES_EXPORTER=otlp${NC}"

summary_banner "Grafana Dashboards"

printf '%b\n' \
  "\nPre-installed dashboards in Grafana:" \
  " - Kubernetes / Compute Resources / Cluster" \
  " - Kubernetes / Compute Resources / Namespace (Pods)" \
  " - Node Exporter / Nodes" \
  "\nFor JOSDK metrics, create a custom dashboard with queries like:" \
  " ${GREEN}sum(rate(operator_sdk_reconciliations_started_total[5m]))${NC}" \
  " ${GREEN}sum(rate(operator_sdk_reconciliations_success_total[5m]))${NC}" \
  " ${GREEN}sum(rate(operator_sdk_reconciliations_failed_total[5m]))${NC}"

printf '%b\n' \
  "\n${YELLOW}To uninstall:${NC}" \
  " kubectl delete -n observability OpenTelemetryCollector otel-collector" \
  " helm uninstall -n observability kube-prometheus-stack" \
  " helm uninstall -n observability opentelemetry-operator" \
  " helm uninstall -n cert-manager cert-manager" \
  " kubectl delete namespace observability cert-manager"

printf '%b\n' "\n${GREEN}Done!${NC}"

0 commit comments

Comments
 (0)