Skip to content

Commit c30e436

Browse files
Add Grafana LGTM monitoring example
1 parent 3517537 commit c30e436

18 files changed

Lines changed: 1642 additions & 0 deletions

File tree

monitoring/README.md

Lines changed: 384 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
##
## Local Dgraph Cluster Overlay
##
## Layers a single Dgraph Zero and a single Dgraph Alpha on top of the
## monitoring stack so both run in the same compose project.
##
## Usage:
##   docker compose -f docker-compose.yml -f docker-compose.dgraph.yml up -d
##

services:
  # Dgraph Zero. Traces are sent to the otel-collector service on port 4318.
  zero:
    image: ${DGRAPH_IMAGE:-dgraph/dgraph:local}
    container_name: dgraph-zero
    restart: unless-stopped
    depends_on:
      - otel-collector
    # Folded scalar: the lines below are joined into one command line.
    command: >-
      dgraph zero
      --my=zero:5080
      --telemetry "reports=false;"
      --trace "jaeger=otel-collector:4318; ratio=1.0; service=zero;"
    ports:
      - '5080:5080'
      - '6080:6080'
    volumes:
      - dgraph-zero-data:/dgraph
    labels:
      logging: dgraph
      service: dgraph-zero
    logging:
      driver: json-file
      options:
        max-size: '10m'
        max-file: '3'
        tag: dgraph-zero

  # Dgraph Alpha. Joins the Zero above and exports traces the same way.
  alpha:
    image: ${DGRAPH_IMAGE:-dgraph/dgraph:local}
    container_name: dgraph-alpha
    restart: unless-stopped
    depends_on:
      - zero
      - otel-collector
    # NOTE(review): whitelist=0.0.0.0/0 accepts every source address; this
    # looks intended for local experimentation only - confirm before reuse.
    command: >-
      dgraph alpha
      --my=alpha:7080
      --zero=zero:5080
      --security "whitelist=0.0.0.0/0;"
      --telemetry "reports=false;"
      --trace "jaeger=otel-collector:4318; ratio=1.0; service=alpha;"
    ports:
      - '8080:8080'
      - '9080:9080'
    volumes:
      - dgraph-alpha-data:/dgraph
    labels:
      logging: dgraph
      service: dgraph-alpha
    logging:
      driver: json-file
      options:
        max-size: '10m'
        max-file: '3'
        tag: dgraph-alpha

# Named volumes holding each node's /dgraph directory.
volumes:
  dgraph-zero-data:
  dgraph-alpha-data:
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
##
## External Dgraph Network Overlay
##
## Attaches the monitoring services to the Docker network of an already
## running Dgraph cluster, so containers from the two compose projects can
## reach each other.
##
## Usage:
##   # Find your Dgraph network name:
##   docker network ls | grep compose
##
##   # Start monitoring attached to that network:
##   DGRAPH_NETWORK=compose_default \
##     docker compose -f docker-compose.yml -f docker-compose.network.yml up -d
##

services:
  # Each service keeps the project's default network and additionally joins
  # the external Dgraph network declared at the bottom of this file.
  otel-collector:
    networks:
      - default
      - dgraph

  prometheus:
    networks:
      - default
      - dgraph

  promtail:
    networks:
      - default
      - dgraph

networks:
  # Pre-existing network owned by the Dgraph compose project. Compose aborts
  # with the message below when DGRAPH_NETWORK is unset or empty.
  dgraph:
    name: "${DGRAPH_NETWORK:?Set DGRAPH_NETWORK to your Dgraph compose network name}"
    external: true

monitoring/docker-compose.yml

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
##
## Grafana LGTM Observability Stack for Dgraph
##
## Components:
##   - Prometheus (metrics collection)
##   - Grafana Tempo (distributed tracing)
##   - Grafana Loki (log aggregation)
##   - OpenTelemetry Collector (trace pipeline)
##   - Promtail (log collection via Docker API)
##   - Grafana (visualization & dashboards)
##
## Usage:
##
##   Monitoring stack only (external Dgraph cluster):
##     1. Edit prometheus/targets/dgraph.json with your Alpha/Zero addresses
##     2. Configure your Dgraph cluster with: --trace "jaeger=<collector-host>:4318; ..."
##        The collector publishes no host ports by default: either uncomment
##        the ports on the otel-collector service below, or attach this stack
##        to the cluster's Docker network (see next mode).
##     3. docker compose up -d
##
##   Attached to an existing Dgraph compose network:
##     DGRAPH_NETWORK=<network> \
##       docker compose -f docker-compose.yml -f docker-compose.network.yml up -d
##
##   With a local Dgraph cluster:
##     docker compose -f docker-compose.yml -f docker-compose.dgraph.yml up -d
##
## Open Grafana at http://localhost:3000 (admin/admin)
##
## Every image can be overridden through an environment variable
## (OTEL_COLLECTOR_IMAGE, PROMETHEUS_IMAGE, TEMPO_IMAGE, PROMTAIL_IMAGE,
## LOKI_IMAGE, GRAFANA_IMAGE), mirroring DGRAPH_IMAGE in the Dgraph overlay.
## Set them to fixed tags for reproducible deployments; the defaults are
## unchanged.
##

services:
  # ──────────────────────────────────────────────
  # OpenTelemetry Collector
  # Receives OTLP traces from Dgraph and forwards
  # them to Tempo.
  # ──────────────────────────────────────────────
  otel-collector:
    image: ${OTEL_COLLECTOR_IMAGE:-otel/opentelemetry-collector-contrib:latest}
    container_name: otel-collector
    restart: unless-stopped
    command: ["--config=/etc/otelcol/config.yaml"]
    volumes:
      - ./otel-collector/config.yaml:/etc/otelcol/config.yaml:ro
    # Uncomment ports below if you need host access (debugging, or a Dgraph
    # cluster that is not on a shared Docker network). When Dgraph shares a
    # Docker network with this stack (docker-compose.dgraph.yml or
    # docker-compose.network.yml), the collector is reachable by service
    # name as otel-collector:4318 without publishing anything.
    # ports:
    #   - "4317:4317"   # OTLP gRPC
    #   - "4318:4318"   # OTLP HTTP
    #   - "8888:8888"   # Collector metrics
    depends_on:
      - tempo

  # ──────────────────────────────────────────────
  # Prometheus — Metrics
  # Scrapes /metrics from Dgraph Alpha and Zero.
  # ──────────────────────────────────────────────
  prometheus:
    image: ${PROMETHEUS_IMAGE:-prom/prometheus:latest}
    container_name: prometheus
    restart: unless-stopped
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--storage.tsdb.retention.time=30d"
      - "--web.enable-lifecycle"
      - "--web.enable-remote-write-receiver"
      - "--enable-feature=exemplar-storage"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      # Scrape target files; generate-targets.sh writes dgraph.json here.
      - ./prometheus/targets:/etc/prometheus/targets:ro
      - prometheus-data:/prometheus
    ports:
      - "9090:9090"

  # ──────────────────────────────────────────────
  # Grafana Tempo — Distributed Tracing
  # Receives OTLP traces via the OTel Collector.
  # ──────────────────────────────────────────────
  tempo:
    image: ${TEMPO_IMAGE:-grafana/tempo:2.6.1}
    container_name: tempo
    restart: unless-stopped
    command: ["-config.file=/etc/tempo/tempo.yaml"]
    volumes:
      - ./tempo/tempo.yaml:/etc/tempo/tempo.yaml:ro
      - tempo-data:/var/tempo
    ports:
      - "3200:3200"   # Tempo HTTP API
      - "4320:4317"   # OTLP gRPC (mapped to avoid conflict with collector)

  # ──────────────────────────────────────────────
  # Promtail — Log Collection
  # Collects Docker container logs via Docker API
  # and ships them to Loki.
  # ──────────────────────────────────────────────
  promtail:
    image: ${PROMTAIL_IMAGE:-grafana/promtail:latest}
    container_name: promtail
    restart: unless-stopped
    command: ["-config.file=/etc/promtail/promtail.yaml"]
    volumes:
      - ./promtail/promtail.yaml:/etc/promtail/promtail.yaml:ro
      # Read-only Docker socket, used to talk to the Docker API.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - promtail-data:/promtail
    depends_on:
      - loki

  # ──────────────────────────────────────────────
  # Grafana Loki — Log Aggregation
  # Receives logs from Promtail.
  # ──────────────────────────────────────────────
  loki:
    image: ${LOKI_IMAGE:-grafana/loki:latest}
    container_name: loki
    restart: unless-stopped
    command: ["-config.file=/etc/loki/loki.yaml"]
    volumes:
      - ./loki/loki.yaml:/etc/loki/loki.yaml:ro
      - loki-data:/loki
    ports:
      - "3100:3100"

  # ──────────────────────────────────────────────
  # Grafana — Visualization & Dashboards
  # Pre-provisioned with Prometheus, Tempo, and
  # Loki datasources + a Dgraph dashboard.
  # ──────────────────────────────────────────────
  grafana:
    image: ${GRAFANA_IMAGE:-grafana/grafana:latest}
    container_name: grafana
    restart: unless-stopped
    # WARNING: default admin/admin credentials plus anonymous Viewer access.
    # Suitable for a local example only; change both before exposing port
    # 3000 beyond localhost.
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor tempoSearch tempoServiceGraph traceToMetrics
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
    volumes:
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
      - grafana-data:/var/lib/grafana
    ports:
      - "3000:3000"
    depends_on:
      - prometheus
      - tempo
      - loki

# Named volumes so each component's data survives container re-creation.
volumes:
  prometheus-data:
  tempo-data:
  loki-data:
  grafana-data:
  promtail-data:

monitoring/generate-targets.sh

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env bash
##
## Generate Prometheus scrape targets from a Dgraph docker-compose file.
##
## Parses Dgraph Alpha and Zero services to extract hostnames and HTTP ports,
## then writes prometheus/targets/dgraph.json.
##
## Requirements: docker (compose v2), jq
##
## Usage:
##   ./generate-targets.sh /path/to/dgraph/docker-compose.yml
##   ./generate-targets.sh /path/to/dgraph/docker-compose.yml my-cluster
##
## Arguments:
##   $1  Path to the Dgraph docker-compose file (required).
##   $2  Value for the "cluster" label on every target (default: dgraph).
##
## Exit status: 0 on success, 1 on any error (missing tool, missing file,
## compose file that cannot be parsed, or no Dgraph services found).
##

set -euo pipefail

COMPOSE_FILE="${1:?Usage: $0 <docker-compose.yml> [cluster-name]}"
CLUSTER_NAME="${2:-dgraph}"
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
TARGETS_FILE="${SCRIPT_DIR}/prometheus/targets/dgraph.json"

if ! command -v jq &>/dev/null; then
  echo "Error: jq is required but not installed." >&2
  echo "  brew install jq" >&2
  exit 1
fi

# docker is listed as a requirement; fail with a clear message instead of a
# bare "command not found" from the shell.
if ! command -v docker &>/dev/null; then
  echo "Error: docker (with compose v2) is required but not installed." >&2
  exit 1
fi

if [[ ! -f "$COMPOSE_FILE" ]]; then
  echo "Error: File not found: $COMPOSE_FILE" >&2
  exit 1
fi

# Get normalized compose config as JSON.
#
# The assignment lives inside the `if` condition on purpose: with `set -e`, a
# bare `CONFIG=$(...)` would terminate the script on failure before any error
# message could be printed. Compose diagnostics are kept in a temp file and
# shown only when parsing fails, so successful runs stay quiet.
COMPOSE_ERR="$(mktemp)"
trap 'rm -f "$COMPOSE_ERR"' EXIT

if ! CONFIG=$(docker compose -f "$COMPOSE_FILE" config --format json 2>"$COMPOSE_ERR") \
  || [[ -z "$CONFIG" ]]; then
  echo "Error: Failed to parse $COMPOSE_FILE" >&2
  cat "$COMPOSE_ERR" >&2
  exit 1
fi

# Extract Dgraph services and their --my flag to determine hostnames and ports.
# Dgraph port layout (base + offset):
#   Alpha: internal=7080, http=8080 (internal + 1000)
#   Zero:  internal=5080, http=6080 (internal + 1000)
#
# The --my flag gives us <hostname>:<internal_port>, so http_port = internal_port + 1000

TARGETS=$(echo "$CONFIG" | jq -r --arg cluster "$CLUSTER_NAME" '
  # A compose file without services yields [] instead of a jq error.
  [(.services // {}) | to_entries[] |
    # Only process services that define a command
    select(.value.command != null) |
    .key as $svc |
    .value.command as $cmd |

    # Normalize command to a string (may be string or array)
    ($cmd | if type == "array" then join(" ") else tostring end) as $cmdstr |

    # Only Dgraph services
    select($cmdstr | test("dgraph (alpha|zero)")) |

    # Determine component type
    (if ($cmdstr | test("dgraph alpha")) then "alpha" else "zero" end) as $component |

    # Extract host and port from either --my=host:port or --my host:port
    ($cmdstr | capture("--my(?:=|\\s+)(?<host>[^:\\s]+):(?<port>\\d+)") // null) as $my |

    # Services without a usable --my flag are skipped
    select($my != null) |

    # Calculate HTTP/metrics port (internal + 1000)
    (($my.port | tonumber) + 1000 | tostring) as $http_port |

    {
      targets: [($my.host + ":" + $http_port)],
      labels: {
        job: ("dgraph-" + $component),
        cluster: $cluster,
        component: $component,
        instance: $svc
      }
    }
  ]
')

if [[ "$TARGETS" == "[]" || -z "$TARGETS" ]]; then
  echo "Error: No Dgraph services found in $COMPOSE_FILE" >&2
  exit 1
fi

# Write targets file
mkdir -p "$(dirname "$TARGETS_FILE")"
echo "$TARGETS" | jq '.' > "$TARGETS_FILE"

echo "Generated $TARGETS_FILE:"
echo "$TARGETS" | jq -r '.[] | "  \(.labels.instance): \(.targets[0]) (\(.labels.component))"'
echo ""
echo "Prometheus will auto-reload targets within 30s."

# Also print the network name for convenience.
# Prefer the default network name that compose itself resolved, which already
# reflects project-name normalization and any explicit `name:` override; fall
# back to <directory>_default when the config does not report one.
NETWORK_NAME=$(echo "$CONFIG" | jq -r '.networks.default.name // empty')
if [[ -z "$NETWORK_NAME" ]]; then
  PROJECT_DIR="$(cd "$(dirname "$COMPOSE_FILE")" && basename "$(pwd)")"
  NETWORK_NAME="${PROJECT_DIR}_default"
fi
echo ""
echo "To connect monitoring to this cluster's network:"
echo "  DGRAPH_NETWORK=${NETWORK_NAME} \\"
echo "  docker compose -f docker-compose.yml -f docker-compose.network.yml up -d"

0 commit comments

Comments
 (0)