forked from opensearch-project/observability-stack
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
313 lines (301 loc) · 12.6 KB
/
docker-compose.yml
File metadata and controls
313 lines (301 loc) · 12.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# Observability Stack
# Docker Compose configuration for local development
# WARNING: This configuration is for development/testing only - not production-ready
# Compose project name.
name: observability-stack

# Optional overlays. Each path comes from an environment variable and falls
# back to docker-compose/util/docker-compose.empty.yml when the variable is
# unset or empty, so every overlay is opt-in.
include:
  - path: "${INCLUDE_COMPOSE_EXAMPLES:-docker-compose/util/docker-compose.empty.yml}"
  - path: "${INCLUDE_COMPOSE_OTEL_DEMO:-docker-compose/util/docker-compose.empty.yml}"
  - path: "${INCLUDE_COMPOSE_AGENT_EVAL_LLM:-docker-compose/util/docker-compose.empty.yml}"
  - path: "${INCLUDE_COMPOSE_LOCAL_OPENSEARCH:-docker-compose/util/docker-compose.empty.yml}"
  - path: "${INCLUDE_COMPOSE_LOCAL_OPENSEARCH_DASHBOARDS:-docker-compose/util/docker-compose.empty.yml}"
  - path: "${INCLUDE_COMPOSE_COMPAT:-docker-compose/util/docker-compose.empty.yml}"
# Shared logging settings; services attach them through the `*logging` alias.
# json-file driver, capped at two files of 5 MB each, tagged with the
# container name.
x-default-logging: &logging
  driver: 'json-file'
  options:
    max-size: '5m'
    max-file: '2'
    tag: '{{.Name}}'
# Single bridge network joined by every service in this file. The explicit
# `name` keeps the network name fixed instead of project-prefixed.
networks:
  observability-stack-network:
    driver: bridge
    name: observability-stack-network
# Named volumes on the local driver.
volumes:
  # Mounted at /data by the `prometheus` (Cortex) service.
  prometheus-data:
    driver: local
  # Mounted at /alertmanager by the `alertmanager` service.
  alertmanager-data:
    driver: local
services:
# OpenTelemetry Collector - Receives telemetry data via OTLP protocol
otel-collector:
  image: otel/opentelemetry-collector-contrib:${OTEL_COLLECTOR_VERSION}
  container_name: otel-collector
  pull_policy: always
  command: ["--config=/etc/otelcol-config.yml"]
  volumes:
    - ./docker-compose/otel-collector/config.yaml:/etc/otelcol-config.yml
  ports:
    # OTLP gRPC receiver - high-performance binary protocol
    - "${OTEL_COLLECTOR_PORT_GRPC}:4317"
    # OTLP HTTP receiver - easier debugging and browser compatibility
    - "${OTEL_COLLECTOR_PORT_HTTP}:4318"
    # Metrics endpoint for collector self-monitoring
    - "${OTEL_COLLECTOR_METRICS_PORT}:8888"
  networks:
    - observability-stack-network
  restart: unless-stopped
  deploy:
    resources:
      limits:
        memory: ${OTEL_COLLECTOR_MEMORY_LIMIT}
  environment:
    - OTEL_COLLECTOR_HOST=${OTEL_COLLECTOR_HOST}
    - OTEL_COLLECTOR_PORT_GRPC=${OTEL_COLLECTOR_PORT_GRPC}
    - OTEL_COLLECTOR_PORT_HTTP=${OTEL_COLLECTOR_PORT_HTTP}
    - OPENSEARCH_HOST=${OPENSEARCH_HOST}
    - OPENSEARCH_PORT=${OPENSEARCH_PORT}
    # Go runtime soft memory limit. It must stay below the container limit
    # (OTEL_COLLECTOR_MEMORY_LIMIT) or the kernel OOM-kills the collector
    # before the GC reacts. Defaults to the previous fixed value; set
    # OTEL_COLLECTOR_GOMEMLIMIT when changing the container limit.
    - GOMEMLIMIT=${OTEL_COLLECTOR_GOMEMLIMIT:-160MiB}
  # OpenSearch is an optional dependency (`required: false`): wait for it to
  # be healthy when it is part of the project, start anyway when it is not.
  depends_on:
    opensearch:
      condition: service_healthy
      required: false
  logging: *logging
# Data Prepper - Transforms and enriches logs/traces before OpenSearch ingestion
data-prepper:
  image: ${DATA_PREPPER_DOCKER_REPO}/${DATA_PREPPER_IMAGE}:${DATA_PREPPER_VERSION}
  container_name: data-prepper
  pull_policy: always
  platform: linux/amd64
  # Renders pipelines.yaml from the mounted template, then starts Data Prepper.
  # Placeholder values are read at runtime from the container environment
  # (`$$` defers expansion to bash) instead of being interpolated by Compose
  # into the command string. That keeps credentials out of the container's
  # command line and stops values containing quotes, `$`, `&` or `|` from
  # breaking the shell or the sed expression.
  command:
    - /bin/bash
    - -c
    - |
      set -e
      # Escape the characters sed treats specially in a replacement: \ & and
      # the | delimiter used below.
      sed_escape() { printf '%s' "$$1" | sed -e 's/[\\&|]/\\&/g'; }
      cp /tmp/pipelines.template.yaml /tmp/pipelines.yaml
      chmod +w /tmp/pipelines.yaml
      # Each placeholder token in the template is named after the environment
      # variable that supplies its value.
      for placeholder in \
        OPENSEARCH_USER OPENSEARCH_PASSWORD OPENSEARCH_PROTOCOL \
        OPENSEARCH_HOST OPENSEARCH_PORT PROMETHEUS_HOST PROMETHEUS_PORT; do
        value="$$(sed_escape "$${!placeholder}")"
        sed -i "s|$${placeholder}|$${value}|g" /tmp/pipelines.yaml
      done
      mv /tmp/pipelines.yaml /usr/share/data-prepper/pipelines/pipelines.yaml
      exec /usr/share/data-prepper/bin/data-prepper
  volumes:
    - ./docker-compose/data-prepper/pipelines.template.yaml:/tmp/pipelines.template.yaml
    - ./docker-compose/data-prepper/data-prepper-config.yaml:/usr/share/data-prepper/config/data-prepper-config.yaml
  ports:
    # OTLP gRPC receiver from OpenTelemetry Collector
    - "${DATA_PREPPER_OTLP_PORT}:21890"
    # OTLP HTTP receiver
    - "${DATA_PREPPER_HTTP_PORT}:21892"
  environment:
    - OPENSEARCH_HOST=${OPENSEARCH_HOST}
    - OPENSEARCH_PORT=${OPENSEARCH_PORT}
    # Needed by the startup script above, which substitutes the
    # OPENSEARCH_PROTOCOL placeholder from the container environment.
    - OPENSEARCH_PROTOCOL=${OPENSEARCH_PROTOCOL}
    - OPENSEARCH_USER=${OPENSEARCH_USER}
    - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
    - PROMETHEUS_HOST=${PROMETHEUS_HOST}
    - PROMETHEUS_PORT=${PROMETHEUS_PORT}
  depends_on:
    opensearch:
      condition: service_healthy
      required: false
  networks:
    - observability-stack-network
  restart: unless-stopped
  deploy:
    resources:
      limits:
        memory: ${DATA_PREPPER_MEMORY_LIMIT}
  logging: *logging
# Cortex - Prometheus-compatible metrics backend that serves the query, ruler
# and Alertmanager APIs from one endpoint for OpenSearch Dashboards. It stands
# in for vanilla Prometheus; the service is still called "prometheus" so the
# PROMETHEUS_HOST/PORT values in .env keep working without changes.
prometheus:
  container_name: prometheus
  image: cortexproject/cortex:${CORTEX_VERSION}
  restart: unless-stopped
  networks:
    - observability-stack-network
  ports:
    - "${PROMETHEUS_PORT}:9090"
  volumes:
    - ./docker-compose/cortex/cortex.yaml:/etc/cortex/cortex.yaml:ro
    - prometheus-data:/data
  # Upgrade shim run before Cortex starts. When the volume holds
  # vanilla-Prometheus leftovers (/data/chunks_head exists, /data/tsdb does
  # not), they are deleted so Cortex does not inherit directories it never
  # reads. Nothing happens on a fresh volume (no chunks_head) or on later
  # restarts (tsdb already present). The trailing "--" fills $0 so that "$@"
  # carries exactly the `command` arguments below.
  entrypoint:
    - /bin/sh
    - "-c"
    - |
      if [ ! -d /data/tsdb ] && [ -d /data/chunks_head ]; then
        echo "First boot after upgrade from vanilla Prometheus — removing stale TSDB artifacts"
        rm -rf /data/chunks_head /data/wal /data/wbl /data/lock /data/queries.active
      fi
      exec /bin/cortex "$$@"
    - "--"
  command:
    - "-config.file=/etc/cortex/cortex.yaml"
    # Retention follows PROMETHEUS_RETENTION: the compactor deletes blocks
    # whose max-time is older than this period; 0 disables deletion.
    - "-compactor.blocks-retention-period=${PROMETHEUS_RETENTION}"
  deploy:
    resources:
      limits:
        memory: ${PROMETHEUS_MEMORY_LIMIT}
  # Probes /ready every 5s after a 30s start period; up to 20 retries.
  healthcheck:
    test:
      - CMD
      - wget
      - "--tries=1"
      - "--spider"
      - "-q"
      - "http://localhost:9090/ready"
    start_period: 30s
    interval: 5s
    timeout: 5s
    retries: 20
  logging: *logging
# Prometheus Alertmanager - Alert routing, grouping, deduplication, and silencing.
# Runs whether or not the otel-demo is enabled: the base stack rules (collector
# health, scrape-target health) alert into it, and demo rules alert in when the
# demo overlay is enabled too. The OSD Prometheus datasource's alertmanager.uri
# points at this service's HTTP API.
alertmanager:
  image: prom/alertmanager:${ALERTMANAGER_VERSION}
  container_name: alertmanager
  pull_policy: always
  entrypoint: /bin/sh
  # Renders the config from the read-only template, then starts Alertmanager.
  # Credentials are read from the container environment (`$$` defers expansion
  # to the shell). The expansions are double-quoted so whitespace or glob
  # characters in a value cannot split the sed expression, and `esc` escapes
  # the characters sed treats specially in a replacement (\ & and the |
  # delimiter) so the value is inserted literally.
  command:
    - -c
    - |
      esc() { printf '%s' "$$1" | sed -e 's/[\\&|]/\\&/g'; }
      cp /tmp/alertmanager.template.yml /tmp/alertmanager.yml &&
      sed -i "s|OPENSEARCH_USER|$$(esc "$$OPENSEARCH_USER")|g" /tmp/alertmanager.yml &&
      sed -i "s|OPENSEARCH_PASSWORD|$$(esc "$$OPENSEARCH_PASSWORD")|g" /tmp/alertmanager.yml &&
      exec /bin/alertmanager \
        --config.file=/tmp/alertmanager.yml \
        --storage.path=/alertmanager \
        --web.listen-address=:9093
  volumes:
    - ./docker-compose/alertmanager/alertmanager.template.yml:/tmp/alertmanager.template.yml:ro
    - alertmanager-data:/alertmanager
  ports:
    - "${ALERTMANAGER_PORT}:9093"
  environment:
    - OPENSEARCH_USER=${OPENSEARCH_USER}
    - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
  networks:
    - observability-stack-network
  restart: unless-stopped
  deploy:
    resources:
      limits:
        memory: ${ALERTMANAGER_MEMORY_LIMIT}
  healthcheck:
    test: ["CMD", "wget", "--tries=1", "--spider", "-q", "http://localhost:9093/-/healthy"]
    interval: 10s
    timeout: 5s
    retries: 10
  logging: *logging
# Alerting Rules + Monitors Init - One-shot loader for stack-wide alerting:
#   1. Cortex ruler rules from /rules/stack (Prometheus rule groups)
#   2. OpenSearch alerting monitors that watch the stack itself (cluster health, etc)
# Both stages are idempotent. The container runs both regardless of each
# other's outcome and exits non-zero if either failed, so a partial failure
# is visible but doesn't block the working half. The otel-demo overlay
# defines a sibling container that loads its own rules/monitors.
alerting-rules-monitors-init:
  image: python:3.11-alpine
  container_name: alerting-rules-monitors-init
  command:
    - sh
    - -c
    - |
      pip install requests pyyaml || exit 1
      fail=0
      echo "=== Loading Cortex rules ==="
      python /init-cortex-rules.py || fail=1
      echo "=== Creating OpenSearch monitors ==="
      python /init-stack-monitors.py || fail=1
      exit $$fail
  depends_on:
    prometheus:
      condition: service_healthy
    # Optional, matching the other services in this file: OpenSearch is
    # supplied by an opt-in include, and an unavailable OpenSearch must not
    # stop the Cortex rules stage from running. The monitors stage then fails
    # on its own and is reported through the exit code.
    opensearch:
      condition: service_healthy
      required: false
  environment:
    - OPENSEARCH_USER=${OPENSEARCH_USER}
    - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
  volumes:
    - ./docker-compose/cortex/init-cortex-rules.py:/init-cortex-rules.py
    - ./docker-compose/prometheus/rules-stack:/rules/stack:ro
    - ./docker-compose/opensearch-dashboards/init/init-stack-monitors.py:/init-stack-monitors.py
  networks:
    - observability-stack-network
  # One-shot job: never restart, so the exit status stays visible.
  restart: "no"
  logging: *logging
# OpenSearch Prometheus Exporter - Exposes OpenSearch metrics for Prometheus scraping
opensearch-exporter:
  image: prometheuscommunity/elasticsearch-exporter:v1.10.0
  container_name: opensearch-exporter
  command:
    - --es.uri=${OPENSEARCH_PROTOCOL}://${OPENSEARCH_HOST}:${OPENSEARCH_PORT}
    - --es.ssl-skip-verify
    - --es.all
    - --es.indices
    - --es.shards
  # Credentials are passed through the environment, not embedded in --es.uri.
  environment:
    - ES_USERNAME=${OPENSEARCH_USER}
    - ES_PASSWORD=${OPENSEARCH_PASSWORD}
  ports:
    # Host port is configurable like every other published port in this file;
    # it defaults to the previous fixed value. The container port stays 9114.
    - "${OPENSEARCH_EXPORTER_PORT:-9114}:9114"
  networks:
    - observability-stack-network
  depends_on:
    opensearch:
      condition: service_healthy
      required: false
  restart: unless-stopped
  deploy:
    resources:
      limits:
        memory: 128M
  logging: *logging
# OpenSearch Dashboards Initialization - Creates workspace, index patterns, and saved queries
opensearch-dashboards-init:
  image: python:3.11-alpine
  container_name: opensearch-dashboards-init
  command: sh -c "pip install requests pyyaml && python /init.py"
  environment:
    - OPENSEARCH_USER=${OPENSEARCH_USER}
    - OPENSEARCH_PASSWORD=${OPENSEARCH_PASSWORD}
    - OPENSEARCH_HOST=${OPENSEARCH_HOST}
    - OPENSEARCH_PORT=${OPENSEARCH_PORT}
    - OPENSEARCH_PROTOCOL=${OPENSEARCH_PROTOCOL}
    # Overrides the endpoint stored on the seeded `local_cluster` data-source
    # saved object. Blank default uses the intra-network hostname, which is
    # correct when OpenSearch Dashboards runs inside the compose network.
    # Set in `.env` (e.g. `https://localhost:9200`) when running OSD on the
    # host, since the host process cannot resolve the `opensearch` service
    # name — any MDS-scoped OSD feature dialing this SO's endpoint would
    # otherwise fail with `getaddrinfo ENOTFOUND opensearch`.
    - OSD_DATASOURCE_ENDPOINT=${OSD_DATASOURCE_ENDPOINT:-}
    - OPENSEARCH_DASHBOARDS_HOST=${OPENSEARCH_DASHBOARDS_HOST}
    - OPENSEARCH_DASHBOARDS_PORT=${OPENSEARCH_DASHBOARDS_PORT}
    - OPENSEARCH_DASHBOARDS_PROTOCOL=${OPENSEARCH_DASHBOARDS_PROTOCOL}
    - PROMETHEUS_HOST=${PROMETHEUS_HOST}
    - PROMETHEUS_PORT=${PROMETHEUS_PORT}
    # alertmanager.uri is set on the Prometheus datasource unconditionally.
    # Alertmanager now runs always (defined in docker-compose.yml, not the
    # otel-demo overlay), so this URI is always valid.
    # The host is the in-network service name, so the port must be the
    # container's listen port (9093, see --web.listen-address on the
    # alertmanager service). ${ALERTMANAGER_PORT} is only the host-published
    # side of that mapping and is wrong here whenever it is changed from 9093.
    - ALERTMANAGER_HOST=alertmanager
    - ALERTMANAGER_PORT=9093
    - ISM_RETENTION_DAYS=${ISM_RETENTION_DAYS:-7}
  volumes:
    - ./docker-compose/opensearch-dashboards/init/init-opensearch-dashboards.py:/init.py
    - ./docker-compose/opensearch-dashboards/saved-queries-traces.yaml:/config/saved-queries-traces.yaml
    - ./docker-compose/opensearch-dashboards/saved-queries-metrics.yaml:/config/saved-queries-metrics.yaml
    - ./docker-compose/opensearch-dashboards/dashboard-opensearch-health.yaml:/config/dashboard-opensearch-health.yaml
    - ./docker-compose/opensearch-dashboards/dashboard-pipeline-health.yaml:/config/dashboard-pipeline-health.yaml
    - ./docker-compose/opensearch-dashboards/init/architecture.png:/config/architecture.png
    - ./docker-compose/opensearch-dashboards/init/dashboard-astronomy-shop.ndjson:/config/dashboard-astronomy-shop.ndjson
  networks:
    - observability-stack-network
  # One-shot job: never restart.
  restart: "no"
  logging: *logging