Skip to content

Commit 2fee983

Browse files
authored
feat: add E2E smoke tests for core observability stack (opensearch-project#96)
* feat: add E2E smoke tests for core observability stack

  Adds a test script and GitHub Actions workflow that:
  - Starts the core stack via docker compose
  - Health checks OpenSearch, OTel Collector, Prometheus, and Dashboards
  - Sends a test trace through the OTLP pipeline
  - Tears down cleanly on exit

  Signed-off-by: Kyle Hounslow <kylhouns@amazon.com>

* ci: add temporary push trigger for testing e2e workflow

  Signed-off-by: Kyle Hounslow <kylhouns@amazon.com>

* fix: parse .env safely instead of sourcing it

  Signed-off-by: Kyle Hounslow <kylhouns@amazon.com>

* feat: verify test trace lands in OpenSearch after ingestion

  Signed-off-by: Kyle Hounslow <kylhouns@amazon.com>

* fix: broaden trace search across index patterns and field names

  Signed-off-by: Kyle Hounslow <kylhouns@amazon.com>

* chore: bump trace verification timeout to 90s, remove temp push trigger

  Signed-off-by: Kyle Hounslow <kylhouns@amazon.com>

* docs: add E2E tests status badge to README

  Signed-off-by: Kyle Hounslow <kylhouns@amazon.com>

---------

Signed-off-by: Kyle Hounslow <kylhouns@amazon.com>
1 parent 20e6921 commit 2fee983

3 files changed

Lines changed: 134 additions & 0 deletions

File tree

.github/workflows/e2e.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# End-to-end smoke tests for the core observability stack.
# Runs on pull requests targeting main (documentation-only changes are
# skipped) and can also be started manually.
name: E2E Tests

on:
  pull_request:
    branches: [main]
    paths-ignore:
      - 'docs/**'
      - '*.md'
  workflow_dispatch:

# The job only checks out the repository, so a read-only token is sufficient.
permissions:
  contents: read

jobs:
  e2e:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4

      - name: Run E2E tests
        run: ./test/e2e.sh

      # NOTE(review): test/e2e.sh tears the stack down (`down -v`) from its
      # EXIT trap, so the containers may already be gone when this step
      # runs — confirm it still yields useful logs.
      - name: Dump logs on failure
        if: failure()
        run: docker compose logs --tail=50

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
# 🔭 OpenSearch Observability Stack
44

5+
[![E2E Tests](https://github.com/opensearch-project/observability-stack/actions/workflows/e2e.yml/badge.svg)](https://github.com/opensearch-project/observability-stack/actions/workflows/e2e.yml)
6+
57
Observability Stack is an open-source stack designed for modern distributed systems. Built on OpenTelemetry, OpenSearch, and Prometheus, Observability Stack provides a complete, pre-configured infrastructure for monitoring microservices, web applications, and AI agents—with first-class support for agent observability through [OpenTelemetry Gen-AI Semantic Conventions](https://opentelemetry.io/docs/specs/semconv/gen-ai/).
68

79
![OpenSearch Observability Stack Architecture - docker-compose](./docs/observability-stack-arch-compose.excalidraw.png)

test/e2e.sh

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#!/usr/bin/env bash
#
# E2E smoke test for the core observability stack: starts the stack with
# docker compose, health-checks each service, sends a test trace through the
# OTel Collector and verifies that it lands in OpenSearch.
#
# Environment:
#   WAIT_TIMEOUT  value passed to `docker compose up --wait-timeout`
#                 (default: 180)
set -o errexit -o nounset -o pipefail

# Resolve every path from this script's own location so the test can be
# launched from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
COMPOSE_FILE="${PROJECT_DIR}/docker-compose.yml"

# Keep a caller-supplied WAIT_TIMEOUT; otherwise fall back to 180.
: "${WAIT_TIMEOUT:=180}"

# Core stack only: point the optional example/demo includes at an empty
# compose file.
export \
  INCLUDE_COMPOSE_EXAMPLES=docker-compose/util/docker-compose.empty.yml \
  INCLUDE_COMPOSE_OTEL_DEMO=docker-compose/util/docker-compose.empty.yml
12+
13+
#######################################
# EXIT handler: tears the compose stack down on every exit path. When the
# script is exiting with a non-zero status, the most recent container logs
# are printed first, because `down -v` removes the containers and their logs
# would otherwise be lost.
# Globals:   COMPOSE_FILE (read), PROJECT_DIR (read)
# Arguments: none
# Outputs:   progress line on stdout; failure diagnostics on stderr
#######################################
cleanup() {
  # Must stay the first statement: $? still holds the status the script is
  # exiting with.
  local exit_status=$?
  local -a compose=(docker compose -f "$COMPOSE_FILE" --project-directory "$PROJECT_DIR")

  if ((exit_status != 0)); then
    echo "==> Run failed (exit ${exit_status}); recent container logs:" >&2
    # Best-effort: a failure to fetch logs must not block the teardown.
    "${compose[@]}" logs --tail=50 >&2 || true
  fi

  echo "==> Tearing down..."
  # Best-effort: the stack may be only partially started, and teardown
  # problems must not mask the script's real exit status.
  "${compose[@]}" down -v --remove-orphans 2>/dev/null || true
}
trap cleanup EXIT
18+
19+
# Bring the stack up detached and block until compose reports the services
# ready, or until WAIT_TIMEOUT elapses.
printf '%s\n' "==> Starting observability stack..."
compose_up_cmd=(
  docker compose
  -f "$COMPOSE_FILE"
  --project-directory "$PROJECT_DIR"
  up -d --wait --wait-timeout "$WAIT_TIMEOUT"
)
"${compose_up_cmd[@]}"
21+
22+
# Load only the settings this script needs from .env. The file is treated
# purely as data: nothing read from it is evaluated as shell code, so values
# containing spaces, `$`, backticks or `;` are stored verbatim.
env_file="$PROJECT_DIR/.env"
required_env_keys=(
  OPENSEARCH_USER
  OPENSEARCH_PASSWORD
  OPENSEARCH_PORT
  OPENSEARCH_DASHBOARDS_PORT
  OTEL_COLLECTOR_PORT_HTTP
  PROMETHEUS_PORT
)

if [[ ! -r "$env_file" ]]; then
  echo "FAIL: cannot read $env_file"
  exit 1
fi

# `|| [[ -n ... ]]` also processes a final line that lacks a trailing newline.
while IFS= read -r env_line || [[ -n "$env_line" ]]; do
  env_line="${env_line%$'\r'}" # tolerate CRLF line endings
  [[ "$env_line" == *=* ]] || continue
  env_key="${env_line%%=*}"
  env_value="${env_line#*=}"

  for wanted_key in "${required_env_keys[@]}"; do
    [[ "$env_key" == "$wanted_key" ]] || continue

    if [[ "$env_value" == \"*\" || "$env_value" == \'*\' ]]; then
      # Drop one pair of surrounding quotes; the content is kept literally.
      env_value="${env_value#?}"
      env_value="${env_value%?}"
    else
      # Unquoted value: strip an inline " # comment" and trailing whitespace.
      env_value="${env_value%%[[:space:]]#*}"
      env_value="${env_value%"${env_value##*[![:space:]]}"}"
    fi

    # Assign by name without eval; a later duplicate key overrides an
    # earlier one.
    printf -v "$env_key" '%s' "$env_value"
  done
done <"$env_file"

# Fail with a clear message rather than an "unbound variable" error later.
for wanted_key in "${required_env_keys[@]}"; do
  if ! declare -p "$wanted_key" >/dev/null 2>&1; then
    echo "FAIL: $wanted_key is not set in $env_file"
    exit 1
  fi
done

OPENSEARCH_URL="https://localhost:${OPENSEARCH_PORT}"
# -s: no progress output; -k: skip TLS certificate verification;
# -u: basic-auth credentials.
CURL_OPTS=(-s -k -u "${OPENSEARCH_USER}:${OPENSEARCH_PASSWORD}")
27+
28+
# OpenSearch: the cluster must report a usable status. Only "green" and
# "yellow" pass; "red", an empty status or an unparseable response (for
# example an authentication error body) all fail.
echo "==> Checking OpenSearch cluster health..."
health_response=$(curl "${CURL_OPTS[@]}" "$OPENSEARCH_URL/_cluster/health") || {
  echo "FAIL: could not query OpenSearch cluster health at $OPENSEARCH_URL"
  exit 1
}
# Extract the value of the "status" field from the JSON response.
health=$(sed -n 's/.*"status":"\([^"]*\)".*/\1/p' <<<"$health_response")
case "$health" in
  green | yellow) ;;
  *)
    echo "FAIL: OpenSearch cluster health is ${health:-unknown}"
    exit 1
    ;;
esac
echo " OpenSearch cluster health: $health"
35+
36+
# OTel Collector: an empty OTLP/HTTP trace export must be answered with 200.
echo "==> Checking OTel Collector is accepting OTLP..."
# `|| true`: curl exits non-zero on a connection error; without it `set -e`
# would end the script here, before the FAIL line below could report the
# problem.
otel_status=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:${OTEL_COLLECTOR_PORT_HTTP}/v1/traces" \
  -H "Content-Type: application/json" \
  -d '{"resourceSpans":[]}') || true
if [[ "$otel_status" != "200" ]]; then
  echo "FAIL: OTel Collector OTLP HTTP endpoint returned $otel_status"
  exit 1
fi
echo " OTel Collector OTLP HTTP: OK"
45+
46+
# Prometheus: the /-/healthy endpoint must answer with 200.
echo "==> Checking Prometheus is up..."
# `|| true`: curl exits non-zero on a connection error; without it `set -e`
# would end the script here, before the FAIL line below could report the
# problem.
prom_status=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:${PROMETHEUS_PORT}/-/healthy") || true
if [[ "$prom_status" != "200" ]]; then
  echo "FAIL: Prometheus health check returned $prom_status"
  exit 1
fi
echo " Prometheus: OK"
53+
54+
# OpenSearch Dashboards: the authenticated /api/status endpoint must answer
# with 200.
echo "==> Checking OpenSearch Dashboards is up..."
# `|| true`: curl exits non-zero on a connection error; without it `set -e`
# would end the script here, before the FAIL line below could report the
# problem.
dashboards_status=$(curl -s -o /dev/null -w "%{http_code}" -u "${OPENSEARCH_USER}:${OPENSEARCH_PASSWORD}" \
  "http://localhost:${OPENSEARCH_DASHBOARDS_PORT}/api/status") || true
if [[ "$dashboards_status" != "200" ]]; then
  echo "FAIL: OpenSearch Dashboards returned $dashboards_status"
  exit 1
fi
echo " OpenSearch Dashboards: OK"
62+
63+
# Push one span with a fixed trace ID through the collector's OTLP/HTTP
# endpoint.
echo "==> Sending test trace through OTel Collector..."
# -w appends the HTTP status on its own final line after the response body.
# `|| true`: keep going on a curl connection error so the FAIL line below is
# printed instead of `set -e` ending the script silently.
trace_response=$(curl -s -w "\n%{http_code}" "http://localhost:${OTEL_COLLECTOR_PORT_HTTP}/v1/traces" \
  -H "Content-Type: application/json" \
  -d '{
    "resourceSpans": [{
      "resource": {"attributes": [{"key": "service.name", "value": {"stringValue": "e2e-test"}}]},
      "scopeSpans": [{
        "spans": [{
          "traceId": "5b8efff798038103d269b633813fc60c",
          "spanId": "eee19b7ec3c1b174",
          "name": "e2e-test-span",
          "kind": 1,
          "startTimeUnixNano": "1000000000",
          "endTimeUnixNano": "2000000000",
          "status": {}
        }]
      }]
    }]
  }') || true
# Split the captured output: the last line is the status code, everything
# before it is the response body.
trace_status="${trace_response##*$'\n'}"
trace_body=""
if [[ "$trace_response" == *$'\n'* ]]; then
  trace_body="${trace_response%$'\n'*}"
fi
if [[ "$trace_status" != "200" ]]; then
  echo "FAIL: Sending test trace returned $trace_status"
  # Show the collector's answer, which usually explains the rejection.
  if [[ -n "$trace_body" ]]; then
    echo " Response body: $trace_body"
  fi
  exit 1
fi
echo " Test trace sent: OK"
88+
89+
# Poll OpenSearch until the test trace is searchable. Span and trace indices
# are matched by wildcard, and both the `traceId` and `traceID` field
# spellings are queried.
echo "==> Verifying trace landed in OpenSearch..."
TRACE_ID="5b8efff798038103d269b633813fc60c"
MAX_RETRIES=90
trace_query="{\"query\":{\"bool\":{\"should\":[{\"term\":{\"traceId\":\"$TRACE_ID\"}},{\"term\":{\"traceID\":\"$TRACE_ID\"}}]}}}"
trace_found=false

for ((i = 1; i <= MAX_RETRIES; i++)); do
  # A transient curl failure counts as "not found yet" and is retried,
  # instead of `set -e`/`pipefail` ending the whole run.
  search_response=$(curl "${CURL_OPTS[@]}" "$OPENSEARCH_URL/*span*,*trace*/_search" \
    -H "Content-Type: application/json" \
    -d "$trace_query") || search_response=""
  # Extract hits.total.value; keep only the first match and treat a missing
  # value as zero.
  hits=$(sed -n 's/.*"total":{"value":\([0-9]*\).*/\1/p' <<<"$search_response")
  hits="${hits%%$'\n'*}"
  if ((${hits:-0} > 0)); then
    echo " Trace found in OpenSearch after ${i}s"
    trace_found=true
    break
  fi
  # Roughly one attempt per second; no need to sleep after the last one.
  if ((i < MAX_RETRIES)); then
    sleep 1
  fi
done

if [[ "$trace_found" != true ]]; then
  echo "FAIL: Trace not found in OpenSearch after ${MAX_RETRIES}s"
  exit 1
fi

echo ""
echo "==> All E2E checks passed!"

0 commit comments

Comments
 (0)