backup: snapshot_reader (Phase 0a foundation for snapshot-decode binary) #3520
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-push Jepsen smoke test. Builds the elastickv binary, starts a local
# three-node cluster on 127.0.0.1, and runs short Jepsen workloads against
# its Redis, DynamoDB, S3 and SQS listeners.
name: Jepsen Test

on:
  push:
  workflow_dispatch:

permissions:
  contents: read

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-jepsen-test

jobs:
  test:
    runs-on: ubuntu-latest
    env:
      GOCACHE: /tmp/go-build
    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive
      - uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: '21'
      - uses: actions/setup-go@v6
        with:
          go-version-file: 'go.mod'
      # netcat is used below to probe the cluster's listeners.
      - name: Install netcat and graphviz
        run: sudo apt-get update && sudo apt-get install -y netcat-openbsd graphviz
      - name: Install Leiningen
        run: |
          # -f: fail on an HTTP error instead of saving the error page as the
          # lein script. --retry: ride out transient download failures.
          # -o (rather than a shell redirect) lets curl truncate the file
          # between retries.
          curl -fsSL --retry 3 -o "$HOME/lein" \
            https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein
          chmod +x ~/lein
          ~/lein version
      # See jepsen-test-scheduled.yml for the rationale: Maven Central
      # 429s during peak hours have been knocking the scheduled stress
      # run out, and the per-push run uses the same dependency set.
      - name: Cache Maven and Leiningen artifacts
        uses: actions/cache@v5
        with:
          path: |
            ~/.m2/repository
            ~/.lein
          key: ${{ runner.os }}-maven-${{ hashFiles('jepsen/project.clj') }}
          restore-keys: |
            ${{ runner.os }}-maven-
      - name: Pre-fetch Go modules and build binary
        run: |
          mkdir -p "$GOCACHE" /tmp/go-tmp
          export GOCACHE GOTMPDIR=/tmp/go-tmp
          go mod download
          go build -o /tmp/elastickv-bin .
      - name: Warm Leiningen Maven cache
        working-directory: jepsen
        run: |
          # Retry `lein deps` up to $max times with a linearly growing
          # back-off (30s, 60s, ...) before giving up.
          set -uo pipefail
          n=0
          max=3
          until ~/lein deps; do
            n=$((n + 1))
            if [ "$n" -ge "$max" ]; then
              echo "lein deps failed after $n attempts" >&2
              exit 1
            fi
            sleep_secs=$((n * 30))
            echo "lein deps failed (attempt $n/$max), retrying in ${sleep_secs}s..." >&2
            sleep "$sleep_secs"
          done
      - name: Run Jepsen unit tests
        working-directory: jepsen
        run: ~/lein test
      - name: Launch etcd-backed cluster
        run: |
          set -euo pipefail
          mkdir -p /tmp/elastickv-ci
          BOOTSTRAP_MEMBERS="n1=127.0.0.1:50051,n2=127.0.0.1:50052,n3=127.0.0.1:50053"
          RAFT_REDIS_MAP="127.0.0.1:50051=127.0.0.1:63791,127.0.0.1:50052=127.0.0.1:63792,127.0.0.1:50053=127.0.0.1:63793"
          RAFT_S3_MAP="127.0.0.1:50051=127.0.0.1:63901,127.0.0.1:50052=127.0.0.1:63902,127.0.0.1:50053=127.0.0.1:63903"
          RAFT_DYNAMO_MAP="127.0.0.1:50051=127.0.0.1:63801,127.0.0.1:50052=127.0.0.1:63802,127.0.0.1:50053=127.0.0.1:63803"
          RAFT_SQS_MAP="127.0.0.1:50051=127.0.0.1:63501,127.0.0.1:50052=127.0.0.1:63502,127.0.0.1:50053=127.0.0.1:63503"
          # Start with an empty PID file; one PID per node is appended below
          # and read back by the "Stop demo cluster" step.
          : > /tmp/elastickv-demo.pid
          for node in 1 2 3; do
            nohup /tmp/elastickv-bin \
              --address "127.0.0.1:5005${node}" \
              --redisAddress "127.0.0.1:6379${node}" \
              --dynamoAddress "127.0.0.1:6380${node}" \
              --s3Address "127.0.0.1:6390${node}" \
              --sqsAddress "127.0.0.1:6350${node}" \
              --metricsAddress "" \
              --pprofAddress "" \
              --raftId "n${node}" \
              --raftDataDir "/tmp/elastickv-ci/n${node}" \
              --raftBootstrapMembers "$BOOTSTRAP_MEMBERS" \
              --raftRedisMap "$RAFT_REDIS_MAP" \
              --raftS3Map "$RAFT_S3_MAP" \
              --raftDynamoMap "$RAFT_DYNAMO_MAP" \
              --raftSqsMap "$RAFT_SQS_MAP" \
              > "/tmp/elastickv-demo-n${node}.log" 2>&1 &
            echo $! >> /tmp/elastickv-demo.pid
          done
          echo "Waiting for redis (63791-63793), dynamo (63801-63803), s3 (63901-63903), and sqs (63501-63503) listeners..."
          # Probe once per second, for up to 90 attempts, until every
          # listener on every node accepts a TCP connection.
          for i in {1..90}; do
            if nc -z 127.0.0.1 63791 && nc -z 127.0.0.1 63792 && nc -z 127.0.0.1 63793 \
              && nc -z 127.0.0.1 63801 && nc -z 127.0.0.1 63802 && nc -z 127.0.0.1 63803 \
              && nc -z 127.0.0.1 63901 && nc -z 127.0.0.1 63902 && nc -z 127.0.0.1 63903 \
              && nc -z 127.0.0.1 63501 && nc -z 127.0.0.1 63502 && nc -z 127.0.0.1 63503; then
              echo "Cluster is up"
              exit 0
            fi
            sleep 1
          done
          echo "Demo cluster failed to start; dumping log:"
          tail -n 200 /tmp/elastickv-demo-n1.log || true
          tail -n 200 /tmp/elastickv-demo-n2.log || true
          tail -n 200 /tmp/elastickv-demo-n3.log || true
          exit 1
      # Each workload below is bounded twice: `timeout 120` stops the lein
      # process itself, and `timeout-minutes` caps the whole step.
      - name: Run Redis Jepsen workload against elastickv
        working-directory: jepsen
        timeout-minutes: 3
        run: |
          timeout 120 ~/lein run -m elastickv.redis-workload \
            --time-limit 5 --rate 5 --concurrency 5 \
            --ports 63791,63792,63793 --host 127.0.0.1
      - name: Run DynamoDB Jepsen workload against elastickv
        working-directory: jepsen
        timeout-minutes: 3
        run: |
          timeout 120 ~/lein run -m elastickv.dynamodb-workload --local \
            --time-limit 5 --rate 5 --concurrency 5 \
            --dynamo-ports 63801,63802,63803 --host 127.0.0.1
      - name: Run DynamoDB per-type Jepsen workloads against elastickv
        working-directory: jepsen
        timeout-minutes: 10
        run: |
          # Run every type even if one fails, so the log shows which
          # specific attribute types pass and which fail. The step
          # still fails at the end if any single type failed.
          TYPES=(string number binary bool null string-set number-set binary-set list map)
          declare -A RESULT
          FAILED=()
          for t in "${TYPES[@]}"; do
            echo "::group::value-type=${t}"
            # `|| rc=$?` records the exit status without letting the
            # shell's errexit abort the loop on the first failing type.
            rc=0
            timeout 120 ~/lein run -m elastickv.dynamodb-types-workload --local \
              --time-limit 5 --rate 5 --concurrency 4 \
              --value-type "${t}" \
              --dynamo-ports 63801,63802,63803 --host 127.0.0.1 || rc=$?
            if [ "$rc" -eq 0 ]; then
              RESULT[$t]="pass"
            else
              RESULT[$t]="fail(${rc})"
              FAILED+=("$t")
            fi
            echo "::endgroup::"
          done
          echo
          echo "=== per-type jepsen summary ==="
          for t in "${TYPES[@]}"; do
            printf ' %-12s %s\n' "$t" "${RESULT[$t]}"
          done
          if [ ${#FAILED[@]} -ne 0 ]; then
            echo "FAILED types: ${FAILED[*]}"
            exit 1
          fi
      # failure() is true once any earlier step in the job has failed, so
      # this also fires when the Redis or plain DynamoDB workload fails.
      # Steps after this one are not covered by this upload.
      - name: Upload Jepsen store on per-type failure
        if: failure()
        uses: actions/upload-artifact@v7
        with:
          name: jepsen-store-types
          path: jepsen/store
          retention-days: 7
      - name: Run S3 Jepsen workload against elastickv
        working-directory: jepsen
        timeout-minutes: 3
        run: |
          timeout 120 ~/lein run -m elastickv.s3-workload --local \
            --time-limit 5 --rate 10 --concurrency 10 \
            --s3-ports 63901,63902,63903 --host 127.0.0.1
      - name: Run SQS HT-FIFO Jepsen workload against elastickv
        working-directory: jepsen
        # The HT-FIFO workload runs sends and receives across a 4-partition
        # FIFO queue with content-based deduplication. The custom checker
        # validates within-group ordering, no loss, and no duplicates.
        # See jepsen/src/elastickv/sqs_htfifo_workload.clj.
        #
        # --drain-time 15: in --local mode the nemesis is a no-op, so no
        # message can become invisible due to partition/kill — the 40s
        # default drain (which protects against fault-induced
        # visibility-timeout races) is overkill here. 15s leaves ample
        # headroom under the 120s shell timeout against JVM startup and
        # the 5s main phase.
        timeout-minutes: 3
        run: |
          timeout 120 ~/lein run -m elastickv.sqs-htfifo-workload --local \
            --time-limit 5 --rate 5 --concurrency 5 \
            --partition-count 4 --group-count 6 \
            --drain-time 15 \
            --sqs-ports 63501,63502,63503 --host 127.0.0.1
      - name: Stop demo cluster
        if: always()
        run: |
          pidfile=/tmp/elastickv-demo.pid
          # Nothing to stop if the launch step never wrote its PID file.
          [ -f "$pidfile" ] || exit 0
          mapfile -t pids < "$pidfile"
          # Ask every node to shut down (SIGTERM); a PID that is already
          # gone is not an error.
          for pid in "${pids[@]}"; do
            [ -n "$pid" ] || continue
            kill "$pid" 2>/dev/null || true
          done
          # The nodes were started by an earlier step's shell, so they are
          # not children of this one and `wait` cannot be used on them.
          # Poll with `kill -0` instead, for up to 10 seconds.
          for _ in {1..10}; do
            alive=()
            for pid in "${pids[@]}"; do
              if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
                alive+=("$pid")
              fi
            done
            if [ "${#alive[@]}" -eq 0 ]; then
              exit 0
            fi
            sleep 1
          done
          # Anything still running after the grace period is force-killed.
          echo "Force-killing lingering elastickv processes: ${alive[*]}" >&2
          kill -KILL "${alive[@]}" 2>/dev/null || true