# Jepsen Scheduled Stress Test #145
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Triggers: a scheduled run every six hours, plus a manual run whose inputs
# tune the workloads. Scheduled runs supply no inputs; the job steps
# substitute their own fallbacks through `inputs.<name> || '<fallback>'`.
on:
  schedule:
    - cron: '0 */6 * * *'
  workflow_dispatch:
    inputs:
      time-limit:
        description: 'Workload runtime seconds'
        required: false
        default: '300'
      rate:
        description: 'Ops/sec per worker'
        required: false
        default: '5'
      concurrency:
        description: 'Number of worker threads (must be multiple of 4 for S3)'
        required: false
        default: '4'
      key-count:
        description: 'Number of distinct keys per workload'
        required: false
        default: '8'
      max-writes-per-key:
        description: 'Maximum writes per key before exhaustion'
        required: false
        default: '150'
      max-txn-length:
        description: 'Maximum micro-ops per transaction'
        required: false
        default: '4'
# Display name of the workflow.
name: Jepsen Scheduled Stress Test

# Runs for the same workflow and ref share one concurrency group.
# `cancel-in-progress` is not set here.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-jepsen-scheduled

# The workflow token is granted read-only access to repository contents.
permissions:
  contents: read
jobs:
  # Single job: installs the toolchain, starts the elastickv demo cluster in
  # the background, runs the Redis, DynamoDB, DynamoDB per-type and S3 Jepsen
  # workloads against it, then collects diagnostics and stops the cluster.
  test:
    runs-on: ubuntu-latest
    env:
      GOCACHE: /tmp/go-build
    steps:
      - uses: actions/checkout@v6
        with:
          submodules: recursive
      - uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: '21'
      - uses: actions/setup-go@v6
        with:
          go-version-file: 'go.mod'
      - name: Install netcat and graphviz
        run: sudo apt-get update && sudo apt-get install -y netcat-openbsd graphviz
      - name: Install Leiningen
        run: |
          # -f: fail on an HTTP error instead of saving the error page as
          # the lein script.
          curl -fL https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein > ~/lein
          chmod +x ~/lein
          ~/lein version
      - name: Pre-fetch Go modules
        run: |
          mkdir -p "$GOCACHE" /tmp/go-tmp
          export GOCACHE GOTMPDIR=/tmp/go-tmp
          go mod download
      - name: Run Jepsen unit tests
        working-directory: jepsen
        run: ~/lein test
      - name: Launch demo cluster
        run: |
          set -euo pipefail
          mkdir -p "$GOCACHE" /tmp/go-tmp
          export GOTMPDIR=/tmp/go-tmp
          # Start the demo in the background and record the PID of `go run`
          # so the final step can stop it.
          nohup go run cmd/server/demo.go > /tmp/elastickv-demo.log 2>&1 &
          echo $! > /tmp/elastickv-demo.pid
          echo "Waiting for redis (63791-63793), dynamo (63801-63803), and s3 (63901-63903) listeners..."
          # Poll once per second, up to 90 attempts, until all nine ports
          # accept connections.
          for i in {1..90}; do
            if nc -z 127.0.0.1 63791 && nc -z 127.0.0.1 63792 && nc -z 127.0.0.1 63793 \
              && nc -z 127.0.0.1 63801 && nc -z 127.0.0.1 63802 && nc -z 127.0.0.1 63803 \
              && nc -z 127.0.0.1 63901 && nc -z 127.0.0.1 63902 && nc -z 127.0.0.1 63903; then
              echo "Cluster is up"
              exit 0
            fi
            sleep 1
          done
          echo "Demo cluster failed to start; dumping log:"
          tail -n 200 /tmp/elastickv-demo.log || true
          exit 1
      - name: Run Redis Jepsen workload against elastickv
        working-directory: jepsen
        timeout-minutes: 10
        # Inputs are passed through the environment rather than spliced into
        # the script text, so their content is never parsed as shell code.
        env:
          JEPSEN_TIME_LIMIT: ${{ inputs.time-limit || '150' }}
          JEPSEN_RATE: ${{ inputs.rate || '10' }}
          JEPSEN_CONCURRENCY: ${{ inputs.concurrency || '8' }}
          JEPSEN_KEY_COUNT: ${{ inputs.key-count || '16' }}
          JEPSEN_MAX_WRITES_PER_KEY: ${{ inputs.max-writes-per-key || '250' }}
          JEPSEN_MAX_TXN_LENGTH: ${{ inputs.max-txn-length || '4' }}
        run: |
          timeout 480 ~/lein run -m elastickv.redis-workload \
            --time-limit "$JEPSEN_TIME_LIMIT" \
            --rate "$JEPSEN_RATE" \
            --concurrency "$JEPSEN_CONCURRENCY" \
            --key-count "$JEPSEN_KEY_COUNT" \
            --max-writes-per-key "$JEPSEN_MAX_WRITES_PER_KEY" \
            --max-txn-length "$JEPSEN_MAX_TXN_LENGTH" \
            --ports 63791,63792,63793 \
            --host 127.0.0.1
      - name: Run DynamoDB Jepsen workload against elastickv
        working-directory: jepsen
        timeout-minutes: 10
        # Inputs are passed through the environment rather than spliced into
        # the script text, so their content is never parsed as shell code.
        env:
          JEPSEN_TIME_LIMIT: ${{ inputs.time-limit || '150' }}
          JEPSEN_RATE: ${{ inputs.rate || '10' }}
          JEPSEN_CONCURRENCY: ${{ inputs.concurrency || '8' }}
          JEPSEN_KEY_COUNT: ${{ inputs.key-count || '16' }}
          JEPSEN_MAX_WRITES_PER_KEY: ${{ inputs.max-writes-per-key || '250' }}
          JEPSEN_MAX_TXN_LENGTH: ${{ inputs.max-txn-length || '4' }}
        run: |
          timeout 480 ~/lein run -m elastickv.dynamodb-workload --local \
            --time-limit "$JEPSEN_TIME_LIMIT" \
            --rate "$JEPSEN_RATE" \
            --concurrency "$JEPSEN_CONCURRENCY" \
            --key-count "$JEPSEN_KEY_COUNT" \
            --max-writes-per-key "$JEPSEN_MAX_WRITES_PER_KEY" \
            --max-txn-length "$JEPSEN_MAX_TXN_LENGTH" \
            --dynamo-ports 63801,63802,63803 \
            --host 127.0.0.1
      - name: Run DynamoDB per-type Jepsen workloads against elastickv
        working-directory: jepsen
        # The per-type sweep is a coverage check across all 10 attribute
        # types, not the deep stress run — it uses its own shorter
        # time-limit so the 10-type loop fits comfortably inside the job
        # timeout regardless of the workflow_dispatch time-limit input.
        # The per-invocation `timeout` is derived from TYPE_TL + buffer
        # so bumping TYPE_TL never races against the outer timeout.
        timeout-minutes: 30
        env:
          # Per-type sweep is a coverage check, not the deep stress run, so
          # it uses its own shorter runtime and smaller history density than
          # the parent dynamodb-workload step. Keeping per-key ops modest
          # also keeps Knossos's linearizability analysis inside its
          # time budget (dense histories cause :valid? :unknown verdicts).
          TYPE_TL: "60"
          TYPE_CONCURRENCY: "4"
          TYPE_KEY_COUNT: "8"
          TYPE_MAX_WRITES: "80"
          # The only workflow input this step honours; passed through the
          # environment so it is never parsed as shell code.
          TYPE_RATE: ${{ inputs.rate || '5' }}
        run: |
          # Run every type independently: one failure does not stop
          # the sweep so the final summary shows which specific types
          # passed/failed. The step still fails if any type failed.
          PER_TYPE_TIMEOUT=$((TYPE_TL + 180))
          declare -A RESULT
          FAILED=()
          for t in string number binary bool null string-set number-set binary-set list map; do
            echo "::group::value-type=${t}"
            # Disable errexit around the run so its exit code can be
            # captured instead of aborting the loop.
            set +e
            timeout "${PER_TYPE_TIMEOUT}" ~/lein run -m elastickv.dynamodb-types-workload --local \
              --time-limit "${TYPE_TL}" \
              --rate "${TYPE_RATE}" \
              --concurrency "${TYPE_CONCURRENCY}" \
              --key-count "${TYPE_KEY_COUNT}" \
              --max-writes-per-key "${TYPE_MAX_WRITES}" \
              --value-type "${t}" \
              --dynamo-ports 63801,63802,63803 \
              --host 127.0.0.1
            rc=$?
            set -e
            if [ "$rc" -eq 0 ]; then
              RESULT[$t]="pass"
            else
              RESULT[$t]="fail(${rc})"
              FAILED+=("$t")
            fi
            echo "::endgroup::"
          done
          echo
          echo "=== per-type jepsen summary ==="
          for t in string number binary bool null string-set number-set binary-set list map; do
            printf '  %-12s %s\n' "$t" "${RESULT[$t]}"
          done
          if [ ${#FAILED[@]} -ne 0 ]; then
            echo "FAILED types: ${FAILED[*]}"
            exit 1
          fi
      - name: Run S3 Jepsen workload against elastickv
        working-directory: jepsen
        timeout-minutes: 10
        # Inputs are passed through the environment rather than spliced into
        # the script text, so their content is never parsed as shell code.
        env:
          JEPSEN_TIME_LIMIT: ${{ inputs.time-limit || '150' }}
          JEPSEN_RATE: ${{ inputs.rate || '10' }}
          JEPSEN_CONCURRENCY: ${{ inputs.concurrency || '8' }}
          JEPSEN_KEY_COUNT: ${{ inputs.key-count || '16' }}
          JEPSEN_MAX_WRITES_PER_KEY: ${{ inputs.max-writes-per-key || '250' }}
        run: |
          timeout 480 ~/lein run -m elastickv.s3-workload --local \
            --time-limit "$JEPSEN_TIME_LIMIT" \
            --rate "$JEPSEN_RATE" \
            --concurrency "$JEPSEN_CONCURRENCY" \
            --key-count "$JEPSEN_KEY_COUNT" \
            --max-writes-per-key "$JEPSEN_MAX_WRITES_PER_KEY" \
            --threads-per-key 4 \
            --s3-ports 63901,63902,63903 \
            --host 127.0.0.1
      # Placed after the last workload so a failure in any of them (S3
      # included) uploads the store. Once a step fails, later steps without an
      # `if:` are skipped, so this still runs for earlier failures. The
      # artifact name is kept unchanged.
      - name: Upload Jepsen store on failure
        if: failure()
        uses: actions/upload-artifact@v7
        with:
          name: jepsen-store-types
          path: jepsen/store
          retention-days: 7
      - name: Dump demo cluster log on failure
        if: failure()
        run: tail -n 500 /tmp/elastickv-demo.log || true
      - name: Stop demo cluster
        if: always()
        run: |
          if [ -f /tmp/elastickv-demo.pid ]; then
            pid=$(cat /tmp/elastickv-demo.pid)
            # The recorded PID is `go run`, which runs the compiled server as
            # a child process; signal the children first so the server is not
            # left running once its parent is gone.
            pkill -TERM -P "$pid" 2>/dev/null || true
            kill "$pid" 2>/dev/null || true
            # `wait` only works on children of the current shell and every
            # step runs in a fresh shell, so poll for exit instead (max 30 s).
            for _ in $(seq 1 30); do
              kill -0 "$pid" 2>/dev/null || break
              sleep 1
            done
          fi