Skip to content

e2e Test - full-sweep --seq-lens 1k1k --model-prefix dsr1 --multi-node #158

e2e Test - full-sweep --seq-lens 1k1k --model-prefix dsr1 --multi-node

e2e Test - full-sweep --seq-lens 1k1k --model-prefix dsr1 --multi-node #158

Workflow file for this run

# End-to-end benchmark sweep, started manually from the Actions tab.
# The only input is a free-form string; the jobs below pass it to the
# matrix generator script and reuse it in job names.
name: End-to-End Tests
run-name: 'e2e Test - ${{ github.event.inputs.generate-cli-command }}'
on:
  workflow_dispatch:
    inputs:
      generate-cli-command:
        type: string
        required: true
        description: 'Command passed to generate matrix script'
jobs:
# Builds the benchmark search space: runs the sweep-config generator with the
# dispatch input as its arguments and publishes the generator's stdout as the
# `search-space-config` job output, which downstream jobs parse with fromJson.
get-jobs:
  runs-on: ubuntu-latest
  outputs:
    search-space-config: ${{ steps.get-jobs.outputs.search-space-config }}
  steps:
    - name: Checkout code
      uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
    - id: get-jobs
      # Arrays and here-strings below are bash features.
      shell: bash
      env:
        # Pass the dispatch input through the environment instead of splicing
        # `${{ ... }}` into the script text, so its content is never parsed
        # as shell source (no command substitution, no `;` chaining).
        GENERATE_CLI_COMMAND: ${{ inputs.generate-cli-command }}
      run: |
        pip install pydantic
        # Split the input on whitespace into an argv array. Shell quoting
        # inside the input is NOT interpreted, so every argument must be a
        # single whitespace-free token (e.g. "full-sweep --seq-lens 1k1k").
        read -r -a CLI_ARGS <<< "$GENERATE_CLI_COMMAND"
        CONFIG_JSON=$(python3 "${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py" \
          "${CLI_ARGS[@]}" \
          --runner-config .github/configs/runners.yaml \
          --config-files .github/configs/nvidia-master.yaml .github/configs/amd-master.yaml)
        # NOTE(review): the key=value form only supports a single-line value;
        # this assumes the generator prints compact one-line JSON - confirm.
        echo "search-space-config=$CONFIG_JSON" >> "$GITHUB_OUTPUT"
# Debug job: logs the generated search space, its first entry, and that
# entry's `prefill` section so the matrix routing below can be inspected.
test:
  runs-on: ubuntu-latest
  needs: get-jobs
  steps:
    - id: get-jobs
      env:
        # Values reach the script through the environment rather than being
        # interpolated into the shell text: generated JSON contains double
        # quotes that would otherwise terminate the surrounding "..." string,
        # and objects/arrays interpolated directly are rendered as a type
        # placeholder instead of their contents, hence the toJson() calls.
        SEARCH_SPACE: ${{ needs.get-jobs.outputs.search-space-config }}
        FIRST_ENTRY: ${{ toJson(fromJson(needs.get-jobs.outputs.search-space-config)[0]) }}
        FIRST_PREFILL: ${{ toJson(fromJson(needs.get-jobs.outputs.search-space-config)[0].prefill) }}
      run: |
        set -x
        echo "$SEARCH_SPACE"
        echo "$FIRST_ENTRY"
        echo "$FIRST_PREFILL"
# test-sweep:
# needs: get-jobs
# uses: ./.github/workflows/benchmark-tmpl.yml
# name: ${{ inputs.generate-cli-command }}
# strategy:
# fail-fast: false
# matrix:
# config: ${{ fromJson(needs.get-jobs.outputs.search-space-config) }}
# secrets: inherit
# with:
# exp-name: ${{ matrix.config.exp-name }}
# isl: ${{ matrix.config.isl }}
# osl: ${{ matrix.config.osl }}
# max-model-len: ${{ matrix.config.max-model-len }}
# runner: ${{ matrix.config.runner }}
# image: ${{ matrix.config.image }}
# model: ${{ matrix.config.model }}
# framework: ${{ matrix.config.framework }}
# precision: ${{ matrix.config.precision }}
# tp: ${{ matrix.config.tp }}
# ep: ${{ matrix.config.ep }}
# dp-attn: ${{ matrix.config.dp-attn }}
# conc: ${{ matrix.config.conc }}
# Multi-node sweep: calls benchmark-multinode-tmpl.yml once per search-space
# entry. Runs only when the search space is non-empty and its FIRST entry has
# a `prefill` section; only element [0] is inspected, so the whole matrix is
# routed by that one entry.
test-sweep-multi-node:
  needs: get-jobs
  if: ${{ needs.get-jobs.outputs.search-space-config != '[]' && fromJson(needs.get-jobs.outputs.search-space-config)[0].prefill != null }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  name: ${{ inputs.generate-cli-command }} multi-node
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-jobs.outputs.search-space-config) }}
  secrets: inherit
  with:
    # Sequence lengths come from the matrix entry, as in the single-node job,
    # so sweeps other than 1k1k are benchmarked with the lengths they asked
    # for. The previous hard-coded 1k1k values remain as the fallback for
    # entries that do not carry these keys.
    isl: ${{ matrix.config.isl || 1024 }}
    osl: ${{ matrix.config.osl || 1024 }}
    max-model-len: ${{ matrix.config.max-model-len || 2048 }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    exp-name: ${{ matrix.config.exp-name }}
    # Structured values are serialised to JSON strings before being passed.
    conc-list: ${{ toJson(matrix.config.conc) }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    # Prefill worker settings.
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
    # Decode worker settings.
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
# Single-node sweep: calls benchmark-tmpl.yml once per search-space entry.
# Runs only when the search space is non-empty and its first entry has no
# `prefill` section (the counterpart of the multi-node condition).
test-sweep-single-node:
  name: ${{ inputs.generate-cli-command }} single-node
  needs: get-jobs
  if: ${{ needs.get-jobs.outputs.search-space-config != '[]' && fromJson(needs.get-jobs.outputs.search-space-config)[0].prefill == null }}
  strategy:
    # Let the remaining matrix entries finish even if one of them fails.
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-jobs.outputs.search-space-config) }}
  uses: ./.github/workflows/benchmark-tmpl.yml
  secrets: inherit
  with:
    # Experiment identity and target.
    exp-name: ${{ matrix.config.exp-name }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    # Sequence lengths.
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    # Parallelism and load settings.
    tp: ${{ matrix.config.tp }}
    ep: ${{ matrix.config.ep }}
    dp-attn: ${{ matrix.config.dp-attn }}
    conc: ${{ matrix.config.conc }}
# collect-results:
# needs: test-sweep
# if: ${{ always() }}
# uses: ./.github/workflows/collect-results.yml
# secrets: inherit
# calc-success-rate:
# needs: collect-results
# if: ${{ always() }}
# runs-on: ubuntu-latest
# env:
# RESULTS_DIR: "results/"
# STATS_FILENAME: "run_stats"
# GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
# steps:
# - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
# with:
# token: ${{ secrets.REPO_PAT }}
# fetch-depth: 0
# - name: Download results artifacts
# uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
# with:
# path: ${{ env.RESULTS_DIR }}
# pattern: results_*
# - name: Install python dependencies
# run: pip install PyGithub
# - name: Calculate success rate
# run: python3 utils/calc_success_rate.py $STATS_FILENAME
# - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
# with:
# name: "run-stats"
# path: ${{ env.STATS_FILENAME }}.json