# e2e Test - qwen3.5-fp4-mi355x-sglang-disagg #2563
# Workflow file for this run
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header. Two entry points are declared -- manual dispatch and
# reusable-workflow call -- and both accept the same four string inputs.
name: End-to-End Tests
run-name: e2e Test - ${{ inputs.test-name || inputs.generate-cli-command || github.event.inputs.generate-cli-command }}

on:
  # Manual trigger.
  workflow_dispatch:
    inputs:
      generate-cli-command:
        type: string
        required: true
        description: "Command passed to generate matrix script"
      test-name:
        type: string
        required: false
        description: "Name for this test run"
      ref:
        type: string
        required: false
        description: "Ref (branch/sha) to checkout for generating configs"
      duration-override:
        type: string
        required: false
        default: ""
        description: "Override matrix.config.duration (seconds). Empty = use matrix value."

  # Invocation from another workflow; mirrors the dispatch inputs above.
  workflow_call:
    inputs:
      generate-cli-command:
        type: string
        required: true
        description: "Command passed to generate matrix script"
      test-name:
        type: string
        required: false
        description: "Name for this test run"
      ref:
        type: string
        required: false
        description: "Ref (branch/sha) to checkout for generating configs"
      duration-override:
        type: string
        required: false
        default: ""
        description: "Override matrix.config.duration (seconds). Empty = use matrix value."
jobs:
  # Builds the sweep matrices. Runs generate_sweep_configs.py with the
  # caller-supplied arguments, then splits its JSON output into six lists that
  # the downstream jobs consume through this job's outputs.
  get-jobs:
    runs-on: ubuntu-latest
    outputs:
      single-node-config: ${{ steps.get-jobs.outputs.single-node-config }}
      multi-node-config: ${{ steps.get-jobs.outputs.multi-node-config }}
      eval-config: ${{ steps.get-jobs.outputs.eval-config }}
      multi-node-eval-config: ${{ steps.get-jobs.outputs.multi-node-eval-config }}
      agentic-config: ${{ steps.get-jobs.outputs.agentic-config }}
      multi-node-agentic-config: ${{ steps.get-jobs.outputs.multi-node-agentic-config }}
    steps:
      # An empty or unset `ref` input is falsy, so the expression falls back to
      # the triggering commit.
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          ref: ${{ inputs.ref || github.sha }}
      - id: get-jobs
        env:
          # Handed over through the environment rather than templated into the
          # script text, so the input is never parsed as shell code.
          GENERATE_CLI_COMMAND: ${{ inputs.generate-cli-command || github.event.inputs.generate-cli-command }}
        run: |
          pip install pydantic
          # xargs splits the command into arguments (honouring quotes and
          # backslashes) without performing any shell expansion on it.
          CONFIG_JSON=$(printf '%s\n' "$GENERATE_CLI_COMMAND" | xargs python3 "${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py")
          # Entries carrying a 'prefill' key go to the multi-node lists; entries
          # with scenario-type 'agentic-coding' go to the agentic lists.
          AGENTIC=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if x.get('scenario-type') == 'agentic-coding' and 'prefill' not in x]))")
          MULTI_AGENTIC=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if x.get('scenario-type') == 'agentic-coding' and 'prefill' in x]))")
          # Benchmark lists skip agentic entries and entries flagged eval-only.
          SINGLE=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and x.get('scenario-type') != 'agentic-coding' and not x.get('eval-only', False)]))")
          MULTI=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x and x.get('scenario-type') != 'agentic-coding' and not x.get('eval-only', False)]))")
          # Eval lists keep entries with run-eval set.
          # NOTE(review): the multi-node eval filter does not exclude
          # 'agentic-coding' entries, unlike the single-node one -- confirm
          # whether that asymmetry is intended.
          EVALS=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' not in x and x.get('scenario-type') != 'agentic-coding' and x.get('run-eval', False)]))")
          MULTI_EVAL=$(echo "$CONFIG_JSON" | python3 -c "import sys,json; d=json.load(sys.stdin); print(json.dumps([x for x in d if 'prefill' in x and x.get('run-eval', False)]))")
          echo "agentic-config=$AGENTIC" >> "$GITHUB_OUTPUT"
          echo "multi-node-agentic-config=$MULTI_AGENTIC" >> "$GITHUB_OUTPUT"
          echo "single-node-config=$SINGLE" >> "$GITHUB_OUTPUT"
          echo "multi-node-config=$MULTI" >> "$GITHUB_OUTPUT"
          echo "eval-config=$EVALS" >> "$GITHUB_OUTPUT"
          echo "multi-node-eval-config=$MULTI_EVAL" >> "$GITHUB_OUTPUT"
# --- job: test-sweep-multi-node ---
  # Calls the multi-node benchmark template once per entry of
  # get-jobs' multi-node-config list; skipped when that list is '[]'.
  test-sweep-multi-node:
    name: multi-node /
    needs: get-jobs
    if: ${{ needs.get-jobs.outputs.multi-node-config != '[]' }}
    uses: ./.github/workflows/benchmark-multinode-tmpl.yml
    secrets: inherit
    strategy:
      # One failing matrix entry does not cancel the others.
      fail-fast: false
      matrix:
        config: ${{ fromJson(needs.get-jobs.outputs.multi-node-config) }}
    with:
      # Experiment identity and target.
      exp-name: ${{ matrix.config.exp-name }}
      runner: ${{ matrix.config.runner }}
      image: ${{ matrix.config.image }}
      model: ${{ matrix.config.model }}
      model-prefix: ${{ matrix.config.model-prefix }}
      framework: ${{ matrix.config.framework }}
      precision: ${{ matrix.config.precision }}
      # Workload settings; the concurrency list is forwarded as JSON text.
      isl: ${{ matrix.config.isl }}
      osl: ${{ matrix.config.osl }}
      max-model-len: ${{ matrix.config.max-model-len }}
      conc-list: ${{ toJson(matrix.config.conc) }}
      spec-decoding: ${{ matrix.config.spec-decoding }}
      disagg: ${{ matrix.config.disagg }}
      # Prefill settings.
      prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
      prefill-tp: ${{ matrix.config.prefill.tp }}
      prefill-ep: ${{ matrix.config.prefill.ep }}
      prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
      prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
      # Decode settings.
      decode-num-worker: ${{ matrix.config.decode.num-worker }}
      decode-tp: ${{ matrix.config.decode.tp }}
      decode-ep: ${{ matrix.config.decode.ep }}
      decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
      decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
      # Evals are handled by a separate job.
      run-eval: false
      ref: ${{ inputs.ref }}
# --- job: test-sweep-multi-node-evals ---
  # Calls the multi-node benchmark template in eval-only mode once per entry
  # of get-jobs' multi-node-eval-config list; skipped when that list is '[]'.
  test-sweep-multi-node-evals:
    name: multi-node eval /
    needs: get-jobs
    if: ${{ needs.get-jobs.outputs.multi-node-eval-config != '[]' }}
    uses: ./.github/workflows/benchmark-multinode-tmpl.yml
    secrets: inherit
    strategy:
      # One failing matrix entry does not cancel the others.
      fail-fast: false
      matrix:
        config: ${{ fromJson(needs.get-jobs.outputs.multi-node-eval-config) }}
    with:
      # Experiment identity and target.
      exp-name: ${{ matrix.config.exp-name }}
      runner: ${{ matrix.config.runner }}
      image: ${{ matrix.config.image }}
      model: ${{ matrix.config.model }}
      model-prefix: ${{ matrix.config.model-prefix }}
      framework: ${{ matrix.config.framework }}
      precision: ${{ matrix.config.precision }}
      # Workload settings; the concurrency list is forwarded as JSON text.
      isl: ${{ matrix.config.isl }}
      osl: ${{ matrix.config.osl }}
      max-model-len: ${{ matrix.config.max-model-len }}
      conc-list: ${{ toJson(matrix.config.conc) }}
      spec-decoding: ${{ matrix.config.spec-decoding }}
      disagg: ${{ matrix.config.disagg }}
      # Prefill settings.
      prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
      prefill-tp: ${{ matrix.config.prefill.tp }}
      prefill-ep: ${{ matrix.config.prefill.ep }}
      prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
      prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
      # Decode settings.
      decode-num-worker: ${{ matrix.config.decode.num-worker }}
      decode-tp: ${{ matrix.config.decode.tp }}
      decode-ep: ${{ matrix.config.decode.ep }}
      decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
      decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
      # Eval flags and the eval concurrency taken from the matrix entry.
      run-eval: true
      eval-only: true
      eval-conc: ${{ matrix.config.eval-conc }}
      ref: ${{ inputs.ref }}
# --- job: test-sweep-agentic ---
  # Calls the single-node benchmark template once per entry of get-jobs'
  # agentic-config list; skipped when that list is '[]'.
  test-sweep-agentic:
    name: agentic /
    needs: get-jobs
    if: ${{ needs.get-jobs.outputs.agentic-config != '[]' }}
    uses: ./.github/workflows/benchmark-tmpl.yml
    secrets: inherit
    strategy:
      # One failing matrix entry does not cancel the others.
      fail-fast: false
      matrix:
        config: ${{ fromJson(needs.get-jobs.outputs.agentic-config) }}
    with:
      # Experiment identity and target.
      exp-name: ${{ matrix.config.exp-name }}
      runner: ${{ matrix.config.runner }}
      image: ${{ matrix.config.image }}
      model: ${{ matrix.config.model }}
      model-prefix: ${{ matrix.config.model-prefix }}
      framework: ${{ matrix.config.framework }}
      precision: ${{ matrix.config.precision }}
      # Parallelism and load taken from the matrix entry.
      tp: ${{ matrix.config.tp }}
      ep: ${{ matrix.config.ep }}
      dp-attn: ${{ matrix.config.dp-attn }}
      conc: ${{ matrix.config.conc }}
      offloading: ${{ matrix.config.offloading }}
      # A non-empty duration-override input wins over the matrix duration.
      duration: ${{ inputs.duration-override != '' && inputs.duration-override || matrix.config.duration }}
      # Fixed values passed for every agentic run.
      isl: '0'
      osl: '0'
      max-model-len: '0'
      spec-decoding: 'none'
      disagg: 'false'
      run-eval: false
      scenario-type: agentic-coding
      ref: ${{ inputs.ref }}
# --- job: test-sweep-multi-node-agentic ---
  # Calls the multi-node benchmark template once per entry of get-jobs'
  # multi-node-agentic-config list; skipped when that list is '[]'.
  test-sweep-multi-node-agentic:
    name: multi-node agentic /
    needs: get-jobs
    if: ${{ needs.get-jobs.outputs.multi-node-agentic-config != '[]' }}
    uses: ./.github/workflows/benchmark-multinode-tmpl.yml
    secrets: inherit
    strategy:
      # One failing matrix entry does not cancel the others.
      fail-fast: false
      matrix:
        config: ${{ fromJson(needs.get-jobs.outputs.multi-node-agentic-config) }}
    with:
      # Experiment identity and target.
      exp-name: ${{ matrix.config.exp-name }}
      runner: ${{ matrix.config.runner }}
      image: ${{ matrix.config.image }}
      model: ${{ matrix.config.model }}
      model-prefix: ${{ matrix.config.model-prefix }}
      framework: ${{ matrix.config.framework }}
      precision: ${{ matrix.config.precision }}
      # Fixed sequence-length values passed for every agentic run.
      isl: '0'
      osl: '0'
      max-model-len: '0'
      # The single concurrency value is sent both wrapped in brackets
      # (conc-list) and bare (conc).
      conc-list: '[${{ matrix.config.conc }}]'
      conc: ${{ matrix.config.conc }}
      spec-decoding: ${{ matrix.config.spec-decoding }}
      disagg: ${{ matrix.config.disagg }}
      # Prefill settings.
      prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
      prefill-tp: ${{ matrix.config.prefill.tp }}
      prefill-ep: ${{ matrix.config.prefill.ep }}
      prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
      prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
      # Decode settings.
      decode-num-worker: ${{ matrix.config.decode.num-worker }}
      decode-tp: ${{ matrix.config.decode.tp }}
      decode-ep: ${{ matrix.config.decode.ep }}
      decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
      decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
      # A non-empty duration-override input wins over the matrix duration.
      duration: ${{ inputs.duration-override != '' && inputs.duration-override || matrix.config.duration }}
      run-eval: false
      scenario-type: agentic-coding
      ref: ${{ inputs.ref }}
# --- job: test-sweep-single-node ---
  # Calls the single-node benchmark template once per entry of get-jobs'
  # single-node-config list; skipped when that list is '[]'.
  test-sweep-single-node:
    name: single-node /
    needs: get-jobs
    if: ${{ needs.get-jobs.outputs.single-node-config != '[]' }}
    uses: ./.github/workflows/benchmark-tmpl.yml
    secrets: inherit
    strategy:
      # One failing matrix entry does not cancel the others.
      fail-fast: false
      matrix:
        config: ${{ fromJson(needs.get-jobs.outputs.single-node-config) }}
    with:
      # Experiment identity and target.
      exp-name: ${{ matrix.config.exp-name }}
      runner: ${{ matrix.config.runner }}
      image: ${{ matrix.config.image }}
      model: ${{ matrix.config.model }}
      model-prefix: ${{ matrix.config.model-prefix }}
      framework: ${{ matrix.config.framework }}
      precision: ${{ matrix.config.precision }}
      # Workload settings.
      isl: ${{ matrix.config.isl }}
      osl: ${{ matrix.config.osl }}
      max-model-len: ${{ matrix.config.max-model-len }}
      conc: ${{ matrix.config.conc }}
      spec-decoding: ${{ matrix.config.spec-decoding }}
      disagg: ${{ matrix.config.disagg }}
      # Parallelism settings.
      tp: ${{ matrix.config.tp }}
      ep: ${{ matrix.config.ep }}
      dp-attn: ${{ matrix.config.dp-attn }}
      # Evals are handled by a separate job.
      run-eval: false
      ref: ${{ inputs.ref }}
# --- job: test-sweep-evals ---
  # Calls the single-node benchmark template in eval-only mode once per entry
  # of get-jobs' eval-config list; skipped when that list is '[]'.
  test-sweep-evals:
    name: eval /
    needs: get-jobs
    if: ${{ needs.get-jobs.outputs.eval-config != '[]' }}
    uses: ./.github/workflows/benchmark-tmpl.yml
    secrets: inherit
    strategy:
      # One failing matrix entry does not cancel the others.
      fail-fast: false
      matrix:
        config: ${{ fromJson(needs.get-jobs.outputs.eval-config) }}
    with:
      # Experiment identity and target.
      exp-name: ${{ matrix.config.exp-name }}
      runner: ${{ matrix.config.runner }}
      image: ${{ matrix.config.image }}
      model: ${{ matrix.config.model }}
      model-prefix: ${{ matrix.config.model-prefix }}
      framework: ${{ matrix.config.framework }}
      precision: ${{ matrix.config.precision }}
      # Workload settings.
      isl: ${{ matrix.config.isl }}
      osl: ${{ matrix.config.osl }}
      max-model-len: ${{ matrix.config.max-model-len }}
      conc: ${{ matrix.config.conc }}
      spec-decoding: ${{ matrix.config.spec-decoding }}
      disagg: ${{ matrix.config.disagg }}
      # Parallelism settings.
      tp: ${{ matrix.config.tp }}
      ep: ${{ matrix.config.ep }}
      dp-attn: ${{ matrix.config.dp-attn }}
      # Eval flags.
      run-eval: true
      eval-only: true
      ref: ${{ inputs.ref }}
# --- job: collect-results ---
  # Calls the collect-results workflow with the "bmk" prefix. always() lets it
  # run after upstream failures; it is skipped only when all four benchmark
  # sweeps were skipped.
  collect-results:
    needs: [test-sweep-multi-node, test-sweep-single-node, test-sweep-agentic, test-sweep-multi-node-agentic]
    if: ${{ always() && (needs.test-sweep-multi-node.result != 'skipped' || needs.test-sweep-single-node.result != 'skipped' || needs.test-sweep-agentic.result != 'skipped' || needs.test-sweep-multi-node-agentic.result != 'skipped') }}
    uses: ./.github/workflows/collect-results.yml
    with:
      result-prefix: "bmk"
    secrets: inherit
# --- job: collect-evals ---
  # Calls the collect-evals workflow. always() lets it run after upstream
  # failures; it is skipped only when both eval sweeps were skipped.
  # NOTE(review): the captured page had a "Check failure on line 310 in
  # .github/workflows/e2e-tests.yml" annotation interleaved with this job's
  # lines. The cause is not visible here -- investigate before relying on
  # this job.
  collect-evals:
    needs: [test-sweep-evals, test-sweep-multi-node-evals]
    if: ${{ always() && (needs.test-sweep-evals.result != 'skipped' || needs.test-sweep-multi-node-evals.result != 'skipped') }}
    uses: ./.github/workflows/collect-evals.yml
    secrets: inherit
# --- job: collect-agentic-results ---
  # Downloads the agentic_* artifacts, runs collect_sweep_results.py over them
  # and uploads the aggregated/ directory. always() lets it run after upstream
  # failures; it is skipped only when both agentic sweeps were skipped.
  collect-agentic-results:
    runs-on: ubuntu-latest
    needs: [test-sweep-agentic, test-sweep-multi-node-agentic]
    if: ${{ always() && (needs.test-sweep-agentic.result != 'skipped' || needs.test-sweep-multi-node-agentic.result != 'skipped') }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: true

      # NOTE(review): referenced by tag, while the other actions visible in
      # this file are pinned to a commit SHA -- consider pinning this one too.
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: pip install pandas matplotlib numpy

      - name: Download agentic artifacts
        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
        with:
          path: results/
          pattern: 'agentic_*'

      # PYTHONPATH exposes the benchmark's scripts and analysis directories to
      # the aggregation script.
      - name: Run aggregation
        env:
          PYTHONPATH: utils/agentic-benchmark/scripts:utils/agentic-benchmark/analysis
        run: |
          python utils/agentic-benchmark/scripts/collect_sweep_results.py results/ aggregated/

      - name: Upload aggregated results
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: agentic_aggregated
          path: aggregated/
# --- job: calc-success-rate ---
  # Runs after the three collectors whatever their outcome (always()):
  # downloads the results_* artifacts, runs utils/calc_success_rate.py and
  # uploads the resulting JSON file as the "run-stats" artifact.
  calc-success-rate:
    runs-on: ubuntu-latest
    needs: [collect-results, collect-evals, collect-agentic-results]
    if: ${{ always() }}
    env:
      RESULTS_DIR: "results/"
      STATS_FILENAME: "run_stats"
      # The REPO_PAT secret is exposed to every step as GITHUB_TOKEN.
      GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
    steps:
      # Full-history checkout using the same PAT.
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0
          token: ${{ secrets.REPO_PAT }}

      - name: Download results artifacts
        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
        with:
          pattern: results_*
          path: ${{ env.RESULTS_DIR }}

      - name: Install python dependencies
        run: pip install PyGithub

      - name: Calculate success rate
        run: python3 utils/calc_success_rate.py $STATS_FILENAME

      - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: "run-stats"
          path: ${{ env.STATS_FILENAME }}.json