Skip to content

Commit 7f8ccaf

Browse files
Add back sentencepiece dependency for tests + cleanup workflow
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
1 parent cdd7d79 commit 7f8ccaf

5 files changed

Lines changed: 25 additions & 37 deletions

File tree

.github/workflows/example_tests.yml

Lines changed: 10 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -56,18 +56,21 @@ jobs:
5656
match_pattern: "^DCO$|^linux$" # Wait for DCO and Unit tests / linux to pass
5757
delay: 300s
5858

59-
##### PyTorch Example Tests #####
59+
##### PyTorch Example Tests (speculative_decoding requires 26.01 image) #####
6060
torch-pr:
6161
needs: [check-file-changes, wait-checks]
6262
if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true'
6363
strategy:
6464
fail-fast: false
6565
matrix:
6666
example: [llm_distill, llm_qat, llm_sparsity]
67+
include:
68+
- example: speculative_decoding
69+
docker_image: "nvcr.io/nvidia/pytorch:26.01-py3"
6770
uses: ./.github/workflows/_example_tests_runner.yml
6871
secrets: inherit
6972
with:
70-
docker_image: "nvcr.io/nvidia/pytorch:25.06-py3"
73+
docker_image: ${{ matrix.docker_image || 'nvcr.io/nvidia/pytorch:25.06-py3' }}
7174
example: ${{ matrix.example }}
7275
pip_install_extras: "[hf,dev-test]"
7376
runner: linux-amd64-gpu-l4-latest-1
@@ -78,36 +81,17 @@ jobs:
7881
fail-fast: false
7982
matrix:
8083
example: [llm_distill, llm_qat, llm_sparsity]
84+
include:
85+
- example: speculative_decoding
86+
docker_image: "nvcr.io/nvidia/pytorch:26.01-py3"
8187
uses: ./.github/workflows/_example_tests_runner.yml
8288
secrets: inherit
8389
with:
84-
docker_image: "nvcr.io/nvidia/pytorch:25.06-py3"
90+
docker_image: ${{ matrix.docker_image || 'nvcr.io/nvidia/pytorch:25.06-py3' }}
8591
example: ${{ matrix.example }}
8692
pip_install_extras: "[hf,dev-test]"
8793
runner: linux-amd64-gpu-h100-latest-2
8894

89-
##### Speculative Decoding Example Tests (requires 26.01 image) #####
90-
speculative-decoding-pr:
91-
needs: [check-file-changes, wait-checks]
92-
if: startsWith(github.ref, 'refs/heads/pull-request/') && needs.check-file-changes.outputs.any_changed == 'true'
93-
uses: ./.github/workflows/_example_tests_runner.yml
94-
secrets: inherit
95-
with:
96-
docker_image: "nvcr.io/nvidia/pytorch:26.01-py3"
97-
example: speculative_decoding
98-
pip_install_extras: "[hf,dev-test]"
99-
runner: linux-amd64-gpu-l4-latest-1
100-
101-
speculative-decoding-non-pr:
102-
if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
103-
uses: ./.github/workflows/_example_tests_runner.yml
104-
secrets: inherit
105-
with:
106-
docker_image: "nvcr.io/nvidia/pytorch:26.01-py3"
107-
example: speculative_decoding
108-
pip_install_extras: "[hf,dev-test]"
109-
runner: linux-amd64-gpu-h100-latest-2
110-
11195
##### TensorRT-LLM Example Tests #####
11296
trtllm-pr:
11397
needs: [check-file-changes, wait-checks]
@@ -172,15 +156,14 @@ jobs:
172156
example-pr-required-check:
173157
# Run even if example tests are skipped
174158
if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
175-
needs: [check-file-changes, torch-pr, speculative-decoding-pr, trtllm-pr, onnx-pr]
159+
needs: [check-file-changes, torch-pr, trtllm-pr, onnx-pr]
176160
runs-on: ubuntu-latest
177161
steps:
178162
- name: Required GPU tests did not succeed
179163
if: |
180164
needs.check-file-changes.result != 'success' ||
181165
(needs.check-file-changes.outputs.any_changed == 'true' && (
182166
needs.torch-pr.result != 'success' ||
183-
needs.speculative-decoding-pr.result != 'success' ||
184167
needs.trtllm-pr.result != 'success' ||
185168
needs.onnx-pr.result != 'success'
186169
))

.github/workflows/gpu_tests.yml

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -62,9 +62,13 @@ jobs:
6262
strategy:
6363
fail-fast: false
6464
matrix:
65-
example: [py312-cuda12-gpu, py312-cuda12-gpu-megatron]
65+
include:
66+
- example: py312-cuda12-gpu
67+
timeout: 90
68+
- example: py312-cuda12-gpu-megatron
69+
timeout: 120
6670
runs-on: linux-amd64-gpu-l4-latest-1
67-
timeout-minutes: 90
71+
timeout-minutes: ${{ matrix.timeout }}
6872
container: &gpu_container
6973
image: nvcr.io/nvidia/pytorch:25.06-py3
7074
env:
@@ -84,9 +88,13 @@ jobs:
8488
strategy:
8589
fail-fast: false
8690
matrix:
87-
example: [py312-cuda12-gpu, py312-cuda12-gpu-megatron]
91+
include:
92+
- example: py312-cuda12-gpu
93+
timeout: 90
94+
- example: py312-cuda12-gpu-megatron
95+
timeout: 120
8896
runs-on: linux-amd64-gpu-h100-latest-2
89-
timeout-minutes: 90
97+
timeout-minutes: ${{ matrix.timeout }}
9098
container: *gpu_container
9199
steps: *gpu_steps
92100
gpu-pr-required-check:

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -132,8 +132,8 @@ disable_error_code = ["attr-defined"]
132132
[tool.pytest.ini_options]
133133
# Default additional options
134134
# Show a short test summary info for all except passed tests with -ra flag
135-
# print execution time for 20 slowest tests and generate coverage reports
136-
addopts = "-v -ra --instafail --cov-report=term-missing --cov-report=html --cov-report=xml:coverage.xml --cov-config=pyproject.toml --durations=20 --strict-markers"
135+
# print execution time for 50 slowest tests and generate coverage reports
136+
addopts = "-v -ra --instafail --cov-report=term-missing --cov-report=html --cov-report=xml:coverage.xml --cov-config=pyproject.toml --durations=50 --strict-markers"
137137
pythonpath = ["tests/"]
138138
markers = [
139139
"manual: Only run when --run-manual is given",

setup.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@
7777
"pytest-cov",
7878
"pytest-instafail",
7979
"pytest-timeout",
80+
"sentencepiece", # For test_unified_export_megatron.py, test_vllm_fakequant_megatron_export.py
8081
"timm",
8182
"torchprofile>=0.0.4", # For computing flops of CV models
8283
"torchvision",

tox.ini

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -62,8 +62,6 @@ commands_pre =
6262
# Install deps here so that it gets installed even in --current-env
6363
pip install git+https://github.com/Dao-AILab/fast-hadamard-transform.git
6464

65-
# NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env
66-
# to avoid possible CUDA version mismatch
6765
pip install -e .[all,dev-test]
6866
commands =
6967
# Coverage fails with "Can't combine line data with arc data" error so not using "--cov"
@@ -79,8 +77,6 @@ commands_pre =
7977
pip-mark-installed triton
8078
pip install --no-build-isolation git+https://github.com/state-spaces/mamba.git
8179

82-
# NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env
83-
# to avoid possible CUDA version mismatch
8480
pip install -e .[all,dev-test]
8581
commands =
8682
# Coverage fails with "Can't combine line data with arc data" error so not using "--cov"

0 commit comments

Comments (0)