Skip to content

Commit e64bd20

Browse files
committed
Merge branch 'main' into mf
2 parents fec45ac + c86b17c commit e64bd20

215 files changed

Lines changed: 6094 additions & 3278 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -326,8 +326,7 @@ apply_rocm_test_overrides() {
326326
if [[ $cmds == *" kernels/moe"* ]]; then
327327
cmds="${cmds} \
328328
--ignore=kernels/moe/test_moe.py \
329-
--ignore=kernels/moe/test_cutlass_moe.py \
330-
--ignore=kernels/moe/test_triton_moe_ptpc_fp8.py"
329+
--ignore=kernels/moe/test_cutlass_moe.py"
331330
fi
332331

333332
# --- Entrypoint ignores ---
@@ -337,12 +336,16 @@ apply_rocm_test_overrides() {
337336
--ignore=entrypoints/openai/completion/test_shutdown.py \
338337
--ignore=entrypoints/openai/test_completion.py \
339338
--ignore=entrypoints/openai/models/test_models.py \
340-
--ignore=entrypoints/openai/test_lora_adapters.py \
341339
--ignore=entrypoints/openai/test_return_tokens_as_ids.py \
342340
--ignore=entrypoints/openai/chat_completion/test_root_path.py \
343341
--ignore=entrypoints/openai/completion/test_prompt_validation.py "}
344342
fi
345343

344+
if [[ $cmds == *" entrypoints/serve"* ]]; then
345+
cmds="${cmds} \
346+
--ignore=entrypoints/serve/lora/test_lora_adapters.py"
347+
fi
348+
346349
if [[ $cmds == *" entrypoints/llm "* ]]; then
347350
cmds=${cmds//" entrypoints/llm "/" entrypoints/llm \
348351
--ignore=entrypoints/llm/test_chat.py \

.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,7 @@ run_and_track_test() {
127127
128128
# --- Actual Test Execution ---
129129
run_and_track_test 1 "test_struct_output_generate.py" \
130-
"python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\""
130+
"python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_struct_output_generate.py -k \"not test_structured_output_with_reasoning_matrices\""
131131
run_and_track_test 2 "test_moe_pallas.py" \
132132
"python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py"
133133
run_and_track_test 3 "test_lora.py" \

.buildkite/test-amd.yaml

Lines changed: 36 additions & 63 deletions
Large diffs are not rendered by default.

.buildkite/test_areas/distributed.yaml

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,14 +27,14 @@ steps:
2727
- vllm/v1/engine/
2828
- vllm/v1/worker/
2929
- tests/v1/distributed
30-
- tests/v1/entrypoints/openai/test_multi_api_servers.py
30+
- tests/entrypoints/openai/test_multi_api_servers.py
3131
commands:
3232
# https://github.com/NVIDIA/nccl/issues/1838
3333
- export NCCL_CUMEM_HOST_ENABLE=0
3434
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py
3535
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_eagle_dp.py
3636
- TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py
37-
- DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py
37+
- DP_SIZE=2 pytest -v -s entrypoints/openai/test_multi_api_servers.py
3838

3939
- label: Distributed Compile + RPC Tests (2 GPUs)
4040
timeout_in_minutes: 20
@@ -88,7 +88,6 @@ steps:
8888
- vllm/distributed/
8989
- tests/distributed/test_torchrun_example.py
9090
- tests/distributed/test_torchrun_example_moe.py
91-
- examples/offline_inference/rlhf.py
9291
- examples/offline_inference/rlhf_colocate.py
9392
- examples/rl/
9493
- tests/examples/offline_inference/data_parallel.py

.buildkite/test_areas/engine.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,3 +70,15 @@ steps:
7070
device: mi325_4
7171
depends_on:
7272
- image-build-amd
73+
74+
- label: V1 e2e (4xH100)
75+
timeout_in_minutes: 60
76+
device: h100
77+
num_devices: 4
78+
optional: true
79+
source_file_dependencies:
80+
- vllm/v1/attention/backends/utils.py
81+
- vllm/v1/worker/gpu_model_runner.py
82+
- tests/v1/e2e/test_hybrid_chunked_prefill.py
83+
commands:
84+
- pytest -v -s v1/e2e/test_hybrid_chunked_prefill.py

.buildkite/test_areas/entrypoints.yaml

Lines changed: 4 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ steps:
1010
- tests/entrypoints/
1111
commands:
1212
- pytest -v -s entrypoints/openai/tool_parsers
13-
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
13+
- pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/rpc --ignore=entrypoints/sleep --ignore=entrypoints/serve/instrumentator --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
1414

1515
- label: Entrypoints Integration (LLM)
1616
timeout_in_minutes: 40
@@ -34,7 +34,7 @@ steps:
3434
- tests/entrypoints/test_chat_utils
3535
commands:
3636
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
37-
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses
37+
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/chat_completion/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/chat_completion/test_oot_registration.py --ignore=entrypoints/openai/completion/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/tool_parsers/ --ignore=entrypoints/openai/responses --ignore=entrypoints/openai/test_multi_api_servers.py
3838
- pytest -v -s entrypoints/test_chat_utils.py
3939
mirror:
4040
amd:
@@ -48,11 +48,11 @@ steps:
4848
source_file_dependencies:
4949
- vllm/
5050
- tests/entrypoints/rpc
51-
- tests/entrypoints/instrumentator
51+
- tests/entrypoints/serve/instrumentator
5252
- tests/tool_use
5353
commands:
5454
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
55-
- pytest -v -s entrypoints/instrumentator
55+
- pytest -v -s entrypoints/serve/instrumentator
5656
- PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/rpc
5757
- pytest -v -s tool_use
5858

@@ -75,19 +75,6 @@ steps:
7575
commands:
7676
- pytest -v -s entrypoints/openai/responses
7777

78-
- label: Entrypoints V1
79-
timeout_in_minutes: 50
80-
source_file_dependencies:
81-
- vllm/
82-
- tests/v1
83-
commands:
84-
- pytest -v -s v1/entrypoints
85-
mirror:
86-
amd:
87-
device: mi325_1
88-
depends_on:
89-
- image-build-amd
90-
9178
- label: OpenAI API Correctness
9279
timeout_in_minutes: 30
9380
source_file_dependencies:

.buildkite/test_areas/lm_eval.yaml

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,22 @@ steps:
4545
commands:
4646
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-blackwell.txt
4747

48+
- label: LM Eval Qwen3.5 Models (B200)
49+
timeout_in_minutes: 120
50+
device: b200
51+
optional: true
52+
num_devices: 2
53+
source_file_dependencies:
54+
- vllm/model_executor/models/qwen3_5.py
55+
- vllm/model_executor/models/qwen3_5_mtp.py
56+
- vllm/transformers_utils/configs/qwen3_5.py
57+
- vllm/transformers_utils/configs/qwen3_5_moe.py
58+
- vllm/model_executor/models/qwen3_next.py
59+
- vllm/model_executor/models/qwen3_next_mtp.py
60+
- vllm/model_executor/layers/fla/ops/
61+
commands:
62+
- pytest -s -v evals/gsm8k/test_gsm8k_correctness.py --config-list-file=configs/models-qwen35-blackwell.txt
63+
4864
- label: LM Eval Large Models (H200)
4965
timeout_in_minutes: 60
5066
device: h200

.buildkite/test_areas/model_runner_v2.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ steps:
1111
- vllm/v1/attention/
1212
- tests/v1/engine/test_llm_engine.py
1313
- tests/v1/e2e/
14-
- tests/v1/entrypoints/llm/test_struct_output_generate.py
14+
- tests/entrypoints/llm/test_struct_output_generate.py
1515
commands:
1616
- set -x
1717
- export VLLM_USE_V2_MODEL_RUNNER=1
@@ -22,7 +22,7 @@ steps:
2222
- pytest -v -s v1/e2e/general/test_context_length.py
2323
- pytest -v -s v1/e2e/general/test_min_tokens.py
2424
# Temporary hack filter to exclude ngram spec decoding based tests.
25-
- pytest -v -s v1/entrypoints/llm/test_struct_output_generate.py -k "xgrammar and not speculative_config6 and not speculative_config7 and not speculative_config8 and not speculative_config0"
25+
- pytest -v -s entrypoints/llm/test_struct_output_generate.py -k "xgrammar and not speculative_config6 and not speculative_config7 and not speculative_config8 and not speculative_config0"
2626

2727
- label: Model Runner V2 Examples
2828
timeout_in_minutes: 45

.github/CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson
7575
/tests/multimodal @DarkLight1337 @ywang96 @NickLucche
7676
/tests/quantization @mgoin @robertgshaw2-redhat @yewentao256 @pavanimajety
7777
/tests/test_inputs.py @DarkLight1337 @ywang96
78-
/tests/v1/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm
78+
/tests/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm
7979
/tests/v1/structured_output @mgoin @russellb @aarnphm
8080
/tests/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @alexm-redhat @heheda12345 @ApostaC @orozery
8181
/tests/weight_loading @mgoin @youkaichao @yewentao256

.github/mergify.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -260,7 +260,7 @@ pull_request_rules:
260260
- files=examples/offline_inference/structured_outputs.py
261261
- files=examples/online_serving/structured_outputs/structured_outputs.py
262262
- files~=^tests/v1/structured_output/
263-
- files=tests/v1/entrypoints/llm/test_struct_output_generate.py
263+
- files=tests/entrypoints/llm/test_struct_output_generate.py
264264
- files~=^vllm/v1/structured_output/
265265
actions:
266266
label:

0 commit comments

Comments
 (0)