Add a "gemma-4-e2b-it" entry to vllm-model-tests.yml (hunk context: benchmark).

The new entry targets the "x86-g6xl-runner" fleet, passes
"--tensor-parallel-size 1 --max-model-len 4096 --dtype bfloat16" as extra_args,
and sets min_throughput: 470 and min_rps: 3.7.

NOTE(review): line breaks and YAML indentation below were reconstructed from a
whitespace-collapsed copy of this patch (list items assumed at 2 spaces, keys
at 4). Confirm the context lines match the target file before applying.

diff --git a/.github/config/model-tests/vllm-model-tests.yml b/.github/config/model-tests/vllm-model-tests.yml
index a2f803d550c2..332e5ffbf973 100644
--- a/.github/config/model-tests/vllm-model-tests.yml
+++ b/.github/config/model-tests/vllm-model-tests.yml
@@ -104,6 +104,17 @@ benchmark:
     min_throughput: 680
     min_rps: 5.3
 
+  - name: "gemma-4-e2b-it"
+    s3_model: "gemma-4-e2b-it.tar.gz"
+    fleet: "x86-g6xl-runner"
+    extra_args: "--tensor-parallel-size 1 --max-model-len 4096 --dtype bfloat16"
+    input_len: 512
+    output_len: 128
+    num_prompts: 64
+    batch_size: 4
+    min_throughput: 470
+    min_rps: 3.7
+
   # Pending p5e.48xlarge fleet creation. Fleet name "x86-p5e-runner" is a placeholder.
   # - name: "minimax-m2.7"
   #   s3_model: "minimax-m2.7.tar.gz"