|
| 1 | +# vLLM-Omni Model Test Configuration |
| 2 | +# Tests for omni-modality models (TTS, image generation, video, omni-chat) |
| 3 | +# |
| 4 | +# Each model defines its test_request (sent to /invocations via middleware) |
| 5 | +# and the route for the SageMaker routing middleware. |
| 6 | +# |
| 7 | +# Each model's s3_model is an archive pre-cached in S3 and downloaded by the download-model action. |
| 8 | + |
| 9 | +s3_prefix: "s3://dlc-cicd-models/omni-models"  # NOTE(review): presumably the S3 location that each s3_model archive name is resolved against — confirm in the download-model action |
| 10 | + |
| 11 | +smoke-test: |
| 12 | + codebuild-fleet: |
| 13 | + # --- TTS models (route: /v1/audio/speech); request body is a JSON object --- |
| 14 | + - name: "qwen3-tts-1.7b-customvoice" |
| 15 | + s3_model: "qwen3-tts-1.7b-customvoice.tar.gz" |
| 16 | + fleet: "x86-g6xl-runner" |
| 17 | + extra_args: ""  # empty: no extra arguments are supplied for this model |
| 18 | + route: "/v1/audio/speech" |
| 19 | + test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}' |
| 20 | + validate: "binary_size_gt:1000"  # NOTE(review): presumably passes when the binary response is larger than 1000 bytes — confirm against the validator |
| 21 | + |
| 22 | + # --- Image generation models (route: /v1/images/generations); request body is a JSON object --- |
| 23 | + - name: "flux2-klein-4b" |
| 24 | + s3_model: "flux2-klein-4b.tar.gz" |
| 25 | + fleet: "x86-g6xl-runner" |
| 26 | + extra_args: ""  # empty: no extra arguments are supplied for this model |
| 27 | + route: "/v1/images/generations" |
| 28 | + test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}' |
| 29 | + validate: "json_field:data[0].b64_json"  # NOTE(review): presumably asserts that this path is present in the JSON response — confirm against the validator |
| 30 | + |
| 31 | + # --- Video generation models (route: /v1/videos); request is form data rather than JSON --- |
| 32 | + - name: "wan2.1-t2v-1.3b" |
| 33 | + s3_model: "wan2.1-t2v-1.3b.tar.gz" |
| 34 | + fleet: "x86-g6exl-runner" |
| 35 | + extra_args: ""  # empty: no extra arguments are supplied for this model |
| 36 | + route: "/v1/videos" |
| 37 | + content_type: "multipart/form-data"  # the only entry shown here that sets content_type explicitly |
| 38 | + test_request: 'prompt=a dog running on a beach&num_frames=17&num_inference_steps=4&size=480x320&seed=42'  # NOTE(review): written as '&'-joined key=value pairs with literal spaces, while content_type is multipart/form-data — confirm the test harness splits these into form fields |
| 39 | + validate: "json_field:id"  # NOTE(review): presumably asserts that an "id" field is present in the JSON response — confirm against the validator |
| 40 | + |
| 41 | + # --- Omni chat models (route: /v1/chat/completions, fallthrough) --- |
| 42 | + # Disabled for now: the bagel-7b-mot model is too big to run. |
| 43 | + # - name: "bagel-7b-mot" |
| 44 | + # s3_model: "bagel-7b-mot.tar.gz" |
| 45 | + # fleet: "x86-g6e4xl-runner" |
| 46 | + # extra_args: "" |
| 47 | + # route: "/v1/chat/completions" |
| 48 | + # test_request: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "<|im_start|>A cute cat<|im_end|>"}]}], "modalities": ["image"], "height": 512, "width": 512, "num_inference_steps": 4, "seed": 42}' |
| 49 | + # validate: "json_field:choices[0].message.content" |
| 50 | + |
| 51 | + - name: "qwen2.5-omni-3b" |
| 52 | + s3_model: "qwen2.5-omni-3b.tar.gz" |
| 53 | + fleet: "x86-g6e12xl-runner" |
| 54 | + extra_args: ""  # empty: no extra arguments are supplied for this model |
| 55 | + route: "/v1/chat/completions" |
| 56 | + test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'  # text-only chat message; max_tokens is set to 64 |
| 57 | + validate: "json_field:choices[0].message.content"  # NOTE(review): presumably asserts that this path is present in the JSON response — confirm against the validator |
0 commit comments