@@ -25,7 +25,7 @@ import (
2525const (
2626 defaultInstallDir = ".docker/model-runner/vllm-metal"
2727 // vllmMetalVersion is the vllm-metal release tag to download from Docker Hub.
28- vllmMetalVersion = "v0.1.0-20260126-121650 "
28+ vllmMetalVersion = "v0.1.0-20260320-122309 "
2929)
3030
3131var (
@@ -207,7 +207,7 @@ func (v *vllmMetal) Run(ctx context.Context, socket, model string, modelRef stri
207207 return fmt .Errorf ("failed to get model: %w" , err )
208208 }
209209
210- args , err := v .buildArgs (bundle , socket , mode , config )
210+ args , err := v .buildArgs (bundle , socket , model , modelRef , mode , config )
211211 if err != nil {
212212 return fmt .Errorf ("failed to build vllm-metal arguments: %w" , err )
213213 }
@@ -225,7 +225,9 @@ func (v *vllmMetal) Run(ctx context.Context, socket, model string, modelRef stri
225225}
226226
227227// buildArgs builds the command line arguments for vllm-metal server.
228- func (v * vllmMetal ) buildArgs (bundle interface { SafetensorsPath () string }, socket string , mode inference.BackendMode , config * inference.BackendConfiguration ) ([]string , error ) {
228+ // vllm-metal is a vLLM platform plugin, so we launch vLLM's OpenAI-compatible
229+ // API server directly; the Metal plugin is auto-discovered via entry points.
230+ func (v * vllmMetal ) buildArgs (bundle interface { SafetensorsPath () string }, socket , model , modelRef string , mode inference.BackendMode , config * inference.BackendConfiguration ) ([]string , error ) {
229231 // Parse host:port from socket (vllm-metal uses TCP)
230232 host , port , err := net .SplitHostPort (socket )
231233 if err != nil {
@@ -240,7 +242,7 @@ func (v *vllmMetal) buildArgs(bundle interface{ SafetensorsPath() string }, sock
240242 modelPath := filepath .Dir (safetensorsPath )
241243
242244 args := []string {
243- "-m" , "vllm_metal.server " ,
245+ "-m" , "vllm.entrypoints.openai.api_server " ,
244246 "--model" , modelPath ,
245247 "--host" , host ,
246248 "--port" , port ,
@@ -258,6 +260,10 @@ func (v *vllmMetal) buildArgs(bundle interface{ SafetensorsPath() string }, sock
258260 return nil , fmt .Errorf ("image generation mode not supported by vllm-metal backend" )
259261 }
260262
263+ // Register model aliases so the model-runner can address the model by its
264+ // digest (model) and its human-readable reference (modelRef).
265+ args = append (args , "--served-model-name" , model , modelRef )
266+
261267 // Add context size if specified
262268 if config != nil && config .ContextSize != nil {
263269 args = append (args , "--max-model-len" , strconv .Itoa (int (* config .ContextSize )))
0 commit comments