Skip to content

Commit 36e01bc

Browse files
authored
Merge pull request #769 from docker/update-vllm
vllm: macOS: update to vLLM 0.17.1 and use native server entry point
2 parents 65f4420 + 6e839f5 commit 36e01bc

3 files changed

Lines changed: 16 additions & 9 deletions

File tree

Makefile

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,7 @@ docker-run-impl:
166166

167167
# vllm-metal (macOS ARM64 only)
168168
# The tarball is self-contained: includes a standalone Python 3.12 + all packages.
169-
VLLM_METAL_RELEASE ?= v0.1.0-20260126-121650
169+
VLLM_METAL_RELEASE ?= v0.1.0-20260320-122309
170170
VLLM_METAL_INSTALL_DIR := $(HOME)/.docker/model-runner/vllm-metal
171171
VLLM_METAL_TARBALL := vllm-metal-macos-arm64-$(VLLM_METAL_RELEASE).tar.gz
172172

@@ -219,14 +219,15 @@ vllm-metal-dev:
219219
rm -rf "$(VLLM_METAL_INSTALL_DIR)"; \
220220
$$PYTHON_BIN -m venv "$(VLLM_METAL_INSTALL_DIR)"; \
221221
. "$(VLLM_METAL_INSTALL_DIR)/bin/activate" && \
222-
VLLM_UPSTREAM_VERSION="0.13.0" && \
222+
VLLM_UPSTREAM_VERSION="0.17.1" && \
223223
WORK_DIR=$$(mktemp -d) && \
224224
curl -fsSL -o "$$WORK_DIR/vllm.tar.gz" "https://github.com/vllm-project/vllm/releases/download/v$$VLLM_UPSTREAM_VERSION/vllm-$$VLLM_UPSTREAM_VERSION.tar.gz" && \
225225
tar -xzf "$$WORK_DIR/vllm.tar.gz" -C "$$WORK_DIR" && \
226226
pip install -r "$$WORK_DIR/vllm-$$VLLM_UPSTREAM_VERSION/requirements/cpu.txt" && \
227-
pip install -e "$(VLLM_METAL_PATH)" && \
227+
pip install "$$WORK_DIR/vllm-$$VLLM_UPSTREAM_VERSION" && \
228228
pip install -r "$$WORK_DIR/vllm-$$VLLM_UPSTREAM_VERSION/requirements/common.txt" && \
229229
rm -rf "$$WORK_DIR" && \
230+
pip install -e "$(VLLM_METAL_PATH)" && \
230231
echo "dev" > "$(VLLM_METAL_INSTALL_DIR)/.vllm-metal-version"; \
231232
echo "vllm-metal dev installed from $(VLLM_METAL_PATH)"
232233

pkg/inference/backends/vllm/vllm_metal.go

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ import (
2525
const (
2626
defaultInstallDir = ".docker/model-runner/vllm-metal"
2727
// vllmMetalVersion is the vllm-metal release tag to download from Docker Hub.
28-
vllmMetalVersion = "v0.1.0-20260126-121650"
28+
vllmMetalVersion = "v0.1.0-20260320-122309"
2929
)
3030

3131
var (
@@ -207,7 +207,7 @@ func (v *vllmMetal) Run(ctx context.Context, socket, model string, modelRef stri
207207
return fmt.Errorf("failed to get model: %w", err)
208208
}
209209

210-
args, err := v.buildArgs(bundle, socket, mode, config)
210+
args, err := v.buildArgs(bundle, socket, model, modelRef, mode, config)
211211
if err != nil {
212212
return fmt.Errorf("failed to build vllm-metal arguments: %w", err)
213213
}
@@ -225,7 +225,9 @@ func (v *vllmMetal) Run(ctx context.Context, socket, model string, modelRef stri
225225
}
226226

227227
// buildArgs builds the command line arguments for vllm-metal server.
228-
func (v *vllmMetal) buildArgs(bundle interface{ SafetensorsPath() string }, socket string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) {
228+
// vllm-metal is a vLLM platform plugin, so we launch vLLM's OpenAI-compatible
229+
// API server directly; the Metal plugin is auto-discovered via entry points.
230+
func (v *vllmMetal) buildArgs(bundle interface{ SafetensorsPath() string }, socket, model, modelRef string, mode inference.BackendMode, config *inference.BackendConfiguration) ([]string, error) {
229231
// Parse host:port from socket (vllm-metal uses TCP)
230232
host, port, err := net.SplitHostPort(socket)
231233
if err != nil {
@@ -240,7 +242,7 @@ func (v *vllmMetal) buildArgs(bundle interface{ SafetensorsPath() string }, sock
240242
modelPath := filepath.Dir(safetensorsPath)
241243

242244
args := []string{
243-
"-m", "vllm_metal.server",
245+
"-m", "vllm.entrypoints.openai.api_server",
244246
"--model", modelPath,
245247
"--host", host,
246248
"--port", port,
@@ -258,6 +260,10 @@ func (v *vllmMetal) buildArgs(bundle interface{ SafetensorsPath() string }, sock
258260
return nil, fmt.Errorf("image generation mode not supported by vllm-metal backend")
259261
}
260262

263+
// Register model aliases so the model-runner can address the model by its
264+
// digest (model) and its human-readable reference (modelRef).
265+
args = append(args, "--served-model-name", model, modelRef)
266+
261267
// Add context size if specified
262268
if config != nil && config.ContextSize != nil {
263269
args = append(args, "--max-model-len", strconv.Itoa(int(*config.ContextSize)))

scripts/build-vllm-metal-tarball.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ WORK_DIR=$(mktemp -d)
2020
# Convert tarball path to absolute before we cd elsewhere
2121
TARBALL="$(cd "$(dirname "$TARBALL_ARG")" && pwd)/$(basename "$TARBALL_ARG")"
2222

23-
VLLM_VERSION="0.13.0"
23+
VLLM_VERSION="0.17.1"
2424
# Extract wheel version from release tag (e.g., v0.1.0-20260126-121650 -> 0.1.0)
2525
VLLM_METAL_WHEEL_VERSION=$(echo "$VLLM_METAL_RELEASE" | sed 's/^v//' | cut -d'-' -f1)
2626
VLLM_METAL_WHEEL_URL="https://github.com/vllm-project/vllm-metal/releases/download/${VLLM_METAL_RELEASE}/vllm_metal-${VLLM_METAL_WHEEL_VERSION}-cp312-cp312-macosx_11_0_arm64.whl"
@@ -89,4 +89,4 @@ SIZE=$(du -h "$TARBALL" | cut -f1)
8989
echo "Created: $TARBALL ($SIZE)"
9090
echo ""
9191
echo "This tarball is fully self-contained (includes Python 3.12 + all packages)."
92-
echo "To use: extract to a directory and run bin/python3 -m vllm_metal.server"
92+
echo "To use: extract to a directory and run bin/python3 -m vllm.entrypoints.openai.api_server --model <path>"

0 commit comments

Comments
 (0)