Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion .github/workflows/e2e-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@ on:

jobs:
e2e-test:
runs-on: macos-latest
strategy:
fail-fast: false
matrix:
include:
- os: macos-latest
- os: ubuntu-latest
runs-on: ${{ matrix.os }}
timeout-minutes: 30

steps:
Expand All @@ -24,5 +30,9 @@ jobs:
go-version: 1.25.8
cache: true

- name: Set up Docker
if: matrix.os == 'ubuntu-latest'
uses: docker/setup-docker-action@1a6edb0ba9ac496f6850236981f15d8f9a82254d

- name: Run e2e tests
run: make e2e
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ APP_NAME := model-runner
GO_VERSION := 1.25.8
LLAMA_SERVER_VERSION := latest
LLAMA_SERVER_VARIANT := cpu
BASE_IMAGE := ubuntu:24.04
BASE_IMAGE := ubuntu:26.04
VLLM_BASE_IMAGE := nvidia/cuda:13.0.2-runtime-ubuntu24.04
VLLM_VERSION ?= 0.17.0
DOCKER_IMAGE := docker/model-runner:latest
Expand Down Expand Up @@ -86,7 +86,7 @@ integration-tests:
go test -v -race -count=1 -tags=integration -run "^TestIntegration" -timeout=5m ./cmd/cli/commands
@echo "Integration tests completed!"

e2e: build-llamacpp build
e2e:
@echo "Running e2e tests..."
@echo "Checking test naming conventions..."
@INVALID_TESTS=$$(grep "^func Test" e2e/*_test.go | grep -v "^.*:func TestE2E" | grep -v "^.*:func TestMain"); \
Expand Down
30 changes: 18 additions & 12 deletions cmd/cli/pkg/standalone/images.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,30 @@ import (
"github.com/moby/moby/client/pkg/jsonmessage"
)

// EnsureControllerImage ensures that the controller container image is pulled.
// EnsureControllerImage ensures that the controller container image is
// available. It first tries to pull from the registry; if that fails it
// falls back to a locally available image with the same name.
func EnsureControllerImage(ctx context.Context, dockerClient client.ImageAPIClient, gpu gpupkg.GPUSupport, backend string, printer StatusPrinter) error {
imageName := controllerImageName(gpu, backend)

// Perform the pull.
out, err := dockerClient.ImagePull(ctx, imageName, client.ImagePullOptions{})
if err != nil {
return fmt.Errorf("failed to pull image %s: %w", imageName, err)
var pullErr error
out, pullErr := dockerClient.ImagePull(ctx, imageName, client.ImagePullOptions{})
if pullErr == nil {
defer out.Close()
fd, isTerminal := printer.GetFdInfo()
pullErr = jsonmessage.DisplayJSONMessagesStream(out, printer, fd, isTerminal, nil)
}
defer out.Close()

// Display pull progress using Docker's built-in display handler
fd, isTerminal := printer.GetFdInfo()
if err := jsonmessage.DisplayJSONMessagesStream(out, printer, fd, isTerminal, nil); err != nil {
return fmt.Errorf("failed to pull image %s: %w", imageName, err)
if pullErr == nil {
printer.Println("Successfully pulled", imageName)
return nil
}

printer.Println("Successfully pulled", imageName)
// Pull failed — check if the image exists locally.
_, inspectErr := dockerClient.ImageInspect(ctx, imageName)
if inspectErr != nil {
Comment thread
doringeman marked this conversation as resolved.
return fmt.Errorf("failed to pull image %s and no local image found: %w", imageName, pullErr)
}
printer.Println("Using local image", imageName)
return nil
}

Expand Down
57 changes: 54 additions & 3 deletions e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"testing"
Expand Down Expand Up @@ -53,15 +54,27 @@ func run(m *testing.M) int {
fmt.Fprintf(os.Stderr, "e2e: %v\n", err)
return 1
}
cliBin = filepath.Join(root, "cmd", "cli", "model-cli")

if runtime.GOOS == "darwin" {
return runNative(m, root)
}
return runDocker(m, root)
}

fmt.Fprintln(os.Stderr, "e2e: building server and CLI...")
// runNative builds the server from source and runs it as a local process.
func runNative(m *testing.M, root string) int {
fmt.Fprintln(os.Stderr, "e2e: building llama.cpp, server, and CLI...")
if err := makeTarget(root, "build-llamacpp"); err != nil {
fmt.Fprintf(os.Stderr, "e2e: make build-llamacpp failed: %v\n", err)
return 1
}
if err := makeTarget(root, "build"); err != nil {
fmt.Fprintf(os.Stderr, "e2e: make build failed: %v\n", err)
return 1
}

serverBin := filepath.Join(root, "model-runner")
cliBin = filepath.Join(root, "cmd", "cli", "model-cli")
llamaBin := filepath.Join(root, "llamacpp", "install", "bin")

for _, path := range []string{serverBin, cliBin, llamaBin} {
Expand Down Expand Up @@ -105,12 +118,50 @@ func run(m *testing.M) int {
_ = server.Wait()
}()

return waitAndRunTests(m)
}

// runDocker builds the Docker image and CLI from source, then lets the CLI
// auto-start the model-runner container on the default Moby port (12434).
//
// It returns 1 as soon as a build, tag, or install step fails; otherwise it
// returns whatever waitAndRunTests returns.
func runDocker(m *testing.M, root string) int {
	fmt.Fprintln(os.Stderr, "e2e: building Docker image and CLI...")
	if err := makeTarget(root, "docker-build"); err != nil {
		fmt.Fprintf(os.Stderr, "e2e: make docker-build failed: %v\n", err)
		return 1
	}
	if err := makeTarget(root, "build-cli"); err != nil {
		fmt.Fprintf(os.Stderr, "e2e: make build-cli failed: %v\n", err)
		return 1
	}

	// Tag the locally built image so install-runner uses it
	// instead of pulling from Docker Hub.
	tag := exec.Command("docker", "tag", "docker/model-runner:latest", "docker/model-runner:e2e-local")
	// Forward docker's own output, as done for install-runner below. Without
	// this a failure only surfaces as "exit status N" with no explanation.
	tag.Stdout = os.Stderr
	tag.Stderr = os.Stderr
	if err := tag.Run(); err != nil {
		fmt.Fprintf(os.Stderr, "e2e: docker tag failed: %v\n", err)
		return 1
	}

	fmt.Fprintln(os.Stderr, "e2e: installing runner...")
	cmd := exec.Command(cliBin, "install-runner")
	// The version override matches the "e2e-local" tag applied above.
	cmd.Env = append(os.Environ(), "MODEL_RUNNER_CONTROLLER_VERSION=e2e-local")
	cmd.Stdout = os.Stderr
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		fmt.Fprintf(os.Stderr, "e2e: install-runner failed: %v\n", err)
		return 1
	}

	serverURL = "http://localhost:12434"
	return waitAndRunTests(m)
}

func waitAndRunTests(m *testing.M) int {
if err := waitForServer(serverURL+"/models", serverStartTimeout); err != nil {
fmt.Fprintf(os.Stderr, "e2e: %v\n", err)
return 1
}
fmt.Fprintf(os.Stderr, "e2e: server ready at %s\n", serverURL)

return m.Run()
Comment thread
doringeman marked this conversation as resolved.
}

Expand Down
15 changes: 11 additions & 4 deletions e2e/inference_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,24 @@ import (
"net/http"
"strings"
"testing"

"github.com/docker/model-runner/pkg/inference/platform"
)

// backendTestCase pairs an inference backend with the model used to test it.
type backendTestCase struct {
	// name is the backend's name, e.g. "llama.cpp" or "vllm-metal".
	name string
	// model is the model used with this backend (ggufModel or mlxModel in
	// the backends list).
	model string
}

var backends = []backendTestCase{
{"llama.cpp", ggufModel},
{"vllm-metal", mlxModel},
}
// backends is the list of backend/model pairs the e2e tests iterate over.
// llama.cpp is always included; vllm-metal is appended only when
// platform.SupportsVLLMMetal reports true.
var backends = func() []backendTestCase {
	cases := []backendTestCase{
		{name: "llama.cpp", model: ggufModel},
	}
	if !platform.SupportsVLLMMetal() {
		return cases
	}
	return append(cases, backendTestCase{name: "vllm-metal", model: mlxModel})
}()

func TestE2E_Inference(t *testing.T) {
for _, bc := range backends {
Expand Down
Loading