Skip to content

Commit c6c66e2

Browse files
authored
Merge pull request #773 from docker/conveinience
Add dmr dev convenience wrapper
2 parents 5b54bfa + 7e6e15b commit c6c66e2

3 files changed

Lines changed: 266 additions & 41 deletions

File tree

Makefile

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,27 +22,28 @@ DOCKER_BUILD_ARGS := \
2222
--target $(DOCKER_TARGET) \
2323
-t $(DOCKER_IMAGE)
2424

25-
# Test configuration
26-
BUILD_DMR ?= 1
27-
2825
# Phony targets grouped by category
29-
.PHONY: build run clean test integration-tests build-cli install-cli
26+
.PHONY: build build-cli build-dmr install-cli run clean test integration-tests
3027
.PHONY: validate validate-all lint help
3128
.PHONY: docker-build docker-build-multiplatform docker-run docker-run-impl
3229
.PHONY: docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang
3330
.PHONY: test-docker-ce-installation
3431
.PHONY: vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
3532
.PHONY: diffusers-build diffusers-install diffusers-dev diffusers-clean
36-
# Default target
33+
# Default target: build server, CLI plugin, and dmr convenience wrapper
3734
.DEFAULT_GOAL := build
3835

39-
# Build the Go application
40-
build:
36+
build: build-server build-cli build-dmr
37+
38+
build-server:
4139
CGO_ENABLED=1 go build -ldflags="-s -w -X main.Version=$(shell git describe --tags --always --dirty --match 'v*')" -o $(APP_NAME) .
4240

4341
build-cli:
4442
$(MAKE) -C cmd/cli
4543

44+
build-dmr:
45+
go build -ldflags="-s -w" -o dmr ./cmd/dmr
46+
4647
install-cli:
4748
$(MAKE) -C cmd/cli install
4849

@@ -61,6 +62,7 @@ run: build
6162
# Clean build artifacts
6263
clean:
6364
rm -f $(APP_NAME)
65+
rm -f dmr
6466
rm -f model-runner.sock
6567

6668
# Run tests
@@ -77,7 +79,7 @@ integration-tests:
7779
echo "$$INVALID_TESTS" | sed 's/func \([^(]*\).*/\1/'; \
7880
exit 1; \
7981
fi
80-
@BUILD_DMR=$(BUILD_DMR) go test -v -race -count=1 -tags=integration -run "^TestIntegration" -timeout=5m ./cmd/cli/commands
82+
go test -v -race -count=1 -tags=integration -run "^TestIntegration" -timeout=5m ./cmd/cli/commands
8183
@echo "Integration tests completed!"
8284

8385
test-docker-ce-installation:
@@ -308,7 +310,8 @@ diffusers-clean:
308310

309311
help:
310312
@echo "Available targets:"
311-
@echo " build - Build the Go application"
313+
@echo " build - Build server, CLI plugin, and dmr wrapper (default)"
314+
@echo " build-server - Build the model-runner server"
312315
@echo " build-cli - Build the CLI (docker-model plugin)"
313316
@echo " install-cli - Build and install the CLI as a Docker plugin"
314317
@echo " docs - Generate CLI documentation"

README.md

Lines changed: 124 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -96,60 +96,40 @@ Before building from source, ensure you have the following installed:
9696

9797
### Building the Complete Stack
9898

99-
#### Step 1: Clone and Build model-runner (Server/Daemon)
99+
After cloning, a single `make` builds everything — the server, CLI plugin, and a `dmr` convenience wrapper:
100100

101101
```bash
102-
# Clone the model-runner repository
103-
git clone https://github.com/docker/model-runner.git
104-
cd model-runner
105-
106-
# Build the model-runner binary
107-
make build
108-
109-
# Or build with specific backend arguments
110-
make run LLAMA_ARGS="--verbose --jinja -ngl 999 --ctx-size 2048"
111-
112-
# Run tests to verify the build
113-
make test
102+
make
114103
```
115104

116-
The `model-runner` binary will be created in the current directory. This is the backend server that manages models.
117-
118-
#### Step 2: Build model-cli (Client)
105+
`dmr` starts the server on a free port, waits for it to be ready, runs your CLI command, then shuts the server down:
119106

120107
```bash
121-
# From the root directory, navigate to the model-cli directory
122-
cd cmd/cli
123-
124-
# Build the CLI binary
125-
make build
126-
127-
# The binary will be named 'model-cli'
128-
# Optionally, install it as a Docker CLI plugin
129-
make install # This will link it to ~/.docker/cli-plugins/docker-model
108+
./dmr run ai/smollm2 "Hello, how are you?"
109+
./dmr ls
110+
./dmr run qwen3:0.6B-Q4_0 tell me today's news
130111
```
131112
113+
These components can also be built, run, and tested separately using the Makefile.
114+
132115
### Testing the Complete Stack End-to-End
133116
134117
> **Note:** We use port 13434 in these examples to avoid conflicts with Docker Desktop's built-in Model Runner, which typically runs on port 12434.
135118

136-
#### Option 1: Local Development (Recommended for Contributors)
119+
#### Option 1: Manual two-terminal setup
137120

138121
1. **Start model-runner in one terminal:**
139122
```bash
140-
cd model-runner
141123
MODEL_RUNNER_PORT=13434 ./model-runner
142-
# The server will start on port 13434
143124
```
144125

145126
2. **Use model-cli in another terminal:**
146127
```bash
147-
cd cmd/cli
148-
# List available models (connecting to port 13434)
149-
MODEL_RUNNER_HOST=http://localhost:13434 ./model-cli list
128+
# List available models
129+
MODEL_RUNNER_HOST=http://localhost:13434 ./cmd/cli/model-cli list
150130
151131
# Pull and run a model
152-
MODEL_RUNNER_HOST=http://localhost:13434 ./model-cli run ai/smollm2 "Hello, how are you?"
132+
MODEL_RUNNER_HOST=http://localhost:13434 ./cmd/cli/model-cli run ai/smollm2 "Hello, how are you?"
153133
```
154134

155135
#### Option 2: Using Docker
@@ -422,6 +402,118 @@ in the form of [a Helm chart and static YAML](charts/docker-model-runner/README.
422402
If you are interested in a specific Kubernetes use-case, please start a
423403
discussion on the issue tracker.
424404
405+
407+
## dmrlet: Container Orchestrator for AI Inference
408+
409+
dmrlet is a purpose-built container orchestrator for AI inference workloads. Unlike Kubernetes, it focuses exclusively on running stateless inference containers with zero configuration overhead. Multi-GPU mapping "just works" without YAML, device plugins, or node selectors.
410+
411+
### Key Features
412+
413+
| Feature | Kubernetes | dmrlet |
414+
|---------|------------|--------|
415+
| Multi-GPU setup | Device plugins + node selectors + resource limits YAML | `dmrlet serve llama3 --gpus all` |
416+
| Config overhead | 50+ lines of YAML minimum | Zero YAML, CLI-only |
417+
| Time to first inference | Minutes (pod scheduling, image pull) | Seconds (model already local) |
418+
| Model management | External (mount PVCs, manage yourself) | Integrated with Docker Model Runner store |
419+
420+
### Building dmrlet
421+
422+
```bash
423+
# Build the dmrlet binary
424+
go build -o dmrlet ./cmd/dmrlet
425+
426+
# Verify it works
427+
./dmrlet --help
428+
```
429+
430+
### Usage
431+
432+
**Start the daemon:**
433+
```bash
434+
# Start in foreground
435+
dmrlet daemon
436+
437+
# With custom socket path
438+
dmrlet daemon --socket /tmp/dmrlet.sock
439+
```
440+
441+
**Serve a model:**
442+
```bash
443+
# Auto-detect backend and GPUs
444+
dmrlet serve llama3.2
445+
446+
# Specify backend
447+
dmrlet serve llama3.2 --backend vllm
448+
449+
# Specify GPU allocation
450+
dmrlet serve llama3.2 --gpus 0,1
451+
dmrlet serve llama3.2 --gpus all
452+
453+
# Multiple replicas
454+
dmrlet serve llama3.2 --replicas 2
455+
456+
# Backend-specific options
457+
dmrlet serve llama3.2 --ctx-size 4096 # llama.cpp context size
458+
dmrlet serve llama3.2 --gpu-memory 0.8 # vLLM GPU memory utilization
459+
```
460+
461+
**List running models:**
462+
```bash
463+
dmrlet ps
464+
# MODEL BACKEND REPLICAS GPUS ENDPOINTS STATUS
465+
# llama3.2 llama.cpp 1 [0,1,2,3] localhost:30000 healthy
466+
```
467+
468+
**View logs:**
469+
```bash
470+
dmrlet logs llama3.2 # Last 100 lines
471+
dmrlet logs llama3.2 -f # Follow logs
472+
```
473+
474+
**Scale replicas:**
475+
```bash
476+
dmrlet scale llama3.2 4 # Scale to 4 replicas
477+
```
478+
479+
**Stop a model:**
480+
```bash
481+
dmrlet stop llama3.2
482+
dmrlet stop --all # Stop all models
483+
```
484+
485+
**Check status:**
486+
```bash
487+
dmrlet status
488+
# DAEMON: running
489+
# SOCKET: /var/run/dmrlet.sock
490+
#
491+
# GPUs:
492+
# GPU 0: NVIDIA A100 80GB 81920MB (in use: llama3.2)
493+
# GPU 1: NVIDIA A100 80GB 81920MB (available)
494+
#
495+
# MODELS: 1 running
496+
```
497+
498+
### Supported Backends
499+
500+
- **llama.cpp** - Default backend for GGUF models
501+
- **vLLM** - High-throughput serving for safetensors models
502+
- **SGLang** - Fast serving with RadixAttention
503+
504+
### Architecture
505+
506+
```
507+
dmrlet daemon
508+
├── GPU Manager - Auto-detect and allocate GPUs
509+
├── Container Manager - Docker-based container lifecycle
510+
├── Service Registry - Endpoint discovery with load balancing
511+
├── Health Monitor - Auto-restart unhealthy containers
512+
├── Auto-scaler - Scale based on QPS/latency/GPU utilization
513+
└── Log Aggregator - Centralized log collection
514+
```
515+
516+
425517
## Community
426518
427519
For general questions and discussion, please use [Docker Model Runner's Slack channel](https://dockercommunity.slack.com/archives/C09H9P5E57B).

cmd/dmr/main.go

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// dmr is a developer convenience wrapper that starts the model-runner server on
2+
// a free port and runs a model-cli command against it in one step.
3+
//
4+
// Usage: dmr <cli-args...>
5+
//
6+
// Example: dmr run qwen3:0.6B-Q4_0 tell me today's news
7+
package main
8+
9+
import (
10+
"errors"
11+
"fmt"
12+
"net"
13+
"net/http"
14+
"os"
15+
"os/exec"
16+
"os/signal"
17+
"path/filepath"
18+
"strconv"
19+
"syscall"
20+
"time"
21+
)
22+
23+
// freePort asks the kernel for an unused TCP port by binding to port 0
// on the loopback interface and reading back the assigned address.
func freePort() (int, error) {
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		return 0, err
	}
	port := listener.Addr().(*net.TCPAddr).Port
	listener.Close()
	return port, nil
}
31+
32+
func waitForServer(url string, timeout time.Duration) error {
33+
client := &http.Client{Timeout: time.Second}
34+
deadline := time.Now().Add(timeout)
35+
for time.Now().Before(deadline) {
36+
resp, err := client.Get(url)
37+
if err == nil {
38+
resp.Body.Close()
39+
if resp.StatusCode == http.StatusOK {
40+
return nil
41+
}
42+
}
43+
time.Sleep(200 * time.Millisecond)
44+
}
45+
return fmt.Errorf("server not ready after %s", timeout)
46+
}
47+
48+
func checkBinary(path, name, expectedLayout string) error {
49+
if _, err := os.Stat(path); os.IsNotExist(err) {
50+
return fmt.Errorf("missing %s binary at %s\n\nExpected directory layout:\n%s\n\nPlease run 'make build' to build all binaries", name, path, expectedLayout)
51+
}
52+
return nil
53+
}
54+
55+
func main() {
56+
self, err := os.Executable()
57+
if err != nil {
58+
fmt.Fprintf(os.Stderr, "dmr: %v\n", err)
59+
os.Exit(1)
60+
}
61+
dir := filepath.Dir(self)
62+
63+
serverBin := filepath.Join(dir, "model-runner")
64+
cliBin := filepath.Join(dir, "cmd", "cli", "model-cli")
65+
66+
expectedLayout := fmt.Sprintf(`%s/
67+
├── model-runner (server binary)
68+
├── dmr (this wrapper)
69+
└── cmd/
70+
└── cli/
71+
└── model-cli (CLI binary)`, dir)
72+
73+
if err := checkBinary(serverBin, "model-runner", expectedLayout); err != nil {
74+
fmt.Fprintf(os.Stderr, "dmr: %v\n", err)
75+
os.Exit(1)
76+
}
77+
if err := checkBinary(cliBin, "model-cli", expectedLayout); err != nil {
78+
fmt.Fprintf(os.Stderr, "dmr: %v\n", err)
79+
os.Exit(1)
80+
}
81+
82+
port, err := freePort()
83+
if err != nil {
84+
fmt.Fprintf(os.Stderr, "dmr: failed to find free port: %v\n", err)
85+
os.Exit(1)
86+
}
87+
portStr := strconv.Itoa(port)
88+
serverURL := "http://localhost:" + portStr
89+
90+
fmt.Fprintf(os.Stderr, "dmr: starting model-runner on port %d\n", port)
91+
92+
server := exec.Command(serverBin)
93+
server.Env = append(os.Environ(), "MODEL_RUNNER_PORT="+portStr)
94+
server.Stderr = os.Stderr
95+
server.Stdout = os.Stdout
96+
97+
if err := server.Start(); err != nil {
98+
fmt.Fprintf(os.Stderr, "dmr: failed to start model-runner: %v\n", err)
99+
os.Exit(1)
100+
}
101+
defer server.Process.Kill()
102+
103+
sigCh := make(chan os.Signal, 1)
104+
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
105+
go func() {
106+
<-sigCh
107+
server.Process.Kill()
108+
}()
109+
110+
if err := waitForServer(serverURL+"/", 30*time.Second); err != nil {
111+
fmt.Fprintf(os.Stderr, "dmr: %v\n", err)
112+
os.Exit(1)
113+
}
114+
115+
// #nosec G702 - Intentional: dmr is a CLI wrapper that forwards arguments to model-cli
116+
cli := exec.Command(cliBin, os.Args[1:]...)
117+
cli.Env = append(os.Environ(), "MODEL_RUNNER_HOST="+serverURL)
118+
cli.Stdin = os.Stdin
119+
cli.Stdout = os.Stdout
120+
cli.Stderr = os.Stderr
121+
122+
if err := cli.Run(); err != nil {
123+
var exitErr *exec.ExitError
124+
if errors.As(err, &exitErr) {
125+
os.Exit(exitErr.ExitCode())
126+
}
127+
fmt.Fprintf(os.Stderr, "dmr: %v\n", err)
128+
os.Exit(1)
129+
}
130+
}

0 commit comments

Comments
 (0)