Skip to content

Commit 56d4632

Browse files
committed
fix(cli): support docker model logs from WSL2 with Docker Desktop
Signed-off-by: Dorin Geman <dorin.geman@docker.com>
2 parents 2bd7997 + 6e3a208 commit 56d4632

6 files changed

Lines changed: 174 additions & 36 deletions

File tree

cmd/cli/commands/logs.go

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@ import (
77
"fmt"
88
"io"
99
"os"
10+
"os/exec"
1011
"os/signal"
1112
"path/filepath"
1213
"regexp"
1314
"runtime"
15+
"strings"
1416
"time"
1517

1618
"github.com/docker/model-runner/cmd/cli/commands/completion"
@@ -70,9 +72,22 @@ func newLogsCmd() *cobra.Command {
7072
case "darwin":
7173
serviceLogPath = filepath.Join(homeDir, "Library/Containers/com.docker.docker/Data/log/host/inference.log")
7274
runtimeLogPath = filepath.Join(homeDir, "Library/Containers/com.docker.docker/Data/log/host/inference-llama.cpp-server.log")
73-
case "windows":
74-
serviceLogPath = filepath.Join(homeDir, "AppData/Local/Docker/log/host/inference.log")
75-
runtimeLogPath = filepath.Join(homeDir, "AppData/Local/Docker/log/host/inference-llama.cpp-server.log")
75+
case "windows", "linux":
76+
baseDir := homeDir
77+
if runtime.GOOS == "linux" {
78+
if !isWSL() {
79+
return fmt.Errorf("log viewing on native Linux is only supported in standalone mode")
80+
}
81+
// When running inside WSL2 with Docker Desktop, the log files
82+
// are on the Windows host filesystem mounted under /mnt/.
83+
winHomeDir, wslErr := windowsHomeDirFromWSL(cmd.Context())
84+
if wslErr != nil {
85+
return fmt.Errorf("unable to determine Windows home directory from WSL2: %w", wslErr)
86+
}
87+
baseDir = winHomeDir
88+
}
89+
serviceLogPath = filepath.Join(baseDir, "AppData/Local/Docker/log/host/inference.log")
90+
runtimeLogPath = filepath.Join(baseDir, "AppData/Local/Docker/log/host/inference-llama.cpp-server.log")
7691
default:
7792
return fmt.Errorf("unsupported OS: %s", runtime.GOOS)
7893
}
@@ -98,9 +113,13 @@ func newLogsCmd() *cobra.Command {
98113

99114
g, ctx := errgroup.WithContext(ctx)
100115

116+
// Poll mode is needed when tailing files over a mounted filesystem
117+
// (Windows or WSL2 accessing the Windows host via /mnt/).
118+
pollMode := runtime.GOOS == "windows" || (runtime.GOOS == "linux" && isWSL())
119+
101120
g.Go(func() error {
102121
t, err := tail.TailFile(
103-
serviceLogPath, tail.Config{Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Follow: true, ReOpen: true},
122+
serviceLogPath, tail.Config{Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Follow: true, ReOpen: true, Poll: pollMode},
104123
)
105124
if err != nil {
106125
return err
@@ -121,7 +140,7 @@ func newLogsCmd() *cobra.Command {
121140
if !noEngines {
122141
g.Go(func() error {
123142
t, err := tail.TailFile(
124-
runtimeLogPath, tail.Config{Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Follow: true, ReOpen: true},
143+
runtimeLogPath, tail.Config{Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Follow: true, ReOpen: true, Poll: pollMode},
125144
)
126145
if err != nil {
127146
return err
@@ -150,6 +169,35 @@ func newLogsCmd() *cobra.Command {
150169
return c
151170
}
152171

172+
// isWSL reports whether the current process is running inside a WSL2 environment.
173+
func isWSL() bool {
174+
_, ok := os.LookupEnv("WSL_DISTRO_NAME")
175+
return ok
176+
}
177+
178+
// windowsHomeDirFromWSL resolves the Windows user's home directory from
179+
// within a WSL2 environment by running "wslpath" on the USERPROFILE path
180+
// obtained via "wslvar". This returns a Linux path like /mnt/c/Users/Name.
181+
func windowsHomeDirFromWSL(ctx context.Context) (string, error) {
182+
out, err := exec.CommandContext(ctx, "wslvar", "USERPROFILE").Output()
183+
if err != nil {
184+
return "", fmt.Errorf("wslvar USERPROFILE: %w", err)
185+
}
186+
winPath := strings.TrimSpace(string(out))
187+
if winPath == "" {
188+
return "", fmt.Errorf("USERPROFILE is empty")
189+
}
190+
out, err = exec.CommandContext(ctx, "wslpath", "-u", winPath).Output()
191+
if err != nil {
192+
return "", fmt.Errorf("wslpath -u %q: %w", winPath, err)
193+
}
194+
linuxPath := strings.TrimSpace(string(out))
195+
if linuxPath == "" {
196+
return "", fmt.Errorf("wslpath returned empty path")
197+
}
198+
return linuxPath, nil
199+
}
200+
153201
var timestampRe = regexp.MustCompile(`\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)\].*`)
154202

155203
const timeFmt = "2006-01-02T15:04:05.000000000Z"

cmd/cli/commands/ps.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ func psTable(ps []desktop.BackendStatus) string {
5555
}
5656

5757
func formatUntil(status desktop.BackendStatus) string {
58+
if status.Loading {
59+
return "Loading..."
60+
}
61+
5862
keepAlive := inference.KeepAliveDefault
5963
if status.KeepAlive != nil {
6064
keepAlive = *status.KeepAlive

cmd/cli/desktop/desktop.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -747,6 +747,7 @@ type BackendStatus struct {
747747
Mode string `json:"mode"`
748748
LastUsed time.Time `json:"last_used,omitempty"`
749749
InUse bool `json:"in_use,omitempty"`
750+
Loading bool `json:"loading,omitempty"`
750751
KeepAlive *inference.KeepAlive `json:"keep_alive,omitempty"`
751752
}
752753

pkg/inference/scheduling/api.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ type BackendStatus struct {
7979
// LastUsed represents when this (backend, model, mode) tuple was last used
8080
LastUsed time.Time `json:"last_used,omitempty"`
8181
// InUse indicates whether this backend is currently handling a request
82-
InUse bool `json:"in_use,omitempty"`
82+
InUse bool `json:"in_use,omitempty"`
83+
// Loading indicates whether this backend is currently being initialized
84+
Loading bool `json:"loading,omitempty"`
8385
KeepAlive *inference.KeepAlive `json:"keep_alive,omitempty"`
8486
}
8587

pkg/inference/scheduling/loader.go

Lines changed: 102 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,15 @@ type runnerInfo struct {
8080
modelRef string
8181
}
8282

83+
// loadingInfo holds metadata about a runner that is being initialized.
84+
type loadingInfo struct {
85+
backendName string
86+
modelID string
87+
draftModelID string
88+
modelRef string
89+
mode inference.BackendMode
90+
}
91+
8392
// loader manages the loading and unloading of backend runners. It regulates
8493
// active backends in a manner that avoids exhausting system resources. Loaders
8594
// assume that all of their backends have been installed, so no load requests
@@ -109,6 +118,12 @@ type loader struct {
109118
waiters map[chan<- struct{}]bool
110119
// runners maps runner keys to their slot index.
111120
runners map[runnerKey]runnerInfo
121+
// loading tracks slots that have a runner being initialized. This
122+
// allows the lock to be released during long-running operations
123+
// (run + wait) while still preventing other goroutines from using
124+
// or evicting those slots. The value contains metadata needed to
125+
// report loading status.
126+
loading map[int]loadingInfo
112127
// slots maps slot indices to associated runners. A slot is considered free
113128
// if the runner value in it is nil.
114129
slots []*runner
@@ -157,6 +172,7 @@ func newLoader(
157172
guard: make(chan struct{}, 1),
158173
waiters: make(map[chan<- struct{}]bool),
159174
runners: make(map[runnerKey]runnerInfo, nSlots),
175+
loading: make(map[int]loadingInfo),
160176
slots: make([]*runner, nSlots),
161177
references: make([]uint, nSlots),
162178
timestamps: make([]time.Time, nSlots),
@@ -411,6 +427,30 @@ func (l *loader) run(ctx context.Context) {
411427
}
412428
}
413429

430+
// usedSlots returns the number of slots that are either occupied by a
431+
// registered runner or reserved for a runner being loaded.
432+
func (l *loader) usedSlots() int {
433+
return len(l.runners) + len(l.loading)
434+
}
435+
436+
// isSlotLoading reports whether the given slot is reserved for a runner
437+
// that is currently being initialized.
438+
func (l *loader) isSlotLoading(slot int) bool {
439+
_, ok := l.loading[slot]
440+
return ok
441+
}
442+
443+
// isModelLoading reports whether a runner for the given model is currently
444+
// being initialized by another goroutine.
445+
func (l *loader) isModelLoading(backendName, modelID, draftModelID string, mode inference.BackendMode) bool {
446+
for _, info := range l.loading {
447+
if info.backendName == backendName && info.modelID == modelID && info.draftModelID == draftModelID && info.mode == mode {
448+
return true
449+
}
450+
}
451+
return false
452+
}
453+
414454
// load allocates a runner using the specified backend and modelID. If allocated,
415455
// it should be released by the caller using the release mechanism (once the
416456
// runner is no longer needed).
@@ -427,7 +467,9 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
427467
if !l.lock(ctx) {
428468
return nil, context.Canceled
429469
}
430-
defer l.unlock()
470+
// Note: the lock is managed explicitly throughout this function rather
471+
// than via defer, because it is released during long-running operations
472+
// (run + wait) and re-acquired afterwards.
431473

432474
// Get runner configuration if available (must be done under lock since
433475
// runnerConfigs can be modified concurrently by setRunnerConfig).
@@ -468,22 +510,37 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
468510
// ensure that it's deregistered by the time we return.
469511
poll := make(chan struct{}, 1)
470512
l.waiters[poll] = true
471-
defer func() {
513+
514+
// cleanupAndReturn is a helper that cleans up the waiter registration,
515+
// releases the lock, and returns. All exit paths must go through this
516+
// to avoid leaking the poll channel or double-unlocking.
517+
cleanupAndReturn := func(r *runner, err error) (*runner, error) {
472518
delete(l.waiters, poll)
473-
}()
519+
l.unlock()
520+
return r, err
521+
}
474522

475523
// Loop until we can satisfy the request or an error occurs.
524+
// These are declared outside the loop to avoid goto-over-declaration errors.
525+
var existing runnerInfo
526+
var existingOK bool
476527
for {
477528
slot := -1
478529

479530
// If loads are disabled, then there's nothing we can do.
480531
if !l.loadsEnabled {
481-
return nil, errLoadsDisabled
532+
return cleanupAndReturn(nil, errLoadsDisabled)
533+
}
534+
535+
// See if another goroutine is already loading this runner.
536+
// If so, wait for it to finish rather than starting a duplicate load.
537+
if l.isModelLoading(backendName, modelID, draftModelID, mode) {
538+
goto WaitForChange
482539
}
483540

484541
// See if we can satisfy the request with an existing runner.
485-
existing, ok := l.runners[makeRunnerKey(backendName, modelID, draftModelID, mode)]
486-
if ok {
542+
existing, existingOK = l.runners[makeRunnerKey(backendName, modelID, draftModelID, mode)]
543+
if existingOK {
487544
select {
488545
case <-l.slots[existing.slot].done:
489546
l.log.Warn("Runner is defunct, waiting for eviction", "backend", backendName, "model", existing.modelRef)
@@ -497,13 +554,13 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
497554
default:
498555
l.references[existing.slot]++
499556
l.timestamps[existing.slot] = time.Time{}
500-
return l.slots[existing.slot], nil
557+
return cleanupAndReturn(l.slots[existing.slot], nil)
501558
}
502559
}
503560

504-
// If all slots are full, try evicting unused runners.
505-
if len(l.runners) == len(l.slots) {
506-
l.log.Info("Evicting to make room", "runners", len(l.runners), "slots", len(l.slots))
561+
// If all slots are full (including loading reservations), try evicting unused runners.
562+
if l.usedSlots() >= len(l.slots) {
563+
l.log.Info("Evicting to make room", "runners", len(l.runners), "loading", len(l.loading), "slots", len(l.slots))
507564
runnerCountAtLoopStart := len(l.runners)
508565
remainingRunners := l.evict(false)
509566
// Restart the loop if eviction happened
@@ -512,46 +569,62 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
512569
}
513570
}
514571

515-
// If there's a free slot, then find the slot.
516-
if len(l.runners) < len(l.slots) {
572+
// If there's a free slot, then find one that is not reserved for loading.
573+
if l.usedSlots() < len(l.slots) {
517574
for s, runner := range l.slots {
518-
if runner == nil {
575+
if runner == nil && !l.isSlotLoading(s) {
519576
slot = s
520577
break
521578
}
522579
}
523580
}
524581

525582
if slot < 0 {
526-
l.log.Debug("Cannot load model yet", "runners", len(l.runners), "slots", len(l.slots))
583+
l.log.Debug("Cannot load model yet", "runners", len(l.runners), "loading", len(l.loading), "slots", len(l.slots))
527584
}
528585

529586
// If we've identified a slot, then we're ready to start a runner.
530587
if slot >= 0 {
531-
// Create the runner.
532-
runner, err := run(l.log, backend, modelID, modelRef, mode, slot, runnerConfig, l.openAIRecorder)
588+
// Reserve the slot and release the lock for the long-running
589+
// operations (run + wait). This allows other goroutines to
590+
// proceed with loading different models, releasing runners, etc.
591+
l.loading[slot] = loadingInfo{
592+
backendName: backendName,
593+
modelID: modelID,
594+
draftModelID: draftModelID,
595+
modelRef: modelRef,
596+
mode: mode,
597+
}
598+
l.unlock()
599+
600+
newRunner, err := run(l.log, backend, modelID, modelRef, mode, slot, runnerConfig, l.openAIRecorder)
533601
if err != nil {
534602
l.log.Warn("Unable to start backend runner", "backend", backendName, "model", modelID, "mode", mode, "error", err)
535-
return nil, fmt.Errorf("unable to start runner: %w", err)
603+
l.lock(context.Background())
604+
delete(l.loading, slot)
605+
l.broadcast()
606+
return cleanupAndReturn(nil, fmt.Errorf("unable to start runner: %w", err))
536607
}
537608

538-
// Wait for the runner to be ready. In theory it's a little
539-
// inefficient to block all other loaders (including those that
540-
// might not want this runner), but in reality they would probably
541-
// be blocked by the underlying loading anyway (in terms of disk and
542-
// GPU performance). We have to retain a lock here though to enforce
543-
// deduplication of runners and keep slot / memory reservations.
544-
if err := runner.wait(ctx); err != nil {
545-
runner.terminate()
609+
if err := newRunner.wait(ctx); err != nil {
610+
newRunner.terminate()
546611
l.log.Warn("Backend runner initialization failed", "backend", backendName, "model", modelID, "mode", mode, "error", err)
547-
return nil, fmt.Errorf("error waiting for runner to be ready: %w", err)
612+
l.lock(context.Background())
613+
delete(l.loading, slot)
614+
l.broadcast()
615+
return cleanupAndReturn(nil, fmt.Errorf("error waiting for runner to be ready: %w", err))
548616
}
549617

618+
// Re-acquire lock and register the runner.
619+
l.lock(context.Background())
620+
delete(l.loading, slot)
621+
550622
// Perform registration and return the runner.
551623
l.runners[makeRunnerKey(backendName, modelID, draftModelID, mode)] = runnerInfo{slot, modelRef}
552-
l.slots[slot] = runner
624+
l.slots[slot] = newRunner
553625
l.references[slot] = 1
554-
return runner, nil
626+
l.broadcast()
627+
return cleanupAndReturn(newRunner, nil)
555628
}
556629

557630
// Wait for something to change. Note that we always re-lock with
@@ -562,7 +635,7 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
562635
select {
563636
case <-ctx.Done():
564637
l.lock(context.Background())
565-
return nil, context.Canceled
638+
return cleanupAndReturn(nil, context.Canceled)
566639
case <-poll:
567640
l.lock(context.Background())
568641
}

pkg/inference/scheduling/scheduler.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ func (s *Scheduler) getLoaderStatus(ctx context.Context) []BackendStatus {
180180
}
181181
defer s.loader.unlock()
182182

183-
result := make([]BackendStatus, 0, len(s.loader.runners))
183+
result := make([]BackendStatus, 0, len(s.loader.runners)+len(s.loader.loading))
184184

185185
for key, runnerInfo := range s.loader.runners {
186186
if s.loader.slots[runnerInfo.slot] != nil {
@@ -205,6 +205,16 @@ func (s *Scheduler) getLoaderStatus(ctx context.Context) []BackendStatus {
205205
}
206206
}
207207

208+
// Include models that are currently being loaded.
209+
for _, info := range s.loader.loading {
210+
result = append(result, BackendStatus{
211+
BackendName: info.backendName,
212+
ModelName: info.modelRef,
213+
Mode: info.mode.String(),
214+
Loading: true,
215+
})
216+
}
217+
208218
return result
209219
}
210220

0 commit comments

Comments
 (0)