fix(cli): support docker model logs from WSL2 with Docker Desktop

doringeman · doringeman · commit 56d46326a2fc · 2026-03-17T18:30:57.000+02:00
Signed-off-by: Dorin Geman <dorin.geman@docker.com>
diff --git a/cmd/cli/commands/logs.go b/cmd/cli/commands/logs.go
@@ -7,10 +7,12 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"os/exec"
 	"os/signal"
 	"path/filepath"
 	"regexp"
 	"runtime"
+	"strings"
 	"time"
 
 	"github.com/docker/model-runner/cmd/cli/commands/completion"
@@ -70,9 +72,22 @@ func newLogsCmd() *cobra.Command {
 			case "darwin":
 				serviceLogPath = filepath.Join(homeDir, "Library/Containers/com.docker.docker/Data/log/host/inference.log")
 				runtimeLogPath = filepath.Join(homeDir, "Library/Containers/com.docker.docker/Data/log/host/inference-llama.cpp-server.log")
-			case "windows":
-				serviceLogPath = filepath.Join(homeDir, "AppData/Local/Docker/log/host/inference.log")
-				runtimeLogPath = filepath.Join(homeDir, "AppData/Local/Docker/log/host/inference-llama.cpp-server.log")
+			case "windows", "linux":
+				baseDir := homeDir
+				if runtime.GOOS == "linux" {
+					if !isWSL() {
+						return fmt.Errorf("log viewing on native Linux is only supported in standalone mode")
+					}
+					// When running inside WSL2 with Docker Desktop, the log files
+					// are on the Windows host filesystem mounted under /mnt/.
+					winHomeDir, wslErr := windowsHomeDirFromWSL(cmd.Context())
+					if wslErr != nil {
+						return fmt.Errorf("unable to determine Windows home directory from WSL2: %w", wslErr)
+					}
+					baseDir = winHomeDir
+				}
+				serviceLogPath = filepath.Join(baseDir, "AppData/Local/Docker/log/host/inference.log")
+				runtimeLogPath = filepath.Join(baseDir, "AppData/Local/Docker/log/host/inference-llama.cpp-server.log")
 			default:
 				return fmt.Errorf("unsupported OS: %s", runtime.GOOS)
 			}
@@ -98,9 +113,13 @@ func newLogsCmd() *cobra.Command {
 
 			g, ctx := errgroup.WithContext(ctx)
 
+			// Poll mode is needed when tailing files over a mounted filesystem
+			// (Windows or WSL2 accessing the Windows host via /mnt/).
+			pollMode := runtime.GOOS == "windows" || (runtime.GOOS == "linux" && isWSL())
+
 			g.Go(func() error {
 				t, err := tail.TailFile(
-					serviceLogPath, tail.Config{Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Follow: true, ReOpen: true},
+					serviceLogPath, tail.Config{Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Follow: true, ReOpen: true, Poll: pollMode},
 				)
 				if err != nil {
 					return err
@@ -121,7 +140,7 @@ func newLogsCmd() *cobra.Command {
 			if !noEngines {
 				g.Go(func() error {
 					t, err := tail.TailFile(
-						runtimeLogPath, tail.Config{Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Follow: true, ReOpen: true},
+						runtimeLogPath, tail.Config{Location: &tail.SeekInfo{Offset: 0, Whence: io.SeekEnd}, Follow: true, ReOpen: true, Poll: pollMode},
 					)
 					if err != nil {
 						return err
@@ -150,6 +169,35 @@ func newLogsCmd() *cobra.Command {
 	return c
 }
 
+// isWSL reports whether WSL_DISTRO_NAME is set in the environment; WSL2 is assumed, not verified.
+func isWSL() bool {
+	_, ok := os.LookupEnv("WSL_DISTRO_NAME") // presence is enough; the value is ignored
+	return ok
+}
+
+// windowsHomeDirFromWSL returns the Windows user's home directory as a Linux
+// path (e.g. /mnt/c/Users/Name): it reads USERPROFILE with "wslvar", converts it
+// with "wslpath -u", and errors if either command fails or prints nothing.
+func windowsHomeDirFromWSL(ctx context.Context) (string, error) {
+	out, err := exec.CommandContext(ctx, "wslvar", "USERPROFILE").Output() // bound to ctx
+	if err != nil {
+		return "", fmt.Errorf("wslvar USERPROFILE: %w", err)
+	}
+	winPath := strings.TrimSpace(string(out)) // strip surrounding whitespace from the command output
+	if winPath == "" {
+		return "", fmt.Errorf("USERPROFILE is empty")
+	}
+	out, err = exec.CommandContext(ctx, "wslpath", "-u", winPath).Output() // -u: Windows path to Linux path
+	if err != nil {
+		return "", fmt.Errorf("wslpath -u %q: %w", winPath, err)
+	}
+	linuxPath := strings.TrimSpace(string(out)) // strip surrounding whitespace from the command output
+	if linuxPath == "" {
+		return "", fmt.Errorf("wslpath returned empty path")
+	}
+	return linuxPath, nil
+}
+
 var timestampRe = regexp.MustCompile(`\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)\].*`)
 
 const timeFmt = "2006-01-02T15:04:05.000000000Z"
diff --git a/cmd/cli/commands/ps.go b/cmd/cli/commands/ps.go
@@ -55,6 +55,10 @@ func psTable(ps []desktop.BackendStatus) string {
 }
 
 func formatUntil(status desktop.BackendStatus) string {
+	if status.Loading {
+		return "Loading..."
+	}
+
 	keepAlive := inference.KeepAliveDefault
 	if status.KeepAlive != nil {
 		keepAlive = *status.KeepAlive
diff --git a/cmd/cli/desktop/desktop.go b/cmd/cli/desktop/desktop.go
@@ -747,6 +747,7 @@ type BackendStatus struct {
 	Mode        string               `json:"mode"`
 	LastUsed    time.Time            `json:"last_used,omitempty"`
 	InUse       bool                 `json:"in_use,omitempty"`
+	Loading     bool                 `json:"loading,omitempty"`
 	KeepAlive   *inference.KeepAlive `json:"keep_alive,omitempty"`
 }
 
diff --git a/pkg/inference/scheduling/api.go b/pkg/inference/scheduling/api.go
@@ -79,7 +79,9 @@ type BackendStatus struct {
 	// LastUsed represents when this (backend, model, mode) tuple was last used
 	LastUsed time.Time `json:"last_used,omitempty"`
 	// InUse indicates whether this backend is currently handling a request
-	InUse     bool                 `json:"in_use,omitempty"`
+	InUse bool `json:"in_use,omitempty"`
+	// Loading indicates whether this backend is currently being initialized
+	Loading   bool                 `json:"loading,omitempty"`
 	KeepAlive *inference.KeepAlive `json:"keep_alive,omitempty"`
 }
 
diff --git a/pkg/inference/scheduling/loader.go b/pkg/inference/scheduling/loader.go
@@ -80,6 +80,15 @@ type runnerInfo struct {
 	modelRef string
 }
 
+// loadingInfo records which backend, model, draft model, and mode a reserved slot is being loaded for.
+type loadingInfo struct {
+	backendName  string
+	modelID      string
+	draftModelID string
+	modelRef     string // reported as ModelName for loading entries in the loader status
+	mode         inference.BackendMode
+}
+
 // loader manages the loading and unloading of backend runners. It regulates
 // active backends in a manner that avoids exhausting system resources. Loaders
 // assume that all of their backends have been installed, so no load requests
@@ -109,6 +118,12 @@ type loader struct {
 	waiters map[chan<- struct{}]bool
 	// runners maps runner keys to their slot index.
 	runners map[runnerKey]runnerInfo
+	// loading tracks slots that have a runner being initialized. This
+	// allows the lock to be released during long-running operations
+	// (run + wait) while still preventing other goroutines from using
+	// or evicting those slots. The value contains metadata needed to
+	// report loading status.
+	loading map[int]loadingInfo
 	// slots maps slot indices to associated runners. A slot is considered free
 	// if the runner value in it is nil.
 	slots []*runner
@@ -157,6 +172,7 @@ func newLoader(
 		guard:             make(chan struct{}, 1),
 		waiters:           make(map[chan<- struct{}]bool),
 		runners:           make(map[runnerKey]runnerInfo, nSlots),
+		loading:           make(map[int]loadingInfo),
 		slots:             make([]*runner, nSlots),
 		references:        make([]uint, nSlots),
 		timestamps:        make([]time.Time, nSlots),
@@ -411,6 +427,30 @@ func (l *loader) run(ctx context.Context) {
 	}
 }
 
+// usedSlots returns len(l.runners) + len(l.loading): the slots holding a
+// registered runner plus the slots reserved for a runner still being loaded.
+func (l *loader) usedSlots() int {
+	return len(l.runners) + len(l.loading)
+}
+
+// isSlotLoading reports whether the given slot index has an entry in
+// l.loading, i.e. it is reserved for a runner that is still being initialized.
+func (l *loader) isSlotLoading(slot int) bool {
+	_, ok := l.loading[slot]
+	return ok
+}
+
+// isModelLoading reports whether any entry in l.loading matches the given
+// backend, model, draft model, and mode, i.e. a load of that runner is in flight.
+func (l *loader) isModelLoading(backendName, modelID, draftModelID string, mode inference.BackendMode) bool {
+	for _, info := range l.loading { // NOTE(review): no locking here; presumably callers hold the loader lock — confirm
+		if info.backendName == backendName && info.modelID == modelID && info.draftModelID == draftModelID && info.mode == mode {
+			return true
+		}
+	}
+	return false
+}
+
 // load allocates a runner using the specified backend and modelID. If allocated,
 // it should be released by the caller using the release mechanism (once the
 // runner is no longer needed).
@@ -427,7 +467,9 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
 	if !l.lock(ctx) {
 		return nil, context.Canceled
 	}
-	defer l.unlock()
+	// Note: the lock is managed explicitly throughout this function rather
+	// than via defer, because it is released during long-running operations
+	// (run + wait) and re-acquired afterwards.
 
 	// Get runner configuration if available (must be done under lock since
 	// runnerConfigs can be modified concurrently by setRunnerConfig).
@@ -468,22 +510,37 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
 	// ensure that it's deregistered by the time we return.
 	poll := make(chan struct{}, 1)
 	l.waiters[poll] = true
-	defer func() {
+
+	// cleanupAndReturn is a helper that cleans up the waiter registration,
+	// releases the lock, and returns. All exit paths must go through this
+	// to avoid leaking the poll channel or double-unlocking.
+	cleanupAndReturn := func(r *runner, err error) (*runner, error) {
 		delete(l.waiters, poll)
-	}()
+		l.unlock()
+		return r, err
+	}
 
 	// Loop until we can satisfy the request or an error occurs.
+	// These are declared outside the loop to avoid goto-over-declaration errors.
+	var existing runnerInfo
+	var existingOK bool
 	for {
 		slot := -1
 
 		// If loads are disabled, then there's nothing we can do.
 		if !l.loadsEnabled {
-			return nil, errLoadsDisabled
+			return cleanupAndReturn(nil, errLoadsDisabled)
+		}
+
+		// See if another goroutine is already loading this runner.
+		// If so, wait for it to finish rather than starting a duplicate load.
+		if l.isModelLoading(backendName, modelID, draftModelID, mode) {
+			goto WaitForChange
 		}
 
 		// See if we can satisfy the request with an existing runner.
-		existing, ok := l.runners[makeRunnerKey(backendName, modelID, draftModelID, mode)]
-		if ok {
+		existing, existingOK = l.runners[makeRunnerKey(backendName, modelID, draftModelID, mode)]
+		if existingOK {
 			select {
 			case <-l.slots[existing.slot].done:
 				l.log.Warn("Runner is defunct, waiting for eviction", "backend", backendName, "model", existing.modelRef)
@@ -497,13 +554,13 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
 			default:
 				l.references[existing.slot]++
 				l.timestamps[existing.slot] = time.Time{}
-				return l.slots[existing.slot], nil
+				return cleanupAndReturn(l.slots[existing.slot], nil)
 			}
 		}
 
-		// If all slots are full, try evicting unused runners.
-		if len(l.runners) == len(l.slots) {
-			l.log.Info("Evicting to make room", "runners", len(l.runners), "slots", len(l.slots))
+		// If all slots are full (including loading reservations), try evicting unused runners.
+		if l.usedSlots() >= len(l.slots) {
+			l.log.Info("Evicting to make room", "runners", len(l.runners), "loading", len(l.loading), "slots", len(l.slots))
 			runnerCountAtLoopStart := len(l.runners)
 			remainingRunners := l.evict(false)
 			// Restart the loop if eviction happened
@@ -512,46 +569,62 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
 			}
 		}
 
-		// If there's a free slot, then find the slot.
-		if len(l.runners) < len(l.slots) {
+		// If there's a free slot, then find one that is not reserved for loading.
+		if l.usedSlots() < len(l.slots) {
 			for s, runner := range l.slots {
-				if runner == nil {
+				if runner == nil && !l.isSlotLoading(s) {
 					slot = s
 					break
 				}
 			}
 		}
 
 		if slot < 0 {
-			l.log.Debug("Cannot load model yet", "runners", len(l.runners), "slots", len(l.slots))
+			l.log.Debug("Cannot load model yet", "runners", len(l.runners), "loading", len(l.loading), "slots", len(l.slots))
 		}
 
 		// If we've identified a slot, then we're ready to start a runner.
 		if slot >= 0 {
-			// Create the runner.
-			runner, err := run(l.log, backend, modelID, modelRef, mode, slot, runnerConfig, l.openAIRecorder)
+			// Reserve the slot and release the lock for the long-running
+			// operations (run + wait). This allows other goroutines to
+			// proceed with loading different models, releasing runners, etc.
+			l.loading[slot] = loadingInfo{
+				backendName:  backendName,
+				modelID:      modelID,
+				draftModelID: draftModelID,
+				modelRef:     modelRef,
+				mode:         mode,
+			}
+			l.unlock()
+
+			newRunner, err := run(l.log, backend, modelID, modelRef, mode, slot, runnerConfig, l.openAIRecorder)
 			if err != nil {
 				l.log.Warn("Unable to start backend runner", "backend", backendName, "model", modelID, "mode", mode, "error", err)
-				return nil, fmt.Errorf("unable to start runner: %w", err)
+				l.lock(context.Background())
+				delete(l.loading, slot)
+				l.broadcast()
+				return cleanupAndReturn(nil, fmt.Errorf("unable to start runner: %w", err))
 			}
 
-			// Wait for the runner to be ready. In theory it's a little
-			// inefficient to block all other loaders (including those that
-			// might not want this runner), but in reality they would probably
-			// be blocked by the underlying loading anyway (in terms of disk and
-			// GPU performance). We have to retain a lock here though to enforce
-			// deduplication of runners and keep slot / memory reservations.
-			if err := runner.wait(ctx); err != nil {
-				runner.terminate()
+			if err := newRunner.wait(ctx); err != nil {
+				newRunner.terminate()
 				l.log.Warn("Backend runner initialization failed", "backend", backendName, "model", modelID, "mode", mode, "error", err)
-				return nil, fmt.Errorf("error waiting for runner to be ready: %w", err)
+				l.lock(context.Background())
+				delete(l.loading, slot)
+				l.broadcast()
+				return cleanupAndReturn(nil, fmt.Errorf("error waiting for runner to be ready: %w", err))
 			}
 
+			// Re-acquire lock and register the runner.
+			l.lock(context.Background())
+			delete(l.loading, slot)
+
 			// Perform registration and return the runner.
 			l.runners[makeRunnerKey(backendName, modelID, draftModelID, mode)] = runnerInfo{slot, modelRef}
-			l.slots[slot] = runner
+			l.slots[slot] = newRunner
 			l.references[slot] = 1
-			return runner, nil
+			l.broadcast()
+			return cleanupAndReturn(newRunner, nil)
 		}
 
 		// Wait for something to change. Note that we always re-lock with
@@ -562,7 +635,7 @@ func (l *loader) load(ctx context.Context, backendName, modelID, modelRef string
 		select {
 		case <-ctx.Done():
 			l.lock(context.Background())
-			return nil, context.Canceled
+			return cleanupAndReturn(nil, context.Canceled)
 		case <-poll:
 			l.lock(context.Background())
 		}
diff --git a/pkg/inference/scheduling/scheduler.go b/pkg/inference/scheduling/scheduler.go
@@ -180,7 +180,7 @@ func (s *Scheduler) getLoaderStatus(ctx context.Context) []BackendStatus {
 	}
 	defer s.loader.unlock()
 
-	result := make([]BackendStatus, 0, len(s.loader.runners))
+	result := make([]BackendStatus, 0, len(s.loader.runners)+len(s.loader.loading))
 
 	for key, runnerInfo := range s.loader.runners {
 		if s.loader.slots[runnerInfo.slot] != nil {
@@ -205,6 +205,16 @@ func (s *Scheduler) getLoaderStatus(ctx context.Context) []BackendStatus {
 		}
 	}
 
+	// Include models that are currently being loaded.
+	for _, info := range s.loader.loading {
+		result = append(result, BackendStatus{
+			BackendName: info.backendName,
+			ModelName:   info.modelRef,
+			Mode:        info.mode.String(),
+			Loading:     true,
+		})
+	}
+
 	return result
 }
 

Original file line number	Diff line number	Diff line change
`@@ -747,6 +747,7 @@ type BackendStatus struct {`
`747`	`747`	Mode string `json:"mode"`
`748`	`748`	LastUsed time.Time `json:"last_used,omitempty"`
`749`	`749`	InUse bool `json:"in_use,omitempty"`
	`750`	+ Loading bool `json:"loading,omitempty"`
`750`	`751`	KeepAlive *inference.KeepAlive `json:"keep_alive,omitempty"`
`751`	`752`	`}`
`752`	`753`
Original file line number	Diff line number	Diff line change
`@@ -180,7 +180,7 @@ func (s *Scheduler) getLoaderStatus(ctx context.Context) []BackendStatus {`
`180`	`180`	`}`
`181`	`181`	`defer s.loader.unlock()`
`182`	`182`
`183`		`- result := make([]BackendStatus, 0, len(s.loader.runners))`
	`183`	`+ result := make([]BackendStatus, 0, len(s.loader.runners)+len(s.loader.loading))`
`184`	`184`
`185`	`185`	`for key, runnerInfo := range s.loader.runners {`
`186`	`186`	`if s.loader.slots[runnerInfo.slot] != nil {`
`@@ -205,6 +205,16 @@ func (s *Scheduler) getLoaderStatus(ctx context.Context) []BackendStatus {`
`205`	`205`	`}`
`206`	`206`	`}`
`207`	`207`
	`208`	`+ // Include models that are currently being loaded.`
	`209`	`+ for _, info := range s.loader.loading {`
	`210`	`+ result = append(result, BackendStatus{`
	`211`	`+ BackendName: info.backendName,`
	`212`	`+ ModelName: info.modelRef,`
	`213`	`+ Mode: info.mode.String(),`
	`214`	`+ Loading: true,`
	`215`	`+ })`
	`216`	`+ }`
	`217`	`+`
`208`	`218`	`return result`
`209`	`219`	`}`
`210`	`220`