Skip to content

Commit 26a0a73

Browse files
authored
Merge pull request #95 from docker/config-list
Allow configuration through argument list (in addition to string)
2 parents 24299c1 + cce6a71 commit 26a0a73

4 files changed

Lines changed: 19 additions & 14 deletions

File tree

pkg/inference/backend.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,8 @@ func (m BackendMode) String() string {
3030
}
3131

3232
type BackendConfiguration struct {
33-
ContextSize int64 `json:"context_size,omitempty"`
34-
RawFlags []string `json:"flags,omitempty"`
33+
ContextSize int64 `json:"context-size,omitempty"`
34+
RuntimeFlags []string `json:"runtime-flags,omitempty"`
3535
}
3636

3737
// Backend is the interface implemented by inference engine backends. Backend

pkg/inference/backends/llamacpp/llamacpp.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ func (l *llamaCpp) Run(ctx context.Context, socket, model string, mode inference
144144
if config.ContextSize >= 0 {
145145
args = append(args, "--ctx-size", strconv.Itoa(int(config.ContextSize)))
146146
}
147-
args = append(args, config.RawFlags...)
147+
args = append(args, config.RuntimeFlags...)
148148
}
149149

150150
l.log.Infof("llamaCppArgs: %v", args)

pkg/inference/scheduling/api.go

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,8 @@ type UnloadResponse struct {
8686

8787
// ConfigureRequest specifies per-model runtime configuration options.
8888
type ConfigureRequest struct {
89-
Model string `json:"model"`
90-
ContextSize int64 `json:"context-size,omitempty"`
91-
RawRuntimeFlags string `json:"raw-runtime-flags,omitempty"`
89+
Model string `json:"model"`
90+
ContextSize int64 `json:"context-size,omitempty"`
91+
RuntimeFlags []string `json:"runtime-flags,omitempty"`
92+
RawRuntimeFlags string `json:"raw-runtime-flags,omitempty"`
9293
}

pkg/inference/scheduling/scheduler.go

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -388,23 +388,27 @@ func (s *Scheduler) Configure(w http.ResponseWriter, r *http.Request) {
388388
}
389389

390390
configureRequest := ConfigureRequest{
391-
Model: "",
392-
ContextSize: -1,
393-
RawRuntimeFlags: "",
391+
ContextSize: -1,
394392
}
395393
if err := json.Unmarshal(body, &configureRequest); err != nil {
396394
http.Error(w, "invalid request", http.StatusBadRequest)
397395
return
398396
}
399-
rawFlags, err := shellwords.Parse(configureRequest.RawRuntimeFlags)
400-
if err != nil {
401-
http.Error(w, "invalid request", http.StatusBadRequest)
402-
return
397+
var runtimeFlags []string
398+
if len(configureRequest.RuntimeFlags) > 0 {
399+
runtimeFlags = configureRequest.RuntimeFlags
400+
} else {
401+
rawFlags, err := shellwords.Parse(configureRequest.RawRuntimeFlags)
402+
if err != nil {
403+
http.Error(w, "invalid request", http.StatusBadRequest)
404+
return
405+
}
406+
runtimeFlags = rawFlags
403407
}
404408

405409
var runnerConfig inference.BackendConfiguration
406410
runnerConfig.ContextSize = configureRequest.ContextSize
407-
runnerConfig.RawFlags = rawFlags
411+
runnerConfig.RuntimeFlags = runtimeFlags
408412

409413
if err := s.loader.setRunnerConfig(r.Context(), backend.Name(), configureRequest.Model, inference.BackendModeCompletion, runnerConfig); err != nil {
410414
s.log.Warnf("Failed to configure %s runner for %s: %s", backend.Name(), configureRequest.Model, err)

0 commit comments

Comments
 (0)