Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions catalog/engines/z-image-diffusers-rocm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ startup:
HSA_OVERRIDE_GFX_VERSION: "11.0.0"
default_args:
port: 8188
accepted_config_keys:
- port
health_check:
path: /health
timeout_s: 600
Expand Down
2 changes: 2 additions & 0 deletions catalog/engines/z-image-diffusers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ startup:
MODEL_PATH: "{{.ModelPath}}"
default_args:
port: 8188
accepted_config_keys:
- port
health_check:
path: /health
timeout_s: 600
Expand Down
31 changes: 16 additions & 15 deletions cmd/aima/tooldeps_deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,21 +84,22 @@ func buildDeployDeps(ac *appContext, deps *mcp.ToolDeps,
}

req := &runtime.DeployRequest{
Name: modelName,
Engine: resolved.Engine,
Image: resolved.EngineImage,
Command: resolved.Command,
PortSpecs: append([]knowledge.StartupPort(nil), resolved.PortSpecs...),
InitCommands: resolved.InitCommands,
ModelPath: modelPath,
Config: resolved.Config,
RuntimeClassName: resolved.RuntimeClassName,
CPUArch: resolved.CPUArch,
Env: resolved.Env,
WorkDir: resolved.WorkDir,
Container: resolved.Container,
GPUResourceName: resolved.GPUResourceName,
ExtraVolumes: resolved.ExtraVolumes,
Name: modelName,
Engine: resolved.Engine,
Image: resolved.EngineImage,
Command: resolved.Command,
PortSpecs: append([]knowledge.StartupPort(nil), resolved.PortSpecs...),
InitCommands: resolved.InitCommands,
ModelPath: modelPath,
Config: resolved.Config,
AcceptedConfigKeys: append([]string(nil), resolved.AcceptedConfigKeys...),
RuntimeClassName: resolved.RuntimeClassName,
CPUArch: resolved.CPUArch,
Env: resolved.Env,
WorkDir: resolved.WorkDir,
Container: resolved.Container,
GPUResourceName: resolved.GPUResourceName,
ExtraVolumes: resolved.ExtraVolumes,
Labels: map[string]string{
// Label carries the resolved asset metadata.name so the
// runtime's findEngineAsset lookup (keyed on metadata.name)
Expand Down
5 changes: 5 additions & 0 deletions docs/engine.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ api:
base_path: /v1
```

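`startup.accepted_config_keys` is an optional allowlist of the config keys that
may be passed to the engine as CLI flags. Leave it unset for broad CLI engines
such as vLLM that accept many runtime flags; set it for narrow wrapper servers
that reject generic LLM flags, for example image or audio FastAPI wrappers that
only accept `port`. Keys are matched case-insensitively, and `-` and `_` are
treated as equivalent.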

---

## 核心功能
Expand Down
35 changes: 35 additions & 0 deletions internal/knowledge/configflags.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,41 @@ import (
"strings"
)

// canonicalConfigKey returns the normalized spelling of a config key used for
// allowlist comparison: surrounding whitespace is trimmed, letters are
// lowercased, and every "-" is replaced with "_", so "Max-Model-Len" and
// "max_model_len" compare equal.
func canonicalConfigKey(key string) string {
	trimmed := strings.TrimSpace(key)
	lowered := strings.ToLower(trimmed)
	return strings.ReplaceAll(lowered, "-", "_")
}

// AcceptedConfigKeySet builds a lookup set from an engine's optional
// accepted_config_keys allowlist. Every entry is passed through
// canonicalConfigKey, and entries that canonicalize to "" are dropped.
//
// The result is nil when keys is empty or contains only blank entries; callers
// treat that as "legacy permissive mode" so engine YAML written before the
// allowlist existed stays compatible.
func AcceptedConfigKeySet(keys []string) map[string]struct{} {
	var allowed map[string]struct{}
	for _, raw := range keys {
		name := canonicalConfigKey(raw)
		if name == "" {
			continue
		}
		// Allocate lazily so an all-blank list still yields nil.
		if allowed == nil {
			allowed = make(map[string]struct{}, len(keys))
		}
		allowed[name] = struct{}{}
	}
	return allowed
}

// ConfigKeyAccepted reports whether key should be forwarded to the engine as a
// CLI flag. A nil or empty accepted set means no allowlist was declared, so
// every key passes; the allowlist is deliberately opt-in so catalog entries
// that have not declared accepted_config_keys keep working. Otherwise key is
// accepted only when its canonical form is a member of accepted.
func ConfigKeyAccepted(key string, accepted map[string]struct{}) bool {
	allowlistDeclared := len(accepted) > 0
	if !allowlistDeclared {
		return true
	}
	_, found := accepted[canonicalConfigKey(key)]
	return found
}

// FormatConfigFlag emits CLI tokens for a single config key/value pair.
// Returns tokens to append to args, e.g. ["--flag", "value"], ["--flag"], or ["--no-flag"].
//
Expand Down
5 changes: 5 additions & 0 deletions internal/knowledge/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ type EngineStartup struct {
WorkDir string `yaml:"work_dir,omitempty" json:"work_dir,omitempty"`
Ports []StartupPort `yaml:"ports,omitempty" json:"ports,omitempty"`
DefaultArgs map[string]any `yaml:"default_args" json:"default_args"`
AcceptedConfigKeys []string `yaml:"accepted_config_keys,omitempty" json:"accepted_config_keys,omitempty"`
InternalArgs []string `yaml:"internal_args,omitempty" json:"internal_args,omitempty"`
HealthCheck HealthCheck `yaml:"health_check" json:"health_check"`
Warmup WarmupConfig `yaml:"warmup" json:"warmup"`
Expand Down Expand Up @@ -800,6 +801,9 @@ func mergeStartup(dst, src *EngineStartup) {
if len(dst.Ports) == 0 {
dst.Ports = src.Ports
}
if len(dst.AcceptedConfigKeys) == 0 {
dst.AcceptedConfigKeys = src.AcceptedConfigKeys
}
if dst.DefaultArgs == nil {
dst.DefaultArgs = src.DefaultArgs
} else if src.DefaultArgs != nil {
Expand Down Expand Up @@ -958,6 +962,7 @@ func cloneEngineAsset(src EngineAsset) EngineAsset {
dst.Startup.InitCommands = append([]string(nil), src.Startup.InitCommands...)
dst.Startup.Env = cloneStringMap(src.Startup.Env)
dst.Startup.DefaultArgs = cloneAnyMap(src.Startup.DefaultArgs)
dst.Startup.AcceptedConfigKeys = append([]string(nil), src.Startup.AcceptedConfigKeys...)
dst.Startup.InternalArgs = append([]string(nil), src.Startup.InternalArgs...)
dst.Startup.ExtraVolumes = append([]ContainerVolume(nil), src.Startup.ExtraVolumes...)
if src.Startup.LogPatterns != nil {
Expand Down
4 changes: 4 additions & 0 deletions internal/knowledge/podgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,17 @@ func GeneratePod(resolved *ResolvedConfig) ([]byte, error) {
}
keys := make([]string, 0, len(resolved.Config))
portKeys := PortConfigKeys(resolved.PortSpecs)
acceptedKeys := AcceptedConfigKeySet(resolved.AcceptedConfigKeys)
for k := range resolved.Config {
if k == "model_path" {
continue
}
if _, reserved := portKeys[k]; reserved {
continue
}
if !ConfigKeyAccepted(k, acceptedKeys) {
continue
}
if !ShouldIncludeConfigFlag(resolved.Command, resolved.ModelPath, k, resolved.Config[k]) {
continue
}
Expand Down
31 changes: 31 additions & 0 deletions internal/knowledge/podgen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,37 @@ func TestGeneratePodWithCustomStartupPorts(t *testing.T) {
}
}

func TestGeneratePodFiltersUnsupportedConfigFlags(t *testing.T) {
resolved := &ResolvedConfig{
Engine: "z-image-diffusers",
EngineImage: "qujing-z-image:latest",
ModelPath: "/data/models/z-image",
ModelName: "z-image",
Slot: "default",
Command: []string{"python3", "server.py"},
PortSpecs: []StartupPort{
{Name: "http", Flag: "--port", ConfigKey: "port", Primary: true},
},
Config: map[string]any{
"port": 8188,
"max_model_len": 8192,
},
AcceptedConfigKeys: []string{"port"},
}

podYAML, err := GeneratePod(resolved)
if err != nil {
t.Fatalf("GeneratePod: %v", err)
}
s := string(podYAML)
if !strings.Contains(s, "--port") || !strings.Contains(s, "8188") {
t.Fatalf("expected accepted port flag in YAML:\n%s", s)
}
if strings.Contains(s, "--max-model-len") {
t.Fatalf("LLM-only context flag should not be emitted for image engine:\n%s", s)
}
}

func TestGeneratePodEnvMerge(t *testing.T) {
resolved := &ResolvedConfig{
Engine: "vllm",
Expand Down
9 changes: 6 additions & 3 deletions internal/knowledge/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ type ResolvedConfig struct {
InitCommands []string // pre-commands to run before main server (from engine YAML)
CompatibilityProbe string // container compatibility probe declared by engine YAML
RepairInitCommands []string // model-variant repair commands to prepend when compatibility probe needs self-heal
AcceptedConfigKeys []string // optional engine allowlist for config keys emitted as CLI flags
ExtraVolumes []ContainerVolume // additional host volumes to mount (from engine YAML)
HealthCheck *HealthCheck
Warmup *WarmupConfig // post-healthcheck warmup config (nil = no warmup)
Expand Down Expand Up @@ -230,6 +231,7 @@ func (c *Catalog) Resolve(hw HardwareInfo, modelName, engineType string, userOve
PortSpecs: engine.Startup.Ports,
InitCommands: engine.Startup.InitCommands,
CompatibilityProbe: engine.Startup.CompatibilityProbe,
AcceptedConfigKeys: append([]string(nil), engine.Startup.AcceptedConfigKeys...),
ExtraVolumes: engine.Startup.ExtraVolumes,
Env: engine.Startup.Env,
WorkDir: engine.Startup.WorkDir,
Expand Down Expand Up @@ -1408,10 +1410,11 @@ func (c *Catalog) buildSyntheticConfig(engineType string, hw HardwareInfo, gmu f
if maxLen > 0 {
if key := pickDeclared([]string{"context_length", "max_model_len", "ctx_size", "max_context_tokens"}); key != "" {
cfg[key] = maxLen
} else if _, hasMFS := engineArgs["mem_fraction_static"]; !hasMFS {
} else if len(engineArgs) == 0 {
// Engines that declare mem_fraction_static (SGLang family) have
// no explicit context-length knob. Fall back to max_model_len
// only for the vLLM-shaped path.
// no explicit context-length knob, and non-LLM engines commonly
// declare only port-like args. Use the vLLM-shaped fallback only
// for older/unknown engine metadata with no declared args at all.
cfg["max_model_len"] = maxLen
}
}
Expand Down
61 changes: 58 additions & 3 deletions internal/knowledge/resolver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func TestResolveUsesVariantLocalPath(t *testing.T) {
EngineAssets: []EngineAsset{{
Metadata: EngineMetadata{Name: "testengine", Type: "testengine"},
Hardware: EngineHardware{GPUArch: "*"},
Startup: EngineStartup{Command: []string{"serve"}, DefaultArgs: map[string]any{}},
Startup: EngineStartup{Command: []string{"serve"}, DefaultArgs: map[string]any{}},
}},
ModelAssets: []ModelAsset{{
Metadata: ModelMetadata{Name: "local-variant-model"},
Expand Down Expand Up @@ -113,7 +113,7 @@ func TestResolveUsesStorageLocalPath(t *testing.T) {
EngineAssets: []EngineAsset{{
Metadata: EngineMetadata{Name: "testengine", Type: "testengine"},
Hardware: EngineHardware{GPUArch: "*"},
Startup: EngineStartup{Command: []string{"serve"}, DefaultArgs: map[string]any{}},
Startup: EngineStartup{Command: []string{"serve"}, DefaultArgs: map[string]any{}},
}},
ModelAssets: []ModelAsset{{
Metadata: ModelMetadata{Name: "local-storage-model"},
Expand Down Expand Up @@ -143,7 +143,7 @@ func TestResolveModelPathOverrideStillWins(t *testing.T) {
EngineAssets: []EngineAsset{{
Metadata: EngineMetadata{Name: "testengine", Type: "testengine"},
Hardware: EngineHardware{GPUArch: "*"},
Startup: EngineStartup{Command: []string{"serve"}, DefaultArgs: map[string]any{}},
Startup: EngineStartup{Command: []string{"serve"}, DefaultArgs: map[string]any{}},
}},
ModelAssets: []ModelAsset{{
Metadata: ModelMetadata{Name: "override-model"},
Expand Down Expand Up @@ -806,6 +806,61 @@ func TestBuildSyntheticWithHardware(t *testing.T) {
}
}

func TestBuildSyntheticImageModelDoesNotInjectLLMContext(t *testing.T) {
cat := &Catalog{
EngineAssets: []EngineAsset{
{
Metadata: EngineMetadata{
Name: "z-image-diffusers",
Type: "z-image-diffusers",
Version: "1.0",
SupportedFormats: []string{"safetensors"},
SupportedModelTypes: []string{"image_gen"},
},
Hardware: EngineHardware{GPUArch: "*"},
Startup: EngineStartup{
DefaultArgs: map[string]any{"port": 8188},
AcceptedConfigKeys: []string{"port"},
},
},
{
Metadata: EngineMetadata{
Name: "vllm-test", Type: "vllm", Version: "1.0",
Default: true, SupportedFormats: []string{"safetensors"}, SupportedModelTypes: []string{"llm", "vlm", "embedding"},
},
Hardware: EngineHardware{GPUArch: "*"},
Startup: EngineStartup{DefaultArgs: map[string]any{
"gpu_memory_utilization": 0.90,
"max_model_len": 8192,
}},
},
},
}

ma := cat.BuildSyntheticModelAsset(ScanMetadata{
Name: "stable-diffusion-v1-5",
Type: "image_gen",
Format: "safetensors",
SizeBytes: 16 * 1024 * 1024 * 1024,
}, HardwareInfo{GPUArch: "Blackwell", GPUVRAMMiB: 131072, GPUCount: 1, UnifiedMemory: true, RAMTotalMiB: 131072})

if len(ma.Variants) == 0 {
t.Fatal("expected synthetic image variant")
}
v := ma.Variants[0]
if v.Engine != "z-image-diffusers" {
t.Fatalf("engine = %q, want z-image-diffusers", v.Engine)
}
for _, key := range []string{"max_model_len", "gpu_memory_utilization", "mem_fraction_static", "tensor_parallel_size"} {
if _, ok := v.DefaultConfig[key]; ok {
t.Fatalf("image synthetic config leaked LLM-only key %q: %#v", key, v.DefaultConfig)
}
}
if _, ok := v.DefaultConfig["port"]; ok {
t.Fatalf("port should come from engine default_args at resolve time, not synthetic variant config: %#v", v.DefaultConfig)
}
}

func TestResolveSyntheticWithAutoTP(t *testing.T) {
cat := &Catalog{
EngineAssets: []EngineAsset{
Expand Down
2 changes: 1 addition & 1 deletion internal/runtime/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ func (r *DockerRuntime) buildRunArgs(name string, req *DeployRequest) []string {
command = knowledge.AppendPortBindings(command, portBindings)

// Append config values as CLI flags, with template substitution
for _, f := range configToFlags(req.Config, req.Command, req.ModelPath, knowledge.PortConfigKeys(req.PortSpecs)) {
for _, f := range configToFlags(req.Config, req.Command, req.ModelPath, knowledge.PortConfigKeys(req.PortSpecs), req.AcceptedConfigKeys) {
f = strings.ReplaceAll(f, "{{.ModelName}}", req.Name)
f = strings.ReplaceAll(f, "{{.ModelPath}}", containerModelPath)
command = append(command, f)
Expand Down
25 changes: 25 additions & 0 deletions internal/runtime/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,31 @@ func TestBuildRunArgs_ConfigFlags(t *testing.T) {
assertContains(t, argStr, "--flash-attn", "bool config flag")
}

// TestBuildRunArgs_FiltersUnsupportedConfigFlags verifies that buildRunArgs
// honors DeployRequest.AcceptedConfigKeys: the accepted port flag is present
// in the joined arguments and --max-model-len is absent.
func TestBuildRunArgs_FiltersUnsupportedConfigFlags(t *testing.T) {
	rt := new(DockerRuntime)
	httpPort := knowledge.StartupPort{Name: "http", Flag: "--port", ConfigKey: "port", Primary: true}
	deploy := &DeployRequest{
		Name:      "z-image",
		Engine:    "z-image-diffusers",
		Image:     "qujing-z-image:latest",
		Command:   []string{"python3", "server.py"},
		ModelPath: "/data/models/z-image",
		PortSpecs: []knowledge.StartupPort{httpPort},
		Config: map[string]any{
			"port":          8188,
			"max_model_len": 8192,
		},
		AcceptedConfigKeys: []string{"port"},
	}

	joined := joinArgs(rt.buildRunArgs("z-image-z-image-diffusers", deploy))

	assertContains(t, joined, "--port 8188", "accepted port flag")
	assertNotContains(t, joined, "--max-model-len", "LLM-only context flag must not be sent to image engines")
}

func TestBuildRunArgs_Labels(t *testing.T) {
r := &DockerRuntime{}
req := &DeployRequest{
Expand Down
31 changes: 16 additions & 15 deletions internal/runtime/k3s.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,21 +145,22 @@ func toResolvedConfig(req *DeployRequest) *knowledge.ResolvedConfig {
}

rc := &knowledge.ResolvedConfig{
Engine: req.Engine,
EngineImage: req.Image,
ModelPath: req.ModelPath,
ModelName: req.Name,
Slot: slot,
Config: config,
Command: req.Command,
PortSpecs: req.PortSpecs,
InitCommands: req.InitCommands,
ExtraVolumes: req.ExtraVolumes,
RuntimeClassName: req.RuntimeClassName,
CPUArch: req.CPUArch,
Env: req.Env,
Container: req.Container,
GPUResourceName: req.GPUResourceName,
Engine: req.Engine,
EngineImage: req.Image,
ModelPath: req.ModelPath,
ModelName: req.Name,
Slot: slot,
Config: config,
Command: req.Command,
PortSpecs: req.PortSpecs,
InitCommands: req.InitCommands,
AcceptedConfigKeys: append([]string(nil), req.AcceptedConfigKeys...),
ExtraVolumes: req.ExtraVolumes,
RuntimeClassName: req.RuntimeClassName,
CPUArch: req.CPUArch,
Env: req.Env,
Container: req.Container,
GPUResourceName: req.GPUResourceName,
}

if req.HealthCheck != nil {
Expand Down
Loading
Loading