Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions cmd/aima/resolve.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ var autoDetectWarned sync.Map
// resolvedDeployment holds the shared result of resolve + CheckFit,
// used by both DeployApply and DeployDryRun.
type resolvedDeployment struct {
ModelName string
Resolved *knowledge.ResolvedConfig
Fit *knowledge.FitReport
// ModelName is set from the canonicalName computed in resolveDeployment.
ModelName string
// Resolved is the resolver output. resolveDeployment writes every fit
// adjustment into its Config and tags the matching Provenance entry
// "L0-auto", so after the call it carries the effective values.
Resolved *knowledge.ResolvedConfig
// ResolvedConfig is a shallow copy of Resolved.Config taken before the fit
// adjustments are written back.
ResolvedConfig map[string]any
// ResolvedProvenance is a shallow copy of Resolved.Provenance taken before
// the fit adjustments are written back.
ResolvedProvenance map[string]string
// Fit is the report returned by knowledge.CheckFit for Resolved.
Fit *knowledge.FitReport
}

// queryGoldenOverrides returns config overrides from the best golden configuration
Expand Down Expand Up @@ -117,19 +119,45 @@ func resolveDeployment(ctx context.Context, cat *knowledge.Catalog, db *state.DB
return nil, err
}

resolvedConfig := cloneAnyMap(resolved.Config)
resolvedProvenance := cloneStringMap(resolved.Provenance)
fit := knowledge.CheckFit(resolved, hwInfo)
for k, v := range fit.Adjustments {
resolved.Config[k] = v
resolved.Provenance[k] = "L0-auto"
}

return &resolvedDeployment{
ModelName: canonicalName,
Resolved: resolved,
Fit: fit,
ModelName: canonicalName,
Resolved: resolved,
ResolvedConfig: resolvedConfig,
ResolvedProvenance: resolvedProvenance,
Fit: fit,
}, nil
}

// cloneAnyMap returns a shallow copy of in. A nil input yields nil; a non-nil
// input (even an empty one) yields a fresh, independent map whose values are
// copied by assignment, so nested maps or slices remain shared.
func cloneAnyMap(in map[string]any) map[string]any {
	var dup map[string]any
	if in != nil {
		dup = make(map[string]any, len(in))
		for key, val := range in {
			dup[key] = val
		}
	}
	return dup
}

// cloneStringMap returns an independent copy of in. A nil input yields nil; a
// non-nil input (even an empty one) yields a freshly allocated map holding the
// same key/value pairs.
func cloneStringMap(in map[string]string) map[string]string {
	var dup map[string]string
	if in != nil {
		dup = make(map[string]string, len(in))
		for key := range in {
			dup[key] = in[key]
		}
	}
	return dup
}

// normalizeAutoPortOverrides removes "auto" sentinels from port-like override keys
// before resolution. This preserves the engine YAML default port so Go-side host
// port allocation can still choose a free host port later in deploy.apply.
Expand Down
64 changes: 64 additions & 0 deletions cmd/aima/resolve_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,70 @@ func TestNormalizeAutoPortOverrides(t *testing.T) {
}
}

// TestResolveDeploymentKeepsResolvedAndEffectiveConfigSeparate verifies that
// resolveDeployment exposes the pre-fit snapshot (ResolvedConfig and
// ResolvedProvenance) separately from the fit-adjusted values left on
// Resolved, using a single-engine, single-model catalog.
func TestResolveDeploymentKeepsResolvedAndEffectiveConfigSeparate(t *testing.T) {
	ctx := context.Background()
	db, err := state.Open(ctx, ":memory:")
	if err != nil {
		t.Fatalf("Open: %v", err)
	}
	defer db.Close()

	// Engine whose default gpu_memory_utilization is the value expected in
	// the pre-fit snapshot.
	engine := knowledge.EngineAsset{
		Metadata: knowledge.EngineMetadata{
			Name:             "vllm-test",
			Type:             "vllm",
			Version:          "1.0",
			SupportedFormats: []string{"safetensors"},
		},
		Hardware: knowledge.EngineHardware{GPUArch: "*"},
		Startup: knowledge.EngineStartup{
			Command:     []string{"vllm", "serve", "{{.ModelPath}}"},
			DefaultArgs: map[string]any{"gpu_memory_utilization": 0.85, "port": 8000},
		},
		Runtime: knowledge.EngineRuntime{Default: "container"},
	}

	model := knowledge.ModelAsset{
		Metadata: knowledge.ModelMetadata{Name: "demo-model", Type: "llm"},
		Storage:  knowledge.ModelStorage{DefaultPathPattern: "/models/demo"},
		Variants: []knowledge.ModelVariant{{
			Name:     "demo-model-vllm",
			Engine:   "vllm",
			Format:   "safetensors",
			Hardware: knowledge.ModelVariantHardware{GPUArch: "Blackwell"},
		}},
	}

	cat := &knowledge.Catalog{
		EngineAssets: []knowledge.EngineAsset{engine},
		ModelAssets:  []knowledge.ModelAsset{model},
	}

	// Hardware with roughly half of its GPU memory already in use; the
	// assertions below expect a fit adjustment to 0.51 for this input.
	hw := knowledge.HardwareInfo{
		GPUArch:       "Blackwell",
		GPUVRAMMiB:    122880,
		GPUMemFreeMiB: 64000,
		GPUMemUsedMiB: 58880,
		UnifiedMemory: false,
		Platform:      "linux/arm64",
	}

	rd, err := resolveDeployment(ctx, cat, db, nil, hw, "demo-model", "vllm", "", nil, t.TempDir())
	if err != nil {
		t.Fatalf("resolveDeployment: %v", err)
	}

	const key = "gpu_memory_utilization"

	// Pre-fit snapshot keeps the engine default.
	if got := rd.ResolvedConfig[key]; got != 0.85 {
		t.Fatalf("resolved_config gpu_memory_utilization = %v, want 0.85", got)
	}
	// Effective config and the fit report both carry the adjusted value.
	if got := rd.Resolved.Config[key]; got != 0.51 {
		t.Fatalf("effective config gpu_memory_utilization = %v, want 0.51", got)
	}
	if got := rd.Fit.Adjustments[key]; got != 0.51 {
		t.Fatalf("fit adjustment gpu_memory_utilization = %v, want 0.51", got)
	}
	// Provenance is split the same way.
	if got := rd.ResolvedProvenance[key]; got != "L0" {
		t.Fatalf("resolved provenance = %q, want L0", got)
	}
	if got := rd.Resolved.Provenance[key]; got != "L0-auto" {
		t.Fatalf("effective provenance = %q, want L0-auto", got)
	}
}

func TestResolveCatalogWithLocalEngineOverlayUsesInstalledContainerAsset(t *testing.T) {
ctx := context.Background()
db, err := state.Open(ctx, ":memory:")
Expand Down
21 changes: 13 additions & 8 deletions cmd/aima/tooldeps_deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,14 +346,19 @@ func buildDeployDeps(ac *appContext, deps *mcp.ToolDeps,
}

result := map[string]any{
"model": rd.ModelName,
"engine": resolved.Engine,
"engine_image": resolved.EngineImage,
"slot": resolved.Slot,
"runtime": runtimeName,
"config": resolved.Config,
"ports": knowledge.ResolvePortBindingsFromSpecs(resolved.PortSpecs, resolved.Config),
"provenance": resolved.Provenance,
"model": rd.ModelName,
"engine": resolved.Engine,
"engine_image": resolved.EngineImage,
"slot": resolved.Slot,
"runtime": runtimeName,
"config": resolved.Config,
"resolved_config": rd.ResolvedConfig,
"effective_config": resolved.Config,
"fit_adjustments": rd.Fit.Adjustments,
"ports": knowledge.ResolvePortBindingsFromSpecs(resolved.PortSpecs, resolved.Config),
"provenance": resolved.Provenance,
"resolved_provenance": rd.ResolvedProvenance,
"effective_provenance": resolved.Provenance,
"fit_report": map[string]any{
"fit": rd.Fit.Fit,
"reason": rd.Fit.Reason,
Expand Down
Loading