Skip to content

Commit b21b131

Browse files
author
Daniel
committed
llmproxy self hosted config wired in
1 parent 6bbb99d commit b21b131

14 files changed

Lines changed: 497 additions & 304 deletions

File tree

contract/workflow.yaml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,16 @@ components:
3131
type: object
3232
description: Workflow represents the deployment format of a project, passed to agents.
3333
required:
34-
[schemaVersion, nodes, edges, functions, declaredVariables, channels]
34+
[
35+
schemaVersion,
36+
nodes,
37+
edges,
38+
functions,
39+
declaredVariables,
40+
channels,
41+
memory,
42+
models,
43+
]
3544
properties:
3645
schemaVersion:
3746
type: integer

go/api/workflow/types.gen.go

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/cmd/engine/main.go

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/ForestHubAI/edge-agents/go/engine/driver"
1919
"github.com/ForestHubAI/edge-agents/go/engine/memory"
2020
"github.com/ForestHubAI/edge-agents/go/engine/websearch"
21-
"github.com/ForestHubAI/edge-agents/go/llmproxy"
2221
"github.com/ForestHubAI/edge-agents/go/logging"
2322
"github.com/ForestHubAI/edge-agents/go/mapping"
2423
"github.com/rs/zerolog"
@@ -67,9 +66,8 @@ func main() {
6766
// take precedence; any provider the backend exposes that the engine lacks
6867
// a key for is registered as a backend-routed stand-in.
6968
loadCtx, cancelLoad := context.WithTimeout(context.Background(), backend.ProviderLoadTimeout)
70-
providers := buildLLMProviders(loadCtx, cfg.LLM, backendClient)
69+
llmProviders := buildLLMProviders(loadCtx, cfg.LLM, backendClient)
7170
cancelLoad() // Release loadCtx resources
72-
llmClient := llmproxy.NewClient(providers)
7371

7472
// Only normalise if the operator actually opted into file mode. An empty
7573
// path stays empty so the deploy handler can surface a clear "not configured"
@@ -127,11 +125,11 @@ func main() {
127125

128126
// Create builder and engine
129127
builder := &build.Builder{
130-
Drivers: drivers,
131-
LLM: llmClient,
132-
Memory: memoryManager,
133-
WebSearch: webSearchProvider,
134-
Retriever: retriever,
128+
Drivers: drivers,
129+
LLMProviders: llmProviders,
130+
Memory: memoryManager,
131+
WebSearch: webSearchProvider,
132+
Retriever: retriever,
135133
}
136134
eng := &engine.Engine{
137135
Secret: cfg.Secret,

go/engine/build/build.go

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package build
33
import (
44
"context"
55
"fmt"
6+
"slices"
67

78
"github.com/ForestHubAI/edge-agents/go/api/workflow"
89

@@ -11,15 +12,18 @@ import (
1112
"github.com/ForestHubAI/edge-agents/go/engine/memory"
1213
"github.com/ForestHubAI/edge-agents/go/engine/transport"
1314
"github.com/ForestHubAI/edge-agents/go/engine/websearch"
15+
"github.com/ForestHubAI/edge-agents/go/llmproxy"
1416
)
1517

1618
// Builder holds the engine-scoped dependencies needed to construct a Runner.
19+
// LLMProviders is the boot provider set; Build composes it with any per-deploy
20+
// custom-model providers into a fresh client scoped to each Runner.
1721
type Builder struct {
18-
Drivers *driver.Registry
19-
LLM engine.LlmClient
20-
Memory *memory.Manager
21-
Retriever engine.Retriever
22-
WebSearch websearch.Provider // optional; nil disables WebSearchTool nodes
22+
Drivers *driver.Registry
23+
LLMProviders []llmproxy.Provider
24+
Memory *memory.Manager
25+
Retriever engine.Retriever
26+
WebSearch websearch.Provider // optional; nil disables WebSearchTool nodes
2327
}
2428

2529
// Build constructs a Runner for the given workflow and network manifest.
@@ -36,11 +40,25 @@ func (b *Builder) Build(ctx context.Context, wf *workflow.Workflow, dm engine.De
3640
return nil, fmt.Errorf("refreshing memory: %w", err)
3741
}
3842
}
43+
// Compose a per-deploy LLM client: the boot providers plus any custom-model
44+
// providers resolved from this deploy's externalResources. Done before
45+
// transports so a provider-resolution error fails fast without leaking a
46+
// transport registry. The client is scoped to this Runner and GC'd on the
47+
// next deploy, so the boot set is never mutated.
48+
deployProviders, err := buildDeployProviders(wf, dm, ext)
49+
if err != nil {
50+
return nil, fmt.Errorf("resolving deploy llm providers: %w", err)
51+
}
52+
llmClient := llmproxy.NewClient(append(slices.Clone(b.LLMProviders), deployProviders...))
53+
if err := validateModelsResolvable(wf, llmClient); err != nil {
54+
return nil, fmt.Errorf("resolving referenced models: %w", err)
55+
}
56+
3957
transports, err := transport.NewRegistry(ext)
4058
if err != nil {
4159
return nil, fmt.Errorf("creating transport registry: %w", err)
4260
}
43-
runner, err := buildRunner(ctx, wf, dm, ext, transports, b.Drivers, b.LLM, b.Memory, b.Retriever, b.WebSearch)
61+
runner, err := buildRunner(ctx, wf, dm, ext, transports, b.Drivers, llmClient, b.Memory, b.Retriever, b.WebSearch)
4462
if err != nil {
4563
transports.CloseAll()
4664
return nil, err
@@ -52,11 +70,8 @@ func (b *Builder) Build(ctx context.Context, wf *workflow.Workflow, dm engine.De
5270
// skipping other memory kinds (e.g. VectorDatabase, consumed by Retriever
5371
// nodes). These are the canonical set of files the memory Manager restores.
5472
func declaredMemoryFiles(wf *workflow.Workflow) ([]workflow.MemoryFile, error) {
55-
if wf.Memory == nil {
56-
return nil, nil
57-
}
5873
var out []workflow.MemoryFile
59-
for i, m := range *wf.Memory {
74+
for i, m := range wf.Memory {
6075
disc, err := m.Discriminator()
6176
if err != nil {
6277
return nil, fmt.Errorf("memory[%d]: %w", i, err)

go/engine/build/llm.go

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
package build
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/ForestHubAI/edge-agents/go/api/workflow"
7+
"github.com/ForestHubAI/edge-agents/go/engine"
8+
"github.com/ForestHubAI/edge-agents/go/llmproxy"
9+
"github.com/ForestHubAI/edge-agents/go/llmproxy/provider/selfhosted"
10+
)
11+
12+
// buildDeployProviders resolves a workflow's declared models into a single
13+
// per-deploy self-hosted provider. Every entry in wf.Models is a custom/
14+
// self-hosted model while catalog models are referenced by id and never declared.
15+
// An unbound or unconfigured model is a deploy error.
16+
func buildDeployProviders(wf *workflow.Workflow, dm engine.DeploymentMapping, ext *engine.ExternalResources) ([]llmproxy.Provider, error) {
17+
endpoints := make([]selfhosted.ModelEndpoint, 0, len(wf.Models))
18+
for _, mu := range wf.Models {
19+
m, err := mu.AsLLMModel()
20+
if err != nil {
21+
return nil, fmt.Errorf("declared model: %w", err)
22+
}
23+
b, ok := dm[m.Id]
24+
if !ok || b.Ref == "" {
25+
return nil, fmt.Errorf("model %q: declared but not bound by the deployment mapping", m.Id)
26+
}
27+
var cfg engine.LLMProviderConfig
28+
if ext != nil {
29+
cfg, ok = ext.Providers[b.Ref]
30+
}
31+
if !ok {
32+
return nil, fmt.Errorf("model %q: bound to %q but no provider config in deploy externalResources", m.Id, b.Ref)
33+
}
34+
if cfg.Model != "" && cfg.Model != m.Id {
35+
return nil, fmt.Errorf("model %q: upstream model-name aliasing (%q) is not supported yet", m.Id, cfg.Model)
36+
}
37+
caps, err := toCapabilities(m)
38+
if err != nil {
39+
return nil, err
40+
}
41+
endpoints = append(endpoints, selfhosted.ModelEndpoint{
42+
URL: cfg.URL,
43+
APIKey: cfg.APIKey,
44+
ID: llmproxy.ModelID(m.Id),
45+
Label: m.Label,
46+
Capabilities: caps,
47+
})
48+
}
49+
50+
// No declared custom models → no deploy provider. Returning a slice here
51+
// would wrap NewProvider's nil (it returns nil for an empty config) and
52+
// panic NewClient on ProviderID().
53+
if len(endpoints) == 0 {
54+
return nil, nil
55+
}
56+
return []llmproxy.Provider{selfhosted.NewProvider(selfhosted.Config{Endpoints: endpoints})}, nil
57+
}
58+
59+
// requiredModelIDs collects the chat-model ids referenced by Agent nodes across
60+
// the main graph and every function body — the models the deploy must be able
61+
// to serve. Only Agent nodes reference chat models today; nodes with a missing
62+
// model id are skipped here (the graph build reports that as a MissingField).
63+
func requiredModelIDs(wf *workflow.Workflow) ([]string, error) {
64+
seen := make(map[string]struct{})
65+
var out []string
66+
scan := func(nodes []workflow.Node) error {
67+
for _, n := range nodes {
68+
v, err := n.ValueByDiscriminator()
69+
if err != nil {
70+
return err
71+
}
72+
a, ok := v.(workflow.AgentNode)
73+
if !ok || a.Arguments.Model == nil || *a.Arguments.Model == "" {
74+
continue
75+
}
76+
id := *a.Arguments.Model
77+
if _, dup := seen[id]; dup {
78+
continue
79+
}
80+
seen[id] = struct{}{}
81+
out = append(out, id)
82+
}
83+
return nil
84+
}
85+
if err := scan(wf.Nodes); err != nil {
86+
return nil, err
87+
}
88+
for _, f := range wf.Functions {
89+
if err := scan(f.Nodes); err != nil {
90+
return nil, err
91+
}
92+
}
93+
return out, nil
94+
}
95+
96+
// validateModelsResolvable fails the build when an Agent node references a model
97+
// that no provider in the composed client can serve. Without this the failure
98+
// surfaces lazily at the first Chat ("no suitable provider"); here it's a clear
99+
// deploy-time error naming the model.
100+
func validateModelsResolvable(wf *workflow.Workflow, client *llmproxy.Client) error {
101+
ids, err := requiredModelIDs(wf)
102+
if err != nil {
103+
return fmt.Errorf("scanning referenced models: %w", err)
104+
}
105+
available := make(map[string]struct{})
106+
for _, m := range client.AvailableModels() {
107+
available[string(m.ID)] = struct{}{}
108+
}
109+
for _, id := range ids {
110+
if _, ok := available[id]; !ok {
111+
return fmt.Errorf("model %q is referenced by an agent node but no configured provider serves it (no local API key, backend route, or declared custom model)", id)
112+
}
113+
}
114+
return nil
115+
}
116+
117+
// toCapabilities maps a declared model's capabilities onto llmproxy's. Embedding
118+
// is rejected for now: self-hosted deploy providers need a vector dimension that
119+
// the workflow model declaration doesn't carry yet.
120+
func toCapabilities(m workflow.LLMModel) ([]llmproxy.ModelCapability, error) {
121+
caps := make([]llmproxy.ModelCapability, 0, len(m.Capabilities))
122+
for _, c := range m.Capabilities {
123+
if c == workflow.ModelCapabilityEmbedding {
124+
return nil, fmt.Errorf("model %q: the embedding capability is not supported for self-hosted deploy providers yet (no dimension in the workflow declaration)", m.Id)
125+
}
126+
caps = append(caps, llmproxy.ModelCapability(c))
127+
}
128+
return caps, nil
129+
}

0 commit comments

Comments
 (0)