Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,20 @@ routing:
endpoint: https://search.example.com/query
request_format: pinecone

- name: mlp_experimental_route
description: Experimental MLP-based model selection route.
priority: 103
rules:
operator: AND
conditions:
- type: keyword
name: machine_learning
modelRefs:
- model: qwen3-8b
use_reasoning: false
algorithm:
type: mlp

- name: image_generation_route
description: Dedicated image-generation route using the OpenAI image backend.
priority: 105
Expand Down
2 changes: 1 addition & 1 deletion ml-binding/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ license = "Apache-2.0"

[lib]
name = "ml_semantic_router"
crate-type = ["cdylib", "rlib"]
crate-type = ["staticlib", "cdylib", "rlib"]

[dependencies]
# Linfa ML framework - inference only (training done in Python)
Expand Down
1 change: 1 addition & 0 deletions src/semantic-router/pkg/config/fragment_catalog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ func TestConfigFragmentCatalogCoversSupportedRoutingSurfaces(t *testing.T) {
"kmeans": filepath.Join("selection", "kmeans.yaml"),
"knn": filepath.Join("selection", "knn.yaml"),
"latency_aware": filepath.Join("selection", "latency-aware.yaml"),
"mlp": filepath.Join("selection", "mlp.yaml"),
"ratings": filepath.Join("looper", "ratings.yaml"),
"remom": filepath.Join("looper", "remom.yaml"),
"rl_driven": filepath.Join("selection", "rl-driven.yaml"),
Expand Down
63 changes: 48 additions & 15 deletions src/semantic-router/pkg/config/routing_surface_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,39 @@ var supportedDecisionPluginTypes = []string{
DecisionPluginTools,
}

var supportedDecisionAlgorithmTypes = []string{
"automix",
"confidence",
"elo",
"gmtrouter",
"hybrid",
"kmeans",
"knn",
"latency_aware",
"ratings",
"remom",
"rl_driven",
"router_dc",
"static",
"svm",
// Tier values that may appear in AlgorithmCatalogEntry.Tier. They are untyped
// string constants, so comparisons against the plain string literals
// "supported" and "experimental" keep working.
const (
	// AlgorithmTierSupported is the tier of algorithms cataloged as supported.
	AlgorithmTierSupported = "supported"
	// AlgorithmTierExperimental is the tier of algorithms cataloged as experimental.
	AlgorithmTierExperimental = "experimental"
)

// AlgorithmCatalogEntry describes a model-selection algorithm and its tier.
type AlgorithmCatalogEntry struct {
	Type string // algorithm type name (e.g., "elo")
	Tier string // AlgorithmTierSupported or AlgorithmTierExperimental
}

// decisionAlgorithmCatalog is the table of decision algorithm types and the
// tier assigned to each. Entries are listed alphabetically by Type; the tier
// is spelled with the named constants so a typo fails to compile instead of
// silently producing an unknown tier string.
var decisionAlgorithmCatalog = []AlgorithmCatalogEntry{
	{Type: "automix", Tier: AlgorithmTierExperimental},
	{Type: "confidence", Tier: AlgorithmTierSupported},
	{Type: "elo", Tier: AlgorithmTierSupported},
	{Type: "gmtrouter", Tier: AlgorithmTierExperimental},
	{Type: "hybrid", Tier: AlgorithmTierSupported},
	{Type: "kmeans", Tier: AlgorithmTierExperimental},
	{Type: "knn", Tier: AlgorithmTierExperimental},
	{Type: "latency_aware", Tier: AlgorithmTierSupported},
	{Type: "mlp", Tier: AlgorithmTierExperimental},
	{Type: "ratings", Tier: AlgorithmTierSupported},
	{Type: "remom", Tier: AlgorithmTierSupported},
	{Type: "rl_driven", Tier: AlgorithmTierExperimental},
	{Type: "router_dc", Tier: AlgorithmTierSupported},
	{Type: "static", Tier: AlgorithmTierSupported},
	{Type: "svm", Tier: AlgorithmTierExperimental},
}

// supportedDecisionAlgorithmTypes is the flat list of algorithm type names,
// one per decisionAlgorithmCatalog entry and in the same order. It is derived
// from the catalog so the two can never disagree, and is kept for code that
// still works with plain type names (backwards compatibility).
var supportedDecisionAlgorithmTypes = func() []string {
	names := make([]string, 0, len(decisionAlgorithmCatalog))
	for _, entry := range decisionAlgorithmCatalog {
		names = append(names, entry.Type)
	}
	return names
}()

var pluginTypeAliases = map[string]string{
"semantic_cache": DecisionPluginSemanticCache,
}
Expand Down Expand Up @@ -118,6 +134,23 @@ func IsSupportedDecisionAlgorithmType(algorithmType string) bool {
return false
}

// DecisionAlgorithmCatalog returns the structured catalog of algorithm types
// and their tiers. The returned slice is a fresh copy, so callers may modify
// it without affecting the package-level catalog.
func DecisionAlgorithmCatalog() []AlgorithmCatalogEntry {
	snapshot := make([]AlgorithmCatalogEntry, 0, len(decisionAlgorithmCatalog))
	snapshot = append(snapshot, decisionAlgorithmCatalog...)
	return snapshot
}

// GetAlgorithmTier looks up algorithmType in the catalog and returns the tier
// recorded for it. The match is an exact string comparison; an algorithm type
// that is not in the catalog yields the empty string.
func GetAlgorithmTier(algorithmType string) string {
	for i := range decisionAlgorithmCatalog {
		candidate := &decisionAlgorithmCatalog[i]
		if candidate.Type != algorithmType {
			continue
		}
		return candidate.Tier
	}
	return ""
}

func cloneSortedStrings(values []string) []string {
cloned := append([]string(nil), values...)
sort.Strings(cloned)
Expand Down
62 changes: 62 additions & 0 deletions src/semantic-router/pkg/config/routing_surface_catalog_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package config

import "testing"

// TestDecisionAlgorithmCatalog_AllTypesHaveTier verifies that the catalog is
// non-empty and that every entry carries a type name and one of the two
// recognized tier strings.
func TestDecisionAlgorithmCatalog_AllTypesHaveTier(t *testing.T) {
	entries := DecisionAlgorithmCatalog()
	if len(entries) == 0 {
		t.Fatal("DecisionAlgorithmCatalog() returned empty catalog")
	}

	for i := range entries {
		name, tier := entries[i].Type, entries[i].Tier
		if name == "" {
			t.Error("Catalog entry has empty Type")
		}
		switch tier {
		case "supported", "experimental":
			// recognized tier, nothing to report
		default:
			t.Errorf("Catalog entry %q has invalid Tier %q", name, tier)
		}
	}
}

// TestDecisionAlgorithmCatalog_BackwardsCompatible guards against regressions
// in the set of accepted algorithm types: every name in the fixed legacy list
// below must still be reported as supported.
func TestDecisionAlgorithmCatalog_BackwardsCompatible(t *testing.T) {
	legacyTypes := [...]string{
		"automix",
		"confidence",
		"elo",
		"gmtrouter",
		"hybrid",
		"kmeans",
		"knn",
		"latency_aware",
		"ratings",
		"remom",
		"rl_driven",
		"router_dc",
		"static",
		"svm",
	}

	for _, name := range legacyTypes {
		if IsSupportedDecisionAlgorithmType(name) {
			continue
		}
		t.Errorf("Previously supported algorithm type %q is no longer supported", name)
	}
}

// TestGetAlgorithmTier checks the tier reported for every algorithm type in
// the catalog, plus the fallback for a type the catalog does not contain.
func TestGetAlgorithmTier(t *testing.T) {
	tests := []struct {
		algType      string
		expectedTier string
	}{
		// Supported tier.
		{"static", "supported"},
		{"elo", "supported"},
		{"router_dc", "supported"},
		{"latency_aware", "supported"},
		{"hybrid", "supported"},
		{"confidence", "supported"},
		{"ratings", "supported"},
		{"remom", "supported"},
		// Experimental tier.
		{"automix", "experimental"},
		{"rl_driven", "experimental"},
		{"gmtrouter", "experimental"},
		{"knn", "experimental"},
		{"kmeans", "experimental"},
		{"svm", "experimental"},
		{"mlp", "experimental"},
		// Unknown types fall through the lookup and yield the empty string.
		{"does_not_exist", ""},
	}

	for _, tt := range tests {
		t.Run(tt.algType, func(t *testing.T) {
			tier := GetAlgorithmTier(tt.algType)
			if tier != tt.expectedTier {
				t.Errorf("GetAlgorithmTier(%q) = %q, want %q", tt.algType, tier, tt.expectedTier)
			}
		})
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,7 @@ func (r *OpenAIRouter) selectModelFromCandidates(modelRefs []config.ModelRef, de
modelRefs[i].LoRAName == result.SelectedModel {
logging.Infof("[ModelSelection] Selected %s (method=%s, score=%.4f, confidence=%.2f): %s",
result.SelectedModel, method, result.Score, result.Confidence, result.Reasoning)
// Record selection metrics
selection.RecordSelection(string(method), decisionName, result.SelectedModel, result.Score)
selection.RecordSelection(string(method), decisionName, result.SelectedModel, result.Tier, result.Score)
return &modelRefs[i], string(method)
}
}
Expand Down
25 changes: 25 additions & 0 deletions src/semantic-router/pkg/extproc/router_selection.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,37 @@ func createModelSelectorRegistry(cfg *config.RouterConfig) *selection.Registry {

registry := selectionFactory.CreateAll()
selection.GlobalRegistry = registry

// Collect algorithm methods actually configured in decisions
configuredMethods := collectConfiguredAlgorithmMethods(cfg)

// Warn about experimental algorithms and check dependency health
selection.WarnExperimentalAlgorithms(registry, configuredMethods)
selection.CheckDependencyHealth(registry, configuredMethods)

logging.ComponentEvent("extproc", "model_selection_registry_initialized", map[string]interface{}{
"mode": "per_decision_algorithm_config",
})
return registry
}

// collectConfiguredAlgorithmMethods walks cfg.Decisions and returns the
// distinct algorithm types they reference, converted to SelectionMethod and
// ordered by first appearance. Decisions with no algorithm block or an empty
// algorithm type are ignored. The result is nil when nothing is configured.
func collectConfiguredAlgorithmMethods(cfg *config.RouterConfig) []selection.SelectionMethod {
	var methods []selection.SelectionMethod
	visited := make(map[string]struct{})

	for _, decision := range cfg.Decisions {
		algorithm := decision.Algorithm
		if algorithm == nil {
			continue
		}

		algType := algorithm.Type
		if algType == "" {
			continue
		}
		if _, duplicate := visited[algType]; duplicate {
			continue
		}

		visited[algType] = struct{}{}
		methods = append(methods, selection.SelectionMethod(algType))
	}

	return methods
}

func buildModelSelectionConfig(cfg *config.RouterConfig) *selection.ModelSelectionConfig {
modelSelectionCfg := &selection.ModelSelectionConfig{
Method: "static",
Expand Down
90 changes: 90 additions & 0 deletions src/semantic-router/pkg/selection/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ limitations under the License.
package selection

import (
"context"
"fmt"
"net/http"
"strings"
"time"

"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
)
Expand Down Expand Up @@ -301,12 +307,96 @@ func (f *Factory) CreateAll() *Registry {
latencyAwareSelector := NewLatencyAwareSelector(nil)
registry.Register(MethodLatencyAware, latencyAwareSelector)

LogRegisteredAlgorithms(registry)
logging.ComponentEvent("selection", "selection_factory_initialized", map[string]interface{}{
"selector_count": len(registry.selectors),
})
return registry
}

// LogRegisteredAlgorithms writes one info line per selector in the registry,
// reporting the selector's tier and either "none" or a comma-separated list of
// its dependencies formatted as "name (type)". The registry's read lock is
// held for the duration of the walk.
func LogRegisteredAlgorithms(registry *Registry) {
	registry.mu.RLock()
	defer registry.mu.RUnlock()

	for method, selector := range registry.selectors {
		dependencies := selector.ExternalDependencies()
		if len(dependencies) == 0 {
			logging.Infof("[Selection] Registered algorithm: %s (tier=%s, dependencies=none)", method, selector.Tier())
			continue
		}

		labels := make([]string, 0, len(dependencies))
		for _, dependency := range dependencies {
			labels = append(labels, fmt.Sprintf("%s (%s)", dependency.Name, dependency.Type))
		}
		logging.Infof("[Selection] Registered algorithm: %s (tier=%s, dependencies=[%s])",
			method, selector.Tier(), strings.Join(labels, ", "))
	}
}

// WarnExperimentalAlgorithms emits a warning for each configured method whose
// registered selector reports the experimental tier, followed by one warning
// line per dependency of that selector. Methods that are not in the registry,
// or whose selector is not experimental, are skipped silently.
func WarnExperimentalAlgorithms(registry *Registry, configuredMethods []SelectionMethod) {
	for _, method := range configuredMethods {
		selector, found := registry.Get(method)
		if !found || selector.Tier() != TierExperimental {
			continue
		}

		dependencies := selector.ExternalDependencies()
		logging.Warnf("[Selection] WARNING: Algorithm %q is EXPERIMENTAL and not recommended for production use", method)
		for _, dependency := range dependencies {
			// Dependencies without a health URL are described instead.
			if dependency.HealthURL == "" {
				logging.Warnf("[Selection] Dependency: %s — %s", dependency.Name, dependency.Description)
				continue
			}
			logging.Warnf("[Selection] External dependency: %s (%s)", dependency.Name, dependency.HealthURL)
		}
	}
}

// CheckDependencyHealth checks reachability of external service dependencies
// for the given algorithms. Logs results but never fails.
func CheckDependencyHealth(registry *Registry, configuredMethods []SelectionMethod) {
for _, method := range configuredMethods {
selector, ok := registry.Get(method)
if !ok {
continue
}

for _, dep := range selector.ExternalDependencies() {
if dep.Type != DependencyExternalService || dep.HealthURL == "" {
continue
}

ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
client := &http.Client{Timeout: 5 * time.Second}
req, err := http.NewRequestWithContext(ctx, "GET", dep.HealthURL, nil)
if err != nil {
logging.Warnf("[Selection] Dependency check: %s — UNREACHABLE (bad URL: %v)", dep.Name, err)
cancel()
continue
}

resp, err := client.Do(req)
cancel()
if err != nil {
logging.Warnf("[Selection] Dependency check: %s at %s — UNREACHABLE (will degrade at runtime)", dep.Name, dep.HealthURL)
} else {
_ = resp.Body.Close()
if resp.StatusCode == http.StatusOK {
logging.Infof("[Selection] Dependency check: %s at %s — OK", dep.Name, dep.HealthURL)
} else {
logging.Warnf("[Selection] Dependency check: %s at %s — unhealthy (status %d)", dep.Name, dep.HealthURL, resp.StatusCode)
}
}
Comment thread
szedan-rh marked this conversation as resolved.
}
}
}
Comment thread
szedan-rh marked this conversation as resolved.

// Initialize sets up the global registry with all selectors
func Initialize(cfg *ModelSelectionConfig, modelConfig map[string]config.ModelParams, categories []config.Category, embeddingFunc func(string) ([]float32, error)) {
factory := NewFactory(cfg).
Expand Down
Loading
Loading