@@ -20,6 +20,8 @@ import (
2020 "context"
2121 "fmt"
2222 "math"
23+ "slices"
24+ "strings"
2325
2426 v1 "k8s.io/api/core/v1"
2527 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -30,13 +32,15 @@ import (
3032 "k8s.io/kubernetes/pkg/scheduler/framework"
3133
3234 llmazcoreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
35+ llmazinferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
3336)
3437
// Package-level constants for this plugin: its name, the key derived from it,
// and local aliases for keys defined in the llmaz API packages.
3538const (
// Name is the plugin's name string.
3639 Name = "ResourceFungibility"
// stateKey reuses Name as its value.
3740 stateKey = Name
3841
// modelNameLabelKey aliases the model-name label key from the llmaz core API.
39- modelNameLabelKey = llmazcoreapi .ModelNameLabelKey
42+ modelNameLabelKey = llmazcoreapi .ModelNameLabelKey
// inferenceServiceFlavorsAnnoKey aliases the flavors annotation key from the
// llmaz inference API. calPreFilterState looks it up in the pod's annotations
// and splits the value on "," to obtain the flavor names to keep.
43+ inferenceServiceFlavorsAnnoKey = llmazinferenceapi .InferenceServiceFlavorsAnnoKey
4044)
4145
4246var (
@@ -151,7 +155,16 @@ func (rf *ResourceFungibility) calPreFilterState(ctx context.Context, pod *v1.Po
151155 return nil
152156 }
153157
154- for _ , f := range model .Spec .InferenceConfig .Flavors {
158+ // Filter the flavors from model by the inference service flavors annotation.
159+ selectedFlavors := model .Spec .InferenceConfig .Flavors
160+ if v , ok := pod .Annotations [inferenceServiceFlavorsAnnoKey ]; ok {
161+ flavorNames := strings .Split (v , "," )
162+ selectedFlavors = slices .DeleteFunc (selectedFlavors , func (f llmazcoreapi.Flavor ) bool {
163+ return ! slices .Contains (flavorNames , string (f .Name ))
164+ })
165+ }
166+
167+ for _ , f := range selectedFlavors {
155168 if len (f .NodeSelector ) == 0 {
156169 // Once nodeSelector is empty, which means all nodes are potential candidates,
157170 // so we'll skip the Filter stage.
0 commit comments