Skip to content

Commit ff95bd4

Browse files
committed
Merge branch 'main' of https://github.com/InftyAI/llmaz
2 parents 90e9a04 + 93bb8e7 commit ff95bd4

16 files changed

Lines changed: 757 additions & 627 deletions

File tree

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ Please refer to [examples](./docs/examples/README.md) for more tutorials or read
138138

139139
Join us for more discussions:
140140

141-
- **Slack Channel**: [#llmaz](https://join.slack.com/t/inftyai/shared_invite/zt-3700res2c-_AuBGD3kixDJhzycFE6L5A)
141+
- **Discord** (recommended): [#llmaz](https://discord.gg/UWnjUG6X8j)
142+
- **Slack**: [#llmaz](https://join.slack.com/t/inftyai/shared_invite/zt-3700res2c-_AuBGD3kixDJhzycFE6L5A)
142143

143144
## Contributions
144145

chart/Chart.lock

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
dependencies:
22
- name: lws
33
repository: oci://registry.k8s.io/lws/charts
4-
version: 0.6.1
4+
version: 0.6.2
55
- name: kube-scheduler
66
repository: oci://registry-1.docker.io/inftyai
77
version: 0.0.1
@@ -14,5 +14,5 @@ dependencies:
1414
- name: ai-gateway-helm
1515
repository: oci://registry-1.docker.io/envoyproxy
1616
version: v0.1.5
17-
digest: sha256:b2b856b107e9e03d175f381a93b83b001211df02f1c1ef1ee13b23147aed50e8
18-
generated: "2025-06-09T10:04:15.540138+08:00"
17+
digest: sha256:d4ca67d95b86b66af01991f1a2eab307db36ebd9a627f06581c0292e10cad259
18+
generated: "2025-06-17T11:08:57.411227+08:00"

chart/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ appVersion: 0.1.4
2222

2323
dependencies:
2424
- name: lws
25-
version: 0.6.1
25+
version: 0.6.2
2626
repository: "oci://registry.k8s.io/lws/charts"
2727
condition: leaderWorkerSet.enabled
2828
- name: kube-scheduler

config/crd/bases/inference.llmaz.io_services.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8301,6 +8301,15 @@ spec:
83018301
SubGroupPolicy describes the policy that will be applied when creating subgroups
83028302
in each replica.
83038303
properties:
8304+
subGroupPolicyType:
8305+
default: LeaderWorker
8306+
description: |-
8307+
Defines what type of Subgroups to create. Defaults to
8308+
LeaderWorker
8309+
enum:
8310+
- LeaderWorker
8311+
- LeaderExcluded
8312+
type: string
83048313
subGroupSize:
83058314
description: |-
83068315
The number of pods per subgroup. This value is immutable,

go.mod

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
module github.com/inftyai/llmaz
22

3-
go 1.23.0
3+
go 1.24.0
4+
5+
toolchain go1.24.4
46

57
require (
68
github.com/google/go-cmp v0.7.0
@@ -15,9 +17,9 @@ require (
1517
k8s.io/client-go v0.32.5
1618
k8s.io/code-generator v0.32.5
1719
k8s.io/klog/v2 v2.130.1
18-
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738
19-
sigs.k8s.io/controller-runtime v0.20.3
20-
sigs.k8s.io/lws v0.5.1
20+
k8s.io/utils v0.0.0-20241210054802-24370beab758
21+
sigs.k8s.io/controller-runtime v0.20.4
22+
sigs.k8s.io/lws v0.6.2
2123
sigs.k8s.io/structured-merge-diff/v4 v4.7.0
2224
sigs.k8s.io/yaml v1.4.0
2325
)

go.sum

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -259,16 +259,16 @@ k8s.io/kube-aggregator v0.31.2 h1:Uw1zUP2D/4wiSjKWVVzSOcCGLuW/+IdRwjjC0FJooYU=
259259
k8s.io/kube-aggregator v0.31.2/go.mod h1:41/VIXH+/Qcg9ERNAY6bRF/WQR6xL1wFgYagdHac1X4=
260260
k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJJ4JRdzg3+O6e8I+e+8T5Y=
261261
k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4=
262-
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
263-
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
262+
k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0=
263+
k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
264264
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 h1:CPT0ExVicCzcpeN4baWEV2ko2Z/AsiZgEdwgcfwLgMo=
265265
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw=
266-
sigs.k8s.io/controller-runtime v0.20.3 h1:I6Ln8JfQjHH7JbtCD2HCYHoIzajoRxPNuvhvcDbZgkI=
267-
sigs.k8s.io/controller-runtime v0.20.3/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY=
266+
sigs.k8s.io/controller-runtime v0.20.4 h1:X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+n0DGU=
267+
sigs.k8s.io/controller-runtime v0.20.4/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY=
268268
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
269269
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3/go.mod h1:18nIHnGi6636UCz6m8i4DhaJ65T6EruyzmoQqI2BVDo=
270-
sigs.k8s.io/lws v0.5.1 h1:eaeMNkP0manRluQZLN32atoULaGrzP611gSLdFaHZs4=
271-
sigs.k8s.io/lws v0.5.1/go.mod h1:qprXSTTFnfmPZY3V3sUfk6ZPmAodsdoKS8XVElJ9kN0=
270+
sigs.k8s.io/lws v0.6.2 h1:5ulPJDaLBI9zk6ayGO2Lfg9P/FBL3C1LsmHmJVqvHvo=
271+
sigs.k8s.io/lws v0.6.2/go.mod h1:7nbwcpHwdDticuWPTDe6Va5OpjasS0MoVeVD61N5Y0c=
272272
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016 h1:kXv6kKdoEtedwuqMmkqhbkgvYKeycVbC8+iPCP9j5kQ=
273273
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
274274
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI=

pkg/controller/inference/playground_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -554,8 +554,8 @@ func setControllerReferenceForScalingConfiguration(owner metav1.Object, hpa *aut
554554
Kind: gvk.Kind,
555555
Name: owner.GetName(),
556556
UID: owner.GetUID(),
557-
BlockOwnerDeletion: ptr.To[bool](true),
558-
Controller: ptr.To[bool](true),
557+
BlockOwnerDeletion: ptr.To(true),
558+
Controller: ptr.To(true),
559559
},
560560
}
561561
return nil

pkg/controller/inference/service_controller.go

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ import (
2323

2424
corev1 "k8s.io/api/core/v1"
2525
apimeta "k8s.io/apimachinery/pkg/api/meta"
26-
"k8s.io/apimachinery/pkg/api/resource"
2726
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2827
"k8s.io/apimachinery/pkg/runtime"
2928
"k8s.io/apimachinery/pkg/types"
3029
"k8s.io/apimachinery/pkg/util/intstr"
30+
coreapplyv1 "k8s.io/client-go/applyconfigurations/core/v1"
3131
metaapplyv1 "k8s.io/client-go/applyconfigurations/meta/v1"
3232
"k8s.io/client-go/tools/record"
3333
"k8s.io/klog/v2"
@@ -116,7 +116,10 @@ func (r *ServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
116116
return ctrl.Result{}, err
117117
}
118118

119-
workloadApplyConfiguration := buildWorkloadApplyConfiguration(service, models)
119+
workloadApplyConfiguration, err := buildWorkloadApplyConfiguration(service, models)
120+
if err != nil {
121+
return ctrl.Result{}, err
122+
}
120123
if err := setControllerReferenceForWorkload(service, workloadApplyConfiguration, r.Scheme); err != nil {
121124
return ctrl.Result{}, err
122125
}
@@ -159,14 +162,35 @@ func (r *ServiceReconciler) SetupWithManager(mgr ctrl.Manager) error {
159162
Complete(r)
160163
}
161164

162-
func buildWorkloadApplyConfiguration(service *inferenceapi.Service, models []*coreapi.OpenModel) *applyconfigurationv1.LeaderWorkerSetApplyConfiguration {
165+
func buildWorkloadApplyConfiguration(service *inferenceapi.Service, models []*coreapi.OpenModel) (*applyconfigurationv1.LeaderWorkerSetApplyConfiguration, error) {
163166
workload := applyconfigurationv1.LeaderWorkerSet(service.Name, service.Namespace)
164167

165168
leaderWorkerTemplate := applyconfigurationv1.LeaderWorkerTemplate()
166169
if service.Spec.WorkloadTemplate.LeaderTemplate != nil {
167-
leaderWorkerTemplate.WithLeaderTemplate(*service.Spec.WorkloadTemplate.LeaderTemplate)
170+
// construct pod template spec configuration
171+
obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(service.Spec.WorkloadTemplate.LeaderTemplate)
172+
if err != nil {
173+
return nil, err
174+
}
175+
var podTemplateSpecApplyConfiguration coreapplyv1.PodTemplateSpecApplyConfiguration
176+
err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj, &podTemplateSpecApplyConfiguration)
177+
if err != nil {
178+
return nil, err
179+
}
180+
leaderWorkerTemplate.WithLeaderTemplate(&podTemplateSpecApplyConfiguration)
168181
}
169-
leaderWorkerTemplate.WithWorkerTemplate(service.Spec.WorkloadTemplate.WorkerTemplate)
182+
183+
// construct pod template spec configuration
184+
obj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(&service.Spec.WorkloadTemplate.WorkerTemplate)
185+
if err != nil {
186+
return nil, err
187+
}
188+
var podTemplateSpecApplyConfiguration coreapplyv1.PodTemplateSpecApplyConfiguration
189+
err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj, &podTemplateSpecApplyConfiguration)
190+
if err != nil {
191+
return nil, err
192+
}
193+
leaderWorkerTemplate.WithWorkerTemplate(&podTemplateSpecApplyConfiguration)
170194

171195
// The core logic to inject additional configurations.
172196
injectModelProperties(leaderWorkerTemplate, models, service)
@@ -188,7 +212,7 @@ func buildWorkloadApplyConfiguration(service *inferenceapi.Service, models []*co
188212
spec.WithStartupPolicy(lws.LeaderReadyStartupPolicy)
189213

190214
workload.WithSpec(spec)
191-
return workload
215+
return workload, nil
192216
}
193217

194218
func injectModelProperties(template *applyconfigurationv1.LeaderWorkerTemplateApplyConfiguration, models []*coreapi.OpenModel, service *inferenceapi.Service) {
@@ -234,14 +258,14 @@ func injectModelProperties(template *applyconfigurationv1.LeaderWorkerTemplateAp
234258
}
235259
}
236260

237-
func injectModelFlavor(template *corev1.PodTemplateSpec, model *coreapi.OpenModel, service *inferenceapi.Service) {
261+
func injectModelFlavor(template *coreapplyv1.PodTemplateSpecApplyConfiguration, model *coreapi.OpenModel, service *inferenceapi.Service) {
238262
if model.Spec.InferenceConfig == nil || len(model.Spec.InferenceConfig.Flavors) == 0 {
239263
return
240264
}
241265

242-
container := &corev1.Container{}
266+
container := &coreapplyv1.ContainerApplyConfiguration{}
243267
for i, c := range template.Spec.Containers {
244-
if c.Name == modelSource.MODEL_RUNNER_CONTAINER_NAME {
268+
if *c.Name == modelSource.MODEL_RUNNER_CONTAINER_NAME {
245269
container = &template.Spec.Containers[i]
246270
}
247271
}
@@ -256,17 +280,20 @@ func injectModelFlavor(template *corev1.PodTemplateSpec, model *coreapi.OpenMode
256280
if flavor.Name == flavorName {
257281
limits := model.Spec.InferenceConfig.Flavors[i].Limits
258282
for k, v := range limits {
283+
if container.Resources == nil {
284+
container.WithResources(coreapplyv1.ResourceRequirements())
285+
}
259286
if container.Resources.Requests == nil {
260-
container.Resources.Requests = map[corev1.ResourceName]resource.Quantity{}
287+
container.Resources.WithRequests(corev1.ResourceList{})
261288
}
262289
// overwrite the requests and limits.
263-
container.Resources.Requests[k] = v
290+
(*container.Resources.Requests)[k] = v
264291

265292
if container.Resources.Limits == nil {
266-
container.Resources.Limits = map[corev1.ResourceName]resource.Quantity{}
293+
container.Resources.WithLimits(corev1.ResourceList{})
267294
}
268295
// overwrite the requests and limits.
269-
container.Resources.Limits[k] = v
296+
(*container.Resources.Limits)[k] = v
270297
}
271298
break
272299
}

0 commit comments

Comments
 (0)