Skip to content

Commit e5d58c7

Browse files
committed
feat: add Topology Aware Scheduling support
Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
1 parent f385661 commit e5d58c7

29 files changed

Lines changed: 2024 additions & 27 deletions

deploy/helm/charts/crds/templates/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11052,6 +11052,27 @@ spec:
1105211052
subComponentType:
1105311053
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
1105411054
type: string
11055+
topologyConstraint:
11056+
description: |-
11057+
TopologyConstraint for this service. When both this and spec.topologyConstraint
11058+
are set, this must be narrower than or equal to the spec-level constraint.
11059+
properties:
11060+
packDomain:
11061+
description: |-
11062+
PackDomain specifies the topology domain for grouping replicas.
11063+
Must be one of: region, zone, datacenter, block, rack, host, numa
11064+
enum:
11065+
- region
11066+
- zone
11067+
- datacenter
11068+
- block
11069+
- rack
11070+
- host
11071+
- numa
11072+
type: string
11073+
required:
11074+
- packDomain
11075+
type: object
1105511076
volumeMounts:
1105611077
description: VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component.
1105711078
items:

deploy/helm/charts/crds/templates/nvidia.com_dynamographdeployments.yaml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11261,6 +11261,27 @@ spec:
1126111261
subComponentType:
1126211262
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
1126311263
type: string
11264+
topologyConstraint:
11265+
description: |-
11266+
TopologyConstraint for this service. When both this and spec.topologyConstraint
11267+
are set, this must be narrower than or equal to the spec-level constraint.
11268+
properties:
11269+
packDomain:
11270+
description: |-
11271+
PackDomain specifies the topology domain for grouping replicas.
11272+
Must be one of: region, zone, datacenter, block, rack, host, numa
11273+
enum:
11274+
- region
11275+
- zone
11276+
- datacenter
11277+
- block
11278+
- rack
11279+
- host
11280+
- numa
11281+
type: string
11282+
required:
11283+
- packDomain
11284+
type: object
1126411285
volumeMounts:
1126511286
description: VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component.
1126611287
items:
@@ -11289,6 +11310,28 @@ spec:
1128911310
description: Services are the services to deploy as part of this deployment.
1129011311
maxProperties: 25
1129111312
type: object
11313+
topologyConstraint:
11314+
description: |-
11315+
TopologyConstraint is the deployment-level topology constraint.
11316+
When set, applies a broad topology constraint across the whole deployment.
11317+
Services without their own topologyConstraint inherit this value.
11318+
properties:
11319+
packDomain:
11320+
description: |-
11321+
PackDomain specifies the topology domain for grouping replicas.
11322+
Must be one of: region, zone, datacenter, block, rack, host, numa
11323+
enum:
11324+
- region
11325+
- zone
11326+
- datacenter
11327+
- block
11328+
- rack
11329+
- host
11330+
- numa
11331+
type: string
11332+
required:
11333+
- packDomain
11334+
type: object
1129211335
type: object
1129311336
status:
1129411337
description: Status reflects the current observed state of this graph deployment.

deploy/helm/charts/platform/.helmignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,5 @@ test-values.yaml
2626
minikube-demo-values.yaml
2727
# subchart sources
2828
components/
29+
bin/
30+
Makefile

deploy/helm/charts/platform/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ dependencies:
3535
repository: "https://charts.bitnami.com/bitnami"
3636
condition: global.etcd.install
3737
- name: kai-scheduler
38-
version: v0.9.4
38+
version: v0.13.0-rc1
3939
repository: oci://ghcr.io/nvidia/kai-scheduler
4040
condition: kai-scheduler.enabled
4141
- name: grove-charts
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
LOCALBIN ?= $(shell pwd)/bin
17+
$(LOCALBIN):
18+
mkdir -p $(LOCALBIN)
19+
20+
HELM_DOCS_VERSION ?= 1.14.2
21+
HELM_DOCS ?= $(LOCALBIN)/helm-docs-$(HELM_DOCS_VERSION)
22+
23+
##@ General
24+
25+
.PHONY: help
26+
help: ## Display this help
27+
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-25s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) }' $(MAKEFILE_LIST)
28+
29+
##@ Helm Documentation
30+
31+
.PHONY: helm-docs-install
32+
helm-docs-install: $(HELM_DOCS) ## Download helm-docs locally if necessary
33+
# Download the pinned helm-docs release into $(LOCALBIN) as a version-suffixed binary.
# Release archives are named helm-docs_<version>_<Os>_<arch>.tar.gz with a capitalized
# OS (Linux, Darwin) and arm64 rather than aarch64, so `uname -s` is used as-is and
# aarch64 is mapped to arm64. curl -f turns an HTTP error into a failed download
# instead of piping an error page into tar.
$(HELM_DOCS): $(LOCALBIN)
34+
@echo "Downloading helm-docs $(HELM_DOCS_VERSION)..."
35+
@ARCH=$$(uname -m | sed 's/^aarch64$$/arm64/'); \
36+
OS=$$(uname -s); \
37+
curl -fsSL "https://github.com/norwoodj/helm-docs/releases/download/v$(HELM_DOCS_VERSION)/helm-docs_$(HELM_DOCS_VERSION)_$${OS}_$${ARCH}.tar.gz" | \
38+
tar xz -C $(LOCALBIN) helm-docs && \
39+
mv $(LOCALBIN)/helm-docs $(HELM_DOCS)
40+
41+
.PHONY: generate-helm-docs
42+
generate-helm-docs: helm-docs-install ## Generate README.md from values.yaml and README.md.gotmpl
43+
@echo "Generating Helm chart documentation..."
44+
@$(HELM_DOCS) \
45+
--template-files=README.md.gotmpl \
46+
--output-file=README.md \
47+
--sort-values-order=file \
48+
--chart-to-generate=. \
49+
--ignore-non-descriptions
50+
@echo "Generated README.md"
51+
52+
.PHONY: helm-docs-clean
53+
helm-docs-clean: ## Remove generated helm documentation
54+
@rm -f README.md
55+
@echo "Cleaned generated README.md"

deploy/helm/charts/platform/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ The chart includes built-in validation to prevent all operator conflicts:
101101
| https://charts.bitnami.com/bitnami | etcd | 12.0.18 |
102102
| https://nats-io.github.io/k8s/helm/charts/ | nats | 1.3.2 |
103103
| oci://ghcr.io/ai-dynamo/grove | grove(grove-charts) | v0.1.0-alpha.6 |
104-
| oci://ghcr.io/nvidia/kai-scheduler | kai-scheduler | v0.9.4 |
104+
| oci://ghcr.io/nvidia/kai-scheduler | kai-scheduler | v0.13.0-rc1 |
105105
106106
## Values
107107

deploy/helm/charts/platform/components/operator/templates/manager-rbac.yaml

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,6 @@ rules:
145145
- get
146146
- patch
147147
- update
148-
- apiGroups:
149-
- scheduling.run.ai
150-
resources:
151-
- queues
152-
verbs:
153-
- get
154-
- list
155148
- apiGroups:
156149
- apps
157150
resources:
@@ -517,6 +510,46 @@ subjects:
517510
name: '{{ include "dynamo-operator.fullname" . }}-controller-manager'
518511
namespace: '{{ .Release.Namespace }}'
519512
---
513+
# ClusterRole for Grove ClusterTopology access
514+
# This is always a ClusterRole since ClusterTopology resources are cluster-scoped
515+
apiVersion: rbac.authorization.k8s.io/v1
516+
kind: ClusterRole
517+
metadata:
518+
name: {{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-clustertopology-reader
519+
labels:
520+
app.kubernetes.io/component: rbac
521+
app.kubernetes.io/created-by: dynamo-operator
522+
app.kubernetes.io/part-of: dynamo-operator
523+
{{- include "dynamo-operator.labels" . | nindent 4 }}
524+
rules:
525+
- apiGroups:
526+
- grove.io
527+
resources:
528+
- clustertopologies
529+
verbs:
530+
- get
531+
- list
532+
- watch
533+
---
534+
# ClusterRoleBinding for Grove ClusterTopology access
535+
apiVersion: rbac.authorization.k8s.io/v1
536+
kind: ClusterRoleBinding
537+
metadata:
538+
name: {{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-clustertopology-reader-binding
539+
labels:
540+
app.kubernetes.io/component: rbac
541+
app.kubernetes.io/created-by: dynamo-operator
542+
app.kubernetes.io/part-of: dynamo-operator
543+
{{- include "dynamo-operator.labels" . | nindent 4 }}
544+
roleRef:
545+
apiGroup: rbac.authorization.k8s.io
546+
kind: ClusterRole
547+
name: {{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-clustertopology-reader
548+
subjects:
549+
- kind: ServiceAccount
550+
name: '{{ include "dynamo-operator.fullname" . }}-controller-manager'
551+
namespace: '{{ .Release.Namespace }}'
552+
---
520553
# ClusterRole for kai-scheduler queue access
521554
# This is always a ClusterRole since Queue resources are cluster-scoped
522555
apiVersion: rbac.authorization.k8s.io/v1

deploy/operator/Makefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ help: ## Display this help.
4545
##@ Development
4646

4747
.PHONY: check
48-
check: generate manifests generate-api-docs
48+
check: generate manifests generate-api-docs generate-helm-docs
4949
@echo "> Checking for uncommitted changes"
5050
@if [ -n "$$(git status --porcelain)" ]; then \
5151
echo "ERROR: Git tree is dirty after running validation steps."; \
@@ -342,6 +342,12 @@ generate-api-docs: crd-ref-docs ## Generate API reference documentation from CRD
342342
# across API versions; prepend "v1beta1 " to affected v1beta1 headings and links.
343343
python3 docs/fix-api-anchors.py ../../docs/pages/kubernetes/api-reference.md
344344

345+
HELM_CHART_DIR := ../helm/charts/platform
346+
347+
.PHONY: generate-helm-docs
348+
generate-helm-docs: ## Generate Helm chart README from values.yaml and template
349+
@$(MAKE) -C $(HELM_CHART_DIR) generate-helm-docs
350+
345351
.PHONY: coverage
346352
coverage: test
347353
go tool cover -func=cover.out

deploy/operator/api/v1alpha1/dynamocomponentdeployment_types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ type DynamoComponentDeploymentSharedSpec struct {
135135
// When enabled, pods can be restored from checkpoint files for faster cold start.
136136
// +optional
137137
Checkpoint *ServiceCheckpointConfig `json:"checkpoint,omitempty"`
138+
139+
// TopologyConstraint for this service. When both this and spec.topologyConstraint
140+
// are set, this must be narrower than or equal to the spec-level constraint.
141+
// +optional
142+
TopologyConstraint *TopologyConstraint `json:"topologyConstraint,omitempty"`
138143
}
139144

140145
type MultinodeSpec struct {

deploy/operator/api/v1alpha1/dynamographdeployment_types.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ type DynamoGraphDeploymentSpec struct {
7676
// Restart specifies the restart policy for the graph deployment.
7777
// +kubebuilder:validation:Optional
7878
Restart *Restart `json:"restart,omitempty"`
79+
80+
// TopologyConstraint is the deployment-level topology constraint.
81+
// When set, applies a broad topology constraint across the whole deployment.
82+
// Services without their own topologyConstraint inherit this value.
83+
// +optional
84+
TopologyConstraint *TopologyConstraint `json:"topologyConstraint,omitempty"`
7985
}
8086

8187
type Restart struct {
@@ -311,6 +317,19 @@ func (s *DynamoGraphDeployment) AddStatusCondition(condition metav1.Condition) {
311317
s.Status.Conditions = append(s.Status.Conditions, condition)
312318
}
313319

320+
// HasAnyTopologyConstraint reports whether any topology constraint is set at any level.
// It returns true when the deployment-level Spec.TopologyConstraint is non-nil, or when
// any non-nil entry in Spec.Services has a non-nil TopologyConstraint. Nil service
// entries are skipped. It returns false otherwise.
321+
func (s *DynamoGraphDeployment) HasAnyTopologyConstraint() bool {
322+
if s.Spec.TopologyConstraint != nil {
323+
return true
324+
}
325+
for _, svc := range s.Spec.Services {
326+
if svc != nil && svc.TopologyConstraint != nil {
327+
return true
328+
}
329+
}
330+
return false
331+
}
332+
314333
// HasAnyMultinodeService reports whether any service in the graph is configured with more than one node.
315334
func (s *DynamoGraphDeployment) HasAnyMultinodeService() bool {
316335
for _, svc := range s.Spec.Services {

0 commit comments

Comments
 (0)