Skip to content

Commit 24913b1

Browse files
authored
Add global configmap (#431)
* Add global configmap
  Signed-off-by: kerthcet <kerthcet@gmail.com>
* fix scheduler name
  Signed-off-by: kerthcet <kerthcet@gmail.com>
* revert version
  Signed-off-by: kerthcet <kerthcet@gmail.com>
---------
Signed-off-by: kerthcet <kerthcet@gmail.com>
1 parent cd84215 commit 24913b1

21 files changed

Lines changed: 233 additions & 22 deletions

File tree

Makefile

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ include Makefile-deps.mk
22

33
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
44
ENVTEST_K8S_VERSION = 1.32.0
5-
ENVTEST_LWS_VERSION = v0.5.1
65

76
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
87
ifeq (,$(shell go env GOBIN))
@@ -84,9 +83,7 @@ manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and Cust
8483
rbac:roleName=manager-role output:rbac:artifacts:config=config/rbac \
8584
crd:generateEmbeddedObjectMeta=true output:crd:artifacts:config=config/crd/bases \
8685
webhook output:webhook:artifacts:config=config/webhook \
87-
paths="./cmd/..."
88-
paths="./api/..."
89-
paths="./pkg/..."
86+
paths="./api/...;./pkg/...;./cmd/..."
9087

9188
.PHONY: generate
9289
generate: controller-gen code-generator ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations.
@@ -136,7 +133,7 @@ test-integration: manifests fmt vet envtest ginkgo ## Run integration tests.
136133

137134
.PHONY: test-e2e
138135
test-e2e: kustomize manifests fmt vet envtest ginkgo kind-image-build
139-
E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ENVTEST_LWS_VERSION=$(ENVTEST_LWS_VERSION) ./hack/e2e-test.sh
136+
E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) KUSTOMIZE=$(KUSTOMIZE) GINKGO=$(GINKGO) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) ./hack/e2e-test.sh
140137

141138
test-deploy-with-helm: kind-image-build
142139
E2E_KIND_NODE_VERSION=$(E2E_KIND_NODE_VERSION) KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND=$(KIND) KUBECTL=$(KUBECTL) USE_EXISTING_CLUSTER=$(USE_EXISTING_CLUSTER) IMAGE_TAG=$(IMG) TAG=$(GIT_TAG) ./hack/test-deploy-with-helm.sh

api/core/v1alpha1/model_types.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,6 @@ type ModelHub struct {
7171
// URIProtocol represents the protocol of the URI.
7272
type URIProtocol string
7373

74-
// Add roles for operating leaderWorkerSet.
75-
//
76-
// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets,verbs=get;list;watch;create;update;patch;delete
77-
// +kubebuilder:rbac:groups=leaderworkerset.x-k8s.io,resources=leaderworkersets/status,verbs=get;update;patch
78-
7974
// ModelSource represents the source of the model.
8075
// Only one model source will be used.
8176
type ModelSource struct {

config/crd/bases/inference.llmaz.io_playgrounds.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -897,7 +897,7 @@ spec:
897897
description: |-
898898
InferenceFlavors represents a list of flavor names with fungibility supported
899899
to serve the model.
900-
- If not set, always apply with the 0-index model by default.
900+
- If not set, will employ the model configured flavors by default.
901901
- If set, will lookup the flavor names following the model orders.
902902
items:
903903
type: string

config/crd/bases/inference.llmaz.io_services.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ spec:
5252
description: |-
5353
InferenceFlavors represents a list of flavor names with fungibility supported
5454
to serve the model.
55-
- If not set, always apply with the 0-index model by default.
55+
- If not set, will employ the model configured flavors by default.
5656
- If set, will lookup the flavor names following the model orders.
5757
items:
5858
type: string

config/default/configmap.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: v1
2+
kind: ConfigMap
3+
metadata:
4+
name: global-config
5+
data:
6+
config.data: |
7+
scheduler-name: default-scheduler
8+
# init-container-image: inftyai/model-loader:v0.0.10

config/default/kustomization.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ resources:
2222
# crd/kustomization.yaml
2323
- ../webhook
2424

25+
# [customized]
26+
- configmap.yaml
27+
2528
# [INTERNALCERT]
2629
- ../internalcert
2730

config/prometheus/monitor.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ spec:
2121
selector:
2222
matchLabels:
2323
app.kubernetes.io/name: llmaz
24+
control-plane: controller-manager

config/rbac/role.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@ kind: ClusterRole
44
metadata:
55
name: manager-role
66
rules:
7+
- apiGroups:
8+
- ""
9+
resources:
10+
- configmaps
11+
verbs:
12+
- get
13+
- list
714
- apiGroups:
815
- ""
916
resources:
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
apiVersion: llmaz.io/v1alpha1
2+
kind: OpenModel
3+
metadata:
4+
name: qwen3-0--6b
5+
spec:
6+
familyName: qwen3
7+
source:
8+
modelHub:
9+
modelID: Qwen/Qwen3-0.6B
10+
inferenceConfig:
11+
flavors:
12+
- name: t4 # GPU type
13+
limits:
14+
nvidia.com/gpu: 1
15+
---
16+
apiVersion: inference.llmaz.io/v1alpha1
17+
kind: Playground
18+
metadata:
19+
name: qwen3-0--6b
20+
spec:
21+
replicas: 1
22+
modelClaim:
23+
modelName: qwen3-0--6b
24+
backendRuntimeConfig:
25+
backendName: vllm
26+
version: v0.8.5
27+
---
28+
apiVersion: gateway.networking.k8s.io/v1
29+
kind: GatewayClass
30+
metadata:
31+
name: default-envoy-ai-gateway
32+
spec:
33+
controllerName: gateway.envoyproxy.io/gatewayclass-controller
34+
---
35+
apiVersion: gateway.networking.k8s.io/v1
36+
kind: Gateway
37+
metadata:
38+
name: default-envoy-ai-gateway
39+
spec:
40+
gatewayClassName: default-envoy-ai-gateway
41+
listeners:
42+
- name: http
43+
protocol: HTTP
44+
port: 80
45+
---
46+
apiVersion: aigateway.envoyproxy.io/v1alpha1
47+
kind: AIGatewayRoute
48+
metadata:
49+
name: default-envoy-ai-gateway
50+
spec:
51+
schema:
52+
name: OpenAI
53+
targetRefs:
54+
- name: default-envoy-ai-gateway
55+
kind: Gateway
56+
group: gateway.networking.k8s.io
57+
rules:
58+
- matches:
59+
- headers:
60+
- type: Exact
61+
name: x-ai-eg-model
62+
value: qwen3-0--6b
63+
backendRefs:
64+
- name: qwen3-0--6b
65+
---
66+
apiVersion: aigateway.envoyproxy.io/v1alpha1
67+
kind: AIServiceBackend
68+
metadata:
69+
name: qwen3-0--6b
70+
spec:
71+
timeouts:
72+
request: 3m
73+
schema:
74+
name: OpenAI
75+
backendRef:
76+
name: qwen3-0--6b-lb
77+
kind: Service
78+
port: 8080
79+
---

docs/examples/envoy-ai-gateway/basic.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,14 +82,14 @@ spec:
8282
- headers:
8383
- type: Exact
8484
name: x-ai-eg-model
85-
value: qwen2-0.5b
85+
value: qwen2-0--5b
8686
backendRefs:
8787
- name: qwen2-0--5b
8888
- matches:
8989
- headers:
9090
- type: Exact
9191
name: x-ai-eg-model
92-
value: qwen2.5-coder
92+
value: qwen2--5-coder
9393
backendRefs:
9494
- name: qwen2--5-coder
9595
---

0 commit comments

Comments
 (0)