diff --git a/charts/llm-d/Chart.yaml b/charts/llm-d/Chart.yaml
index 7752df9..7cbbfaa 100644
--- a/charts/llm-d/Chart.yaml
+++ b/charts/llm-d/Chart.yaml
@@ -1,7 +1,7 @@
apiVersion: v2
name: llm-d
type: application
-version: 0.0.9
+version: 0.1.0
appVersion: "0.0.1"
icon: data:null
description: A Helm chart for llm-d
diff --git a/charts/llm-d/README.md b/charts/llm-d/README.md
index 6780f01..cb4002b 100644
--- a/charts/llm-d/README.md
+++ b/charts/llm-d/README.md
@@ -1,7 +1,7 @@
# llm-d Helm Chart for OpenShift
-
+

A Helm chart for llm-d
@@ -168,12 +168,13 @@ Kubernetes: `>= 1.25.0-0`
| ingress.tls.enabled | Enable TLS configuration for the host defined at `ingress.host` parameter | bool | `false` |
| ingress.tls.secretName | The name to which the TLS Secret will be called | string | `""` |
| kubeVersion | Override Kubernetes version | string | `""` |
-| modelservice | Model service controller configuration | object | `{"annotations":{},"enabled":true,"epp":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-gateway-api-inference-extension-dev","tag":"0.0.5-amd64"}},"fullnameOverride":"","image":{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-model-service-dev","tag":"0.0.7"},"nameOverride":"","podAnnotations":{},"podLabels":{},"rbac":{"create":true},"replicas":1,"routingProxy":{"image":{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-routing-sidecar-dev","tag":"0.0.6"}},"service":{"enabled":true,"port":8443,"type":"ClusterIP"},"serviceAccount":{"annotations":{},"create":true,"fullnameOverride":"","labels":{},"nameOverride":""},"vllm":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-dev","tag":"0.0.5"}},"vllmSim":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/vllm-sim-dev","tag":"0.0.4"}}}` |
+| modelservice | Model service controller configuration | object | `{"annotations":{},"enabled":true,"epp":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-gateway-api-inference-extension-dev","tag":"0.0.5-amd64"},"metrics":{"enabled":true}},"fullnameOverride":"","image":{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-model-service","tag":"0.0.6"},"metrics":{"enabled":true},"nameOverride":"","podAnnotations":{},"podLabels":{},"rbac":{"create":true},"replicas":1,"routingProxy":{"image":{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-routing-sidecar-dev","tag":"0.0.6"}},"service":{"enabled":true,"port":8443,"type":"ClusterIP"},"serviceAccount":{"annotations":{},"create":true,"fullnameOverride":"","labels":{},"nameOverride":""},"vllm":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-dev","tag":"0.0.5"},"metrics":{"enabled":true}},"vllmSim":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/vllm-sim-dev","tag":"0.0.4"}}}` |
| modelservice.annotations | Annotations to add to all modelservice resources | object | `{}` |
| modelservice.enabled | Toggle to deploy modelservice controller related resources | bool | `true` |
-| modelservice.epp | Endpoint picker image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-gateway-api-inference-extension-dev","tag":"0.0.5-amd64"}}` |
+| modelservice.epp | Endpoint picker image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-gateway-api-inference-extension-dev","tag":"0.0.5-amd64"},"metrics":{"enabled":true}}` |
| modelservice.fullnameOverride | String to fully override modelservice.fullname | string | `""` |
-| modelservice.image | Modelservice controller image, please change only if appropriate adjustments to the CRD are being made | object | `{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-model-service-dev","tag":"0.0.7"}` |
+| modelservice.image | Modelservice controller image, please change only if appropriate adjustments to the CRD are being made | object | `{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-model-service","tag":"0.0.6"}` |
+| modelservice.metrics | Enable metrics gathering via podMonitor / ServiceMonitor | object | `{"enabled":true}` |
| modelservice.nameOverride | String to partially override modelservice.fullname | string | `""` |
| modelservice.podAnnotations | Pod annotations for modelservice | object | `{}` |
| modelservice.podLabels | Pod labels for modelservice | object | `{}` |
@@ -189,15 +190,15 @@ Kubernetes: `>= 1.25.0-0`
| modelservice.serviceAccount.fullnameOverride | String to fully override modelservice.serviceAccountName, defaults to modelservice.fullname | string | `""` |
| modelservice.serviceAccount.labels | Additional custom labels to the service ServiceAccount. | object | `{}` |
| modelservice.serviceAccount.nameOverride | String to partially override modelservice.serviceAccountName, defaults to modelservice.fullname | string | `""` |
-| modelservice.vllm | vLLM image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-dev","tag":"0.0.5"}}` |
+| modelservice.vllm | vLLM image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-dev","tag":"0.0.5"},"metrics":{"enabled":true}}` |
| modelservice.vllmSim | vLLM sim image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/vllm-sim-dev","tag":"0.0.4"}}` |
| nameOverride | String to partially override common.names.fullname | string | `""` |
| redis | Bitnami/Redis chart configuration | object | Use sane defaults for minimal Redis deployment |
-| sampleApplication | Sample application deploying a p-d pair of specific model | object | `{"enabled":true,"inferencePoolPort":8000,"modelArtifactUri":"pvc://llama-3.2-3b-instruct-pvc/models/meta-llama/Llama-32-3B-Instruct","modelName":"Llama-32-3B-Instruct","modelPath":"/cache/models/meta-llama/Llama-3.2-3B-Instruct","resources":{"limits":{"nvidia.com/gpu":1},"requests":{"nvidia.com/gpu":1}}}` |
+| sampleApplication | Sample application deploying a p-d pair of specific model | object | `{"enabled":true,"inferencePoolPort":8000,"modelArtifactUri":"pvc://llama-3.2-3b-instruct-pvc/models/meta-llama/Llama-32-3B-Instruct","modelName":"Llama-3.2-3B-Instruct","modelPath":"/cache/models/meta-llama/Llama-3.2-3B-Instruct","resources":{"limits":{"nvidia.com/gpu":1},"requests":{"nvidia.com/gpu":1}}}` |
| sampleApplication.enabled | Enable rendering of sample application resources | bool | `true` |
| sampleApplication.inferencePoolPort | InferencePool port configuration | int | `8000` |
| sampleApplication.modelArtifactUri | Location where the model can be loaded from. Currently supports pvc:// backed by preexisting PVC | string | `"pvc://llama-3.2-3b-instruct-pvc/models/meta-llama/Llama-32-3B-Instruct"` |
-| sampleApplication.modelName | Specify the model name as it is available to the api | string | `"Llama-32-3B-Instruct"` |
+| sampleApplication.modelName | Specify the model name as it is available to the api | string | `"Llama-3.2-3B-Instruct"` |
| sampleApplication.modelPath | Specify the filepath for the model | string | `"/cache/models/meta-llama/Llama-3.2-3B-Instruct"` |
| sampleApplication.resources | Resource requests/limits
Ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-requests-and-limits-of-pod-and-container | object | `{"limits":{"nvidia.com/gpu":1},"requests":{"nvidia.com/gpu":1}}` |
| test | Helm tests | object | `{"enabled":false}` |
diff --git a/charts/llm-d/templates/_helpers.tpl b/charts/llm-d/templates/_helpers.tpl
index b446665..f2e3157 100644
--- a/charts/llm-d/templates/_helpers.tpl
+++ b/charts/llm-d/templates/_helpers.tpl
@@ -6,3 +6,7 @@ FDQN for Redis master service in ..svc.cluster.local:
{{- $port := default 6379 .Values.redis.master.service.ports.redis -}}
{{- printf "%s-redis-master.%s.svc.cluster.local:%v" $name .Release.Namespace $port -}}
{{- end }}
+{{/* Label emitted on resources whose corresponding `metrics.enabled` value is true. */}}
+{{- define "metrics.label" -}}
+llmd.ai/gather-metrics: "true"
+{{- end }}
diff --git a/charts/llm-d/templates/inference-gateway/gateway.yaml b/charts/llm-d/templates/inference-gateway/gateway.yaml
index dafa3fe..7ba616e 100644
--- a/charts/llm-d/templates/inference-gateway/gateway.yaml
+++ b/charts/llm-d/templates/inference-gateway/gateway.yaml
@@ -5,6 +5,7 @@ metadata:
name: {{ include "gateway.fullname" . }}
labels:
{{- include "common.labels.standard" . | nindent 4 }}
+ app.kubernetes.io/gateway: {{ include "gateway.fullname" . }}
app.kubernetes.io/component: inference-gateway
{{- if .Values.commonLabels }}
{{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" . ) | nindent 4 }}
diff --git a/charts/llm-d/templates/inference-gateway/gatewayparameters.yaml b/charts/llm-d/templates/inference-gateway/gatewayparameters.yaml
index 0197d4c..9127d3a 100644
--- a/charts/llm-d/templates/inference-gateway/gatewayparameters.yaml
+++ b/charts/llm-d/templates/inference-gateway/gatewayparameters.yaml
@@ -4,6 +4,7 @@ kind: GatewayParameters
metadata:
name: {{ include "gateway.fullname" . }}
labels: {{ include "common.labels.standard" . | nindent 4 }}
+ app.kubernetes.io/gateway: {{ include "gateway.fullname" . }}
app.kubernetes.io/component: inference-gateway
{{- if .Values.commonLabels }}
{{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }}
diff --git a/charts/llm-d/templates/modelservice/deployment.yaml b/charts/llm-d/templates/modelservice/deployment.yaml
index b60f26d..de26a21 100644
--- a/charts/llm-d/templates/modelservice/deployment.yaml
+++ b/charts/llm-d/templates/modelservice/deployment.yaml
@@ -47,8 +47,6 @@ spec:
- endpoint-picker-clusterrole
- --epp-pull-secrets
- {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.epp.image) "context" $) }}
- - --pd-cluster-role
- - prefill-decode-clusterrole
- --pd-pull-secrets
- {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.vllm.image) "context" $) }}
# MSV2 HACK END
diff --git a/charts/llm-d/templates/modelservice/ms-v2-hack/clusterRole-pd.yaml b/charts/llm-d/templates/modelservice/ms-v2-hack/clusterRole-pd.yaml
deleted file mode 100644
index f318f01..0000000
--- a/charts/llm-d/templates/modelservice/ms-v2-hack/clusterRole-pd.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-kind: ClusterRole
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
- name: prefill-decode-clusterrole
-rules:
diff --git a/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml b/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml
index 824100b..314a41d 100644
--- a/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml
+++ b/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml
@@ -219,7 +219,9 @@ data:
kind: Service
metadata:
labels:
- llmd.ai/gather-metrics: "true"
+ {{- if .Values.modelservice.vllm.metrics.enabled }}
+ {{ include "metrics.label" . }}
+ {{- end }}
spec:
clusterIP: None
ports:
@@ -235,7 +237,9 @@ data:
kind: Service
metadata:
labels:
- llmd.ai/gather-metrics: "true"
+ {{- if .Values.modelservice.vllm.metrics.enabled }}
+ {{ include "metrics.label" . }}
+ {{- end }}
spec:
clusterIP: None
ports:
@@ -249,6 +253,12 @@ data:
eppService: |
apiVersion: v1
kind: Service
+ metadata:
+ labels:
+ app.kubernetes.io/gateway: {{ include "gateway.fullname" . }}
+ {{- if .Values.modelservice.epp.metrics.enabled }}
+ {{ include "metrics.label" . }}
+ {{- end }}
spec:
ports:
- port: 9002 # Needs to match the port of the eppDeployment
@@ -261,20 +271,31 @@ data:
protocol: TCP
name: metrics
type: NodePort # accepts "LoadBalancer" or "NodePort"
+ selector:
+ app.kubernetes.io/gateway: {{ include "gateway.fullname" . }}
eppDeployment: |
apiVersion: apps/v1
kind: Deployment
+ metadata:
+ labels:
+ app.kubernetes.io/gateway: {{ include "gateway.fullname" . }}
spec:
+ selector:
+ matchLabels:
+ app.kubernetes.io/gateway: {{ include "gateway.fullname" . }}
template:
+ metadata:
+ labels:
+ app.kubernetes.io/gateway: {{ include "gateway.fullname" . }}
spec:
serviceAccountName: endpoint-picker-sa # manually created in workaround w/ proper RBAC
containers:
- args:
- --poolName
- - {{ include "modelservice.fullname" . }}
+ - {{`"{{ .InferencePoolName }}"`}}
- --poolNamespace
- - {{ .Release.Namespace }}
+ - {{`"{{ .ModelServiceNamespace }}"`}}
- -v
- "4"
- --zap-encoder
@@ -301,7 +322,7 @@ data:
# failureThreshold: 3
# grpc:
# port: 9003
- # service: "llama-32-3b-instruct-epp-service"
+ # service: {{`"{{ .EPPServiceName }}"`}}
# initialDelaySeconds: 5
# periodSeconds: 10
# successThreshold: 1
@@ -310,7 +331,7 @@ data:
# failureThreshold: 3
# grpc:
# port: 9003
- # service: "llama-32-3b-instruct-epp-service"
+ # service: {{`"{{ .EPPServiceName }}"`}}
# initialDelaySeconds: 5
# periodSeconds: 10
# successThreshold: 1
diff --git a/charts/llm-d/templates/modelservice/service.yaml b/charts/llm-d/templates/modelservice/service.yaml
index 09dcd24..4bed1d4 100644
--- a/charts/llm-d/templates/modelservice/service.yaml
+++ b/charts/llm-d/templates/modelservice/service.yaml
@@ -9,6 +9,9 @@ metadata:
{{- if .Values.commonLabels }}
{{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }}
{{- end }}
+ {{- if .Values.modelservice.metrics.enabled }}
+ {{- include "metrics.label" . | nindent 4 }}
+ {{- end }}
annotations:
{{- if .Values.commonAnnotations }}
{{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }}
diff --git a/charts/llm-d/templates/sample-application/_helpers.tpl b/charts/llm-d/templates/sample-application/_helpers.tpl
index ce8a70d..dbe4179 100644
--- a/charts/llm-d/templates/sample-application/_helpers.tpl
+++ b/charts/llm-d/templates/sample-application/_helpers.tpl
@@ -6,6 +6,7 @@ Sanitize the model name into a valid k8s label.
{{- $name = regexReplaceAll "[^a-z0-9_.-]" $name "-" -}}
{{- $name = regexReplaceAll "^[\\-._]+" $name "" -}}
{{- $name = regexReplaceAll "[\\-._]+$" $name "" -}}
+ {{- $name = regexReplaceAll "\\." $name "-" -}}
{{- if gt (len $name) 63 -}}
{{- $name = substr 0 63 $name -}}
diff --git a/charts/llm-d/templates/sample-application/ingress.yaml b/charts/llm-d/templates/sample-application/ingress.yaml
index 9213983..d4babfc 100644
--- a/charts/llm-d/templates/sample-application/ingress.yaml
+++ b/charts/llm-d/templates/sample-application/ingress.yaml
@@ -4,6 +4,7 @@ kind: Ingress
metadata:
name: {{ include "gateway.fullname" . }}
labels: {{ include "common.labels.standard" $ | nindent 4 }}
+ app.kubernetes.io/gateway: {{ include "gateway.fullname" . }}
app.kubernetes.io/component: sample-application
{{- if $.Values.commonLabels }}
{{- include "common.tplvalues.render" ( dict "value" $.Values.commonLabels "context" $ ) | nindent 4 }}
diff --git a/charts/llm-d/values.yaml b/charts/llm-d/values.yaml
index 35e5979..5a0c06e 100644
--- a/charts/llm-d/values.yaml
+++ b/charts/llm-d/values.yaml
@@ -53,7 +53,7 @@ sampleApplication:
enabled: true
# -- Specify the model name as it is available to the api
- modelName: Llama-32-3B-Instruct
+ modelName: Llama-3.2-3B-Instruct
# -- Specify the filepath for the model
modelPath: /cache/models/meta-llama/Llama-3.2-3B-Instruct
@@ -168,6 +168,10 @@ modelservice:
# -- Toggle to deploy modelservice controller related resources
enabled: true
+ # -- Enable metrics gathering via podMonitor / ServiceMonitor
+ metrics:
+ enabled: true
+
# -- String to fully override modelservice.fullname
fullnameOverride: ""
@@ -180,8 +184,8 @@ modelservice:
# -- Modelservice controller image, please change only if appropriate adjustments to the CRD are being made
image:
registry: quay.io
- repository: llm-d/llm-d-model-service-dev
- tag: "0.0.7"
+ repository: llm-d/llm-d-model-service
+ tag: "0.0.6"
imagePullPolicy: "Always"
# -- Endpoint picker image used in ModelService CR presets
@@ -191,6 +195,8 @@ modelservice:
repository: llm-d/llm-d-gateway-api-inference-extension-dev
tag: "0.0.5-amd64"
imagePullPolicy: "IfNotPresent"
+ metrics:
+ enabled: true
# -- vLLM image used in ModelService CR presets
vllm:
@@ -199,6 +205,8 @@ modelservice:
repository: llm-d/llm-d-dev
tag: "0.0.5"
imagePullPolicy: "IfNotPresent"
+ metrics:
+ enabled: true
# -- Routing proxy image used in ModelService CR presets
routingProxy: