diff --git a/charts/llm-d/Chart.yaml b/charts/llm-d/Chart.yaml index 7752df9..7cbbfaa 100644 --- a/charts/llm-d/Chart.yaml +++ b/charts/llm-d/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: llm-d type: application -version: 0.0.9 +version: 0.1.0 appVersion: "0.0.1" icon: data:null description: A Helm chart for llm-d diff --git a/charts/llm-d/README.md b/charts/llm-d/README.md index 6780f01..cb4002b 100644 --- a/charts/llm-d/README.md +++ b/charts/llm-d/README.md @@ -1,7 +1,7 @@ # llm-d Helm Chart for OpenShift -![Version: 0.0.9](https://img.shields.io/badge/Version-0.0.9-informational?style=flat-square) +![Version: 0.1.0](https://img.shields.io/badge/Version-0.1.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) A Helm chart for llm-d @@ -168,12 +168,13 @@ Kubernetes: `>= 1.25.0-0` | ingress.tls.enabled | Enable TLS configuration for the host defined at `ingress.host` parameter | bool | `false` | | ingress.tls.secretName | The name to which the TLS Secret will be called | string | `""` | | kubeVersion | Override Kubernetes version | string | `""` | -| modelservice | Model service controller configuration | object | 
`{"annotations":{},"enabled":true,"epp":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-gateway-api-inference-extension-dev","tag":"0.0.5-amd64"}},"fullnameOverride":"","image":{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-model-service-dev","tag":"0.0.7"},"nameOverride":"","podAnnotations":{},"podLabels":{},"rbac":{"create":true},"replicas":1,"routingProxy":{"image":{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-routing-sidecar-dev","tag":"0.0.6"}},"service":{"enabled":true,"port":8443,"type":"ClusterIP"},"serviceAccount":{"annotations":{},"create":true,"fullnameOverride":"","labels":{},"nameOverride":""},"vllm":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-dev","tag":"0.0.5"}},"vllmSim":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/vllm-sim-dev","tag":"0.0.4"}}}` | +| modelservice | Model service controller configuration | object | 
`{"annotations":{},"enabled":true,"epp":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-gateway-api-inference-extension-dev","tag":"0.0.5-amd64"},"metrics":{"enabled":true}},"fullnameOverride":"","image":{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-model-service","tag":"0.0.6"},"metrics":{"enabled":true},"nameOverride":"","podAnnotations":{},"podLabels":{},"rbac":{"create":true},"replicas":1,"routingProxy":{"image":{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-routing-sidecar-dev","tag":"0.0.6"}},"service":{"enabled":true,"port":8443,"type":"ClusterIP"},"serviceAccount":{"annotations":{},"create":true,"fullnameOverride":"","labels":{},"nameOverride":""},"vllm":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-dev","tag":"0.0.5"},"metrics":{"enabled":true}},"vllmSim":{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/vllm-sim-dev","tag":"0.0.4"}}}` | | modelservice.annotations | Annotations to add to all modelservice resources | object | `{}` | | modelservice.enabled | Toggle to deploy modelservice controller related resources | bool | `true` | -| modelservice.epp | Endpoint picker image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-gateway-api-inference-extension-dev","tag":"0.0.5-amd64"}}` | +| modelservice.epp | Endpoint picker image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-gateway-api-inference-extension-dev","tag":"0.0.5-amd64"},"metrics":{"enabled":true}}` | | modelservice.fullnameOverride | String to fully override modelservice.fullname | string | `""` | -| modelservice.image | Modelservice controller image, please change only if appropriate adjustments to the CRD are being made | object | 
`{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-model-service-dev","tag":"0.0.7"}` | +| modelservice.image | Modelservice controller image, please change only if appropriate adjustments to the CRD are being made | object | `{"imagePullPolicy":"Always","registry":"quay.io","repository":"llm-d/llm-d-model-service","tag":"0.0.6"}` | +| modelservice.metrics | Enable metrics gathering via PodMonitor / ServiceMonitor | object | `{"enabled":true}` | | modelservice.nameOverride | String to partially override modelservice.fullname | string | `""` | | modelservice.podAnnotations | Pod annotations for modelservice | object | `{}` | | modelservice.podLabels | Pod labels for modelservice | object | `{}` | @@ -189,15 +190,15 @@ Kubernetes: `>= 1.25.0-0` | modelservice.serviceAccount.fullnameOverride | String to fully override modelservice.serviceAccountName, defaults to modelservice.fullname | string | `""` | | modelservice.serviceAccount.labels | Additional custom labels to the service ServiceAccount. 
| object | `{}` | | modelservice.serviceAccount.nameOverride | String to partially override modelservice.serviceAccountName, defaults to modelservice.fullname | string | `""` | -| modelservice.vllm | vLLM image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-dev","tag":"0.0.5"}}` | +| modelservice.vllm | vLLM image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/llm-d-dev","tag":"0.0.5"},"metrics":{"enabled":true}}` | | modelservice.vllmSim | vLLM sim image used in ModelService CR presets | object | `{"image":{"imagePullPolicy":"IfNotPresent","registry":"quay.io","repository":"llm-d/vllm-sim-dev","tag":"0.0.4"}}` | | nameOverride | String to partially override common.names.fullname | string | `""` | | redis | Bitnami/Redis chart configuration | object | Use sane defaults for minimal Redis deployment | -| sampleApplication | Sample application deploying a p-d pair of specific model | object | `{"enabled":true,"inferencePoolPort":8000,"modelArtifactUri":"pvc://llama-3.2-3b-instruct-pvc/models/meta-llama/Llama-32-3B-Instruct","modelName":"Llama-32-3B-Instruct","modelPath":"/cache/models/meta-llama/Llama-3.2-3B-Instruct","resources":{"limits":{"nvidia.com/gpu":1},"requests":{"nvidia.com/gpu":1}}}` | +| sampleApplication | Sample application deploying a p-d pair of specific model | object | `{"enabled":true,"inferencePoolPort":8000,"modelArtifactUri":"pvc://llama-3.2-3b-instruct-pvc/models/meta-llama/Llama-32-3B-Instruct","modelName":"Llama-3.2-3B-Instruct","modelPath":"/cache/models/meta-llama/Llama-3.2-3B-Instruct","resources":{"limits":{"nvidia.com/gpu":1},"requests":{"nvidia.com/gpu":1}}}` | | sampleApplication.enabled | Enable rendering of sample application resources | bool | `true` | | sampleApplication.inferencePoolPort | InferencePool port configuration | int | `8000` | | 
sampleApplication.modelArtifactUri | Location where the model can be loaded from. Currently supports pvc:// backed by preexisting PVC | string | `"pvc://llama-3.2-3b-instruct-pvc/models/meta-llama/Llama-32-3B-Instruct"` | -| sampleApplication.modelName | Specify the model name as it is available to the api | string | `"Llama-32-3B-Instruct"` | +| sampleApplication.modelName | Specify the model name as it is available to the api | string | `"Llama-3.2-3B-Instruct"` | | sampleApplication.modelPath | Specify the filepath for the model | string | `"/cache/models/meta-llama/Llama-3.2-3B-Instruct"` | | sampleApplication.resources | Resource requests/limits
Ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#resource-requests-and-limits-of-pod-and-container | object | `{"limits":{"nvidia.com/gpu":1},"requests":{"nvidia.com/gpu":1}}` | | test | Helm tests | object | `{"enabled":false}` | diff --git a/charts/llm-d/templates/_helpers.tpl b/charts/llm-d/templates/_helpers.tpl index b446665..f2e3157 100644 --- a/charts/llm-d/templates/_helpers.tpl +++ b/charts/llm-d/templates/_helpers.tpl @@ -6,3 +6,7 @@ FQDN for Redis master service in ..svc.cluster.local: {{- $port := default 6379 .Values.redis.master.service.ports.redis -}} {{- printf "%s-redis-master.%s.svc.cluster.local:%v" $name .Release.Namespace $port -}} {{- end }} + +{{- define "metrics.label" -}} +llmd.ai/gather-metrics: "true" +{{- end }} diff --git a/charts/llm-d/templates/inference-gateway/gateway.yaml b/charts/llm-d/templates/inference-gateway/gateway.yaml index dafa3fe..7ba616e 100644 --- a/charts/llm-d/templates/inference-gateway/gateway.yaml +++ b/charts/llm-d/templates/inference-gateway/gateway.yaml @@ -5,6 +5,7 @@ metadata: name: {{ include "gateway.fullname" . }} labels: {{- include "common.labels.standard" . | nindent 4 }} + app.kubernetes.io/gateway: {{ include "gateway.fullname" . }} app.kubernetes.io/component: inference-gateway {{- if .Values.commonLabels }} {{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" . ) | nindent 4 }} diff --git a/charts/llm-d/templates/inference-gateway/gatewayparameters.yaml b/charts/llm-d/templates/inference-gateway/gatewayparameters.yaml index 0197d4c..9127d3a 100644 --- a/charts/llm-d/templates/inference-gateway/gatewayparameters.yaml +++ b/charts/llm-d/templates/inference-gateway/gatewayparameters.yaml @@ -4,6 +4,7 @@ kind: GatewayParameters metadata: name: {{ include "gateway.fullname" . }} labels: {{ include "common.labels.standard" . | nindent 4 }} + app.kubernetes.io/gateway: {{ include "gateway.fullname" . 
}} app.kubernetes.io/component: inference-gateway {{- if .Values.commonLabels }} {{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} diff --git a/charts/llm-d/templates/modelservice/deployment.yaml b/charts/llm-d/templates/modelservice/deployment.yaml index b60f26d..de26a21 100644 --- a/charts/llm-d/templates/modelservice/deployment.yaml +++ b/charts/llm-d/templates/modelservice/deployment.yaml @@ -47,8 +47,6 @@ spec: - endpoint-picker-clusterrole - --epp-pull-secrets - {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.epp.image) "context" $) }} - - --pd-cluster-role - - prefill-decode-clusterrole - --pd-pull-secrets - {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.vllm.image) "context" $) }} # MSV2 HACK END diff --git a/charts/llm-d/templates/modelservice/ms-v2-hack/clusterRole-pd.yaml b/charts/llm-d/templates/modelservice/ms-v2-hack/clusterRole-pd.yaml deleted file mode 100644 index f318f01..0000000 --- a/charts/llm-d/templates/modelservice/ms-v2-hack/clusterRole-pd.yaml +++ /dev/null @@ -1,5 +0,0 @@ -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: prefill-decode-clusterrole -rules: diff --git a/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml b/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml index 824100b..314a41d 100644 --- a/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml +++ b/charts/llm-d/templates/modelservice/presets/basic-gpu-with-nixl-preset.yaml @@ -219,7 +219,9 @@ data: kind: Service metadata: labels: - llmd.ai/gather-metrics: "true" + {{- if .Values.modelservice.vllm.metrics.enabled }} + {{ include "metrics.label" . 
}} + {{- end }} spec: clusterIP: None ports: @@ -235,7 +237,9 @@ data: kind: Service metadata: labels: - llmd.ai/gather-metrics: "true" + {{- if .Values.modelservice.vllm.metrics.enabled }} + {{ include "metrics.label" . }} + {{- end }} spec: clusterIP: None ports: @@ -249,6 +253,12 @@ data: eppService: | apiVersion: v1 kind: Service + metadata: + labels: + app.kubernetes.io/gateway: {{ include "gateway.fullname" . }} + {{- if .Values.modelservice.epp.metrics.enabled }} + {{ include "metrics.label" . }} + {{- end}} spec: ports: - port: 9002 # Needs to match the port of the eppDeployment @@ -261,20 +271,31 @@ data: protocol: TCP name: metrics type: NodePort # accepts "LoadBalancer" or "NodePort" + selector: + app.kubernetes.io/gateway: {{ include "gateway.fullname" . }} eppDeployment: | apiVersion: apps/v1 kind: Deployment + metadata: + labels: + app.kubernetes.io/gateway: {{ include "gateway.fullname" . }} spec: + selector: + matchLabels: + app.kubernetes.io/gateway: {{ include "gateway.fullname" . }} template: + metadata: + labels: + app.kubernetes.io/gateway: {{ include "gateway.fullname" . }} spec: serviceAccountName: endpoint-picker-sa # manually created in workaround w/ proper RBAC containers: - args: - --poolName - - {{ include "modelservice.fullname" . 
}} + - {{`"{{ .InferencePoolName }}"`}} - --poolNamespace - - {{ .Release.Namespace }} + - {{`"{{ .ModelServiceNamespace }}"`}} - -v - "4" - --zap-encoder @@ -301,7 +322,7 @@ data: # failureThreshold: 3 # grpc: # port: 9003 - # service: "llama-32-3b-instruct-epp-service" + # service: {{`"{{ .EPPServiceName }}"`}} # initialDelaySeconds: 5 # periodSeconds: 10 # successThreshold: 1 @@ -310,7 +331,7 @@ data: # failureThreshold: 3 # grpc: # port: 9003 - # service: "llama-32-3b-instruct-epp-service" + # service: {{`"{{ .EPPServiceName }}"`}} # initialDelaySeconds: 5 # periodSeconds: 10 # successThreshold: 1 diff --git a/charts/llm-d/templates/modelservice/service.yaml b/charts/llm-d/templates/modelservice/service.yaml index 09dcd24..4bed1d4 100644 --- a/charts/llm-d/templates/modelservice/service.yaml +++ b/charts/llm-d/templates/modelservice/service.yaml @@ -9,6 +9,9 @@ metadata: {{- if .Values.commonLabels }} {{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} {{- end }} + {{- if .Values.modelservice.metrics.enabled }} + {{ include "metrics.label" . }} + {{- end }} annotations: {{- if .Values.commonAnnotations }} {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} diff --git a/charts/llm-d/templates/sample-application/_helpers.tpl b/charts/llm-d/templates/sample-application/_helpers.tpl index ce8a70d..dbe4179 100644 --- a/charts/llm-d/templates/sample-application/_helpers.tpl +++ b/charts/llm-d/templates/sample-application/_helpers.tpl @@ -6,6 +6,7 @@ Sanitize the model name into a valid k8s label. {{- $name = regexReplaceAll "[^a-z0-9_.-]" $name "-" -}} {{- $name = regexReplaceAll "^[\\-._]+" $name "" -}} {{- $name = regexReplaceAll "[\\-._]+$" $name "" -}} + {{- $name = regexReplaceAll "\\." 
$name "-" -}} {{- if gt (len $name) 63 -}} {{- $name = substr 0 63 $name -}} diff --git a/charts/llm-d/templates/sample-application/ingress.yaml b/charts/llm-d/templates/sample-application/ingress.yaml index 9213983..d4babfc 100644 --- a/charts/llm-d/templates/sample-application/ingress.yaml +++ b/charts/llm-d/templates/sample-application/ingress.yaml @@ -4,6 +4,7 @@ kind: Ingress metadata: name: {{ include "gateway.fullname" . }} labels: {{ include "common.labels.standard" $ | nindent 4 }} + app.kubernetes.io/gateway: {{ include "gateway.fullname" . }} app.kubernetes.io/component: sample-application {{- if $.Values.commonLabels }} {{- include "common.tplvalues.render" ( dict "value" $.Values.commonLabels "context" $ ) | nindent 4 }} diff --git a/charts/llm-d/values.yaml b/charts/llm-d/values.yaml index 35e5979..5a0c06e 100644 --- a/charts/llm-d/values.yaml +++ b/charts/llm-d/values.yaml @@ -53,7 +53,7 @@ sampleApplication: enabled: true # -- Specify the model name as it is available to the api - modelName: Llama-32-3B-Instruct + modelName: Llama-3.2-3B-Instruct # -- Specify the filepath for the model modelPath: /cache/models/meta-llama/Llama-3.2-3B-Instruct @@ -168,6 +168,10 @@ modelservice: # -- Toggle to deploy modelservice controller related resources enabled: true + # -- Enable metrics gathering via PodMonitor / ServiceMonitor + metrics: + enabled: true + # -- String to fully override modelservice.fullname fullnameOverride: "" @@ -180,8 +184,8 @@ modelservice: # -- Modelservice controller image, please change only if appropriate adjustments to the CRD are being made image: registry: quay.io - repository: llm-d/llm-d-model-service-dev - tag: "0.0.7" + repository: llm-d/llm-d-model-service + tag: "0.0.6" imagePullPolicy: "Always" # -- Endpoint picker image used in ModelService CR presets @@ -191,6 +195,8 @@ modelservice: repository: llm-d/llm-d-gateway-api-inference-extension-dev tag: "0.0.5-amd64" imagePullPolicy: "IfNotPresent" + metrics: + enabled: true # -- 
vLLM image used in ModelService CR presets vllm: @@ -199,6 +205,8 @@ modelservice: repository: llm-d/llm-d-dev tag: "0.0.5" imagePullPolicy: "IfNotPresent" + metrics: + enabled: true # -- Routing proxy image used in ModelService CR presets routingProxy: