diff --git a/Makefile b/Makefile
index 430e490..e2f78b2 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,11 @@ SHELL := /usr/bin/env bash
NAMESPACE ?= hc4ai-operator
CHART ?= charts/llm-d
+MS_VERSION ?= v0.0.15
+EPP_VERSION ?= v0.1.0
+VLLM_VERSION ?= 0.0.8
+ROUTING_PROXY_VERSION ?= 0.0.7
+INFERENCE_SIM_VERSION ?= 0.0.4
.PHONY: help
help: ## Print help
@@ -45,10 +50,39 @@ helm-uninstall: ## Uninstall the Helm release
.Phony: bump-modelservice-crd
bump-modelservice-crd:
- git clone git@github.com:llm-d/llm-d-model-service.git
+ git clone git@github.com:llm-d/llm-d-model-service.git -b $(MS_VERSION) --depth=1
kustomize build llm-d-model-service/config/crd > charts/llm-d/crds/modelservice-crd.yaml
rm -rf llm-d-model-service
-.Phony: bump-chart-version
+# Prefer gsed when it is installed, so macOS users can install GNU sed and use it
+# instead of the default BSD sed; otherwise fall back to plain sed.
+ifeq ($(shell command -v gsed 2>/dev/null),)
+ SED ?= $(shell command -v sed)
+else
+ SED ?= $(shell command -v gsed)
+endif
+ifeq ($(if $(filter bump-image-tags,$(MAKECMDGOALS)),$(shell ${SED} --version 2>&1 | grep -q GNU; echo $$?)),1)
+  $(error !!! GNU sed is required. If on OS X, use 'brew install gnu-sed'.)
+endif # GNU sed is only enforced when bump-image-tags is a requested goal, so unrelated targets (help, helm-*) still run with BSD sed
+
+VALUES_FILE := charts/llm-d/values.yaml
+
+.PHONY: bump-image-tags
+bump-image-tags:
+ @echo "Updating image tags in $(VALUES_FILE)..."
+ # Update modelservice.image.tag
+ $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(MS_VERSION)"/; } }' $(VALUES_FILE)
+ # Update modelservice.epp.image.tag
+ $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ epp:/,/^ [a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(EPP_VERSION)"/; } } }' $(VALUES_FILE)
+ # Update modelservice.vllm.image.tag
+ $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ vllm:/,/^ [a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(VLLM_VERSION)"/; } } }' $(VALUES_FILE)
+ # Update modelservice.routingProxy.image.tag
+ $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ routingProxy:/,/^ [a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(ROUTING_PROXY_VERSION)"/; } } }' $(VALUES_FILE)
+ # Update modelservice.inferenceSimulator.image.tag
+ $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ inferenceSimulator:/,/^ [a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(INFERENCE_SIM_VERSION)"/; } } }' $(VALUES_FILE)
+ @echo "Image tags updated successfully!"
+
+.PHONY: bump-chart-version
+# Bump the Helm chart version. Usage: make bump-chart-version bump_type=[patch|minor|major]
bump-chart-version:
- helpers/scripts/increment-chart-version.sh
+ helpers/scripts/increment-chart-version.sh $(bump_type)
diff --git a/charts/llm-d/Chart.yaml b/charts/llm-d/Chart.yaml
index 4e4aea0..1a00ac0 100644
--- a/charts/llm-d/Chart.yaml
+++ b/charts/llm-d/Chart.yaml
@@ -1,7 +1,7 @@
apiVersion: v2
name: llm-d
type: application
-version: 1.0.22
+version: 1.0.23
appVersion: "0.1"
icon: data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+CjwhLS0gQ3JlYXRlZCB3aXRoIElua3NjYXBlIChodHRwOi8vd3d3Lmlua3NjYXBlLm9yZy8pIC0tPgoKPHN2ZwogICB3aWR0aD0iODBtbSIKICAgaGVpZ2h0PSI4MG1tIgogICB2aWV3Qm94PSIwIDAgODAuMDAwMDA0IDgwLjAwMDAwMSIKICAgdmVyc2lvbj0iMS4xIgogICBpZD0ic3ZnMSIKICAgeG1sOnNwYWNlPSJwcmVzZXJ2ZSIKICAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogICB4bWxuczpzdmc9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48ZGVmcwogICAgIGlkPSJkZWZzMSIgLz48cGF0aAogICAgIHN0eWxlPSJmaWxsOiM0ZDRkNGQ7ZmlsbC1vcGFjaXR5OjE7c3Ryb2tlOiM0ZDRkNGQ7c3Ryb2tlLXdpZHRoOjIuMzQyOTk7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lIgogICAgIGQ9Im0gNTEuNjI5Nyw0My4wNzY3IGMgLTAuODI1NCwwIC0xLjY1MDgsMC4yMTI4IC0yLjM4ODEsMC42Mzg0IGwgLTEwLjcyNjksNi4xOTI2IGMgLTEuNDc2MywwLjg1MjIgLTIuMzg3MywyLjQzNDUgLTIuMzg3Myw0LjEzNTQgdiAxMi4zODQ3IGMgMCwxLjcwNDEgMC45MTI4LDMuMjg1NCAyLjM4ODUsNC4xMzU4IGwgMTAuNzI1Nyw2LjE5MTggYyAxLjQ3NDcsMC44NTEzIDMuMzAxNSwwLjg1MTMgNC43NzYyLDAgTCA2NC43NDQ3LDcwLjU2MzIgQyA2Ni4yMjEsNjkuNzExIDY3LjEzMiw2OC4xMjg4IDY3LjEzMiw2Ni40Mjc4IFYgNTQuMDQzMSBjIDAsLTEuNzAzNiAtMC45MTIzLC0zLjI4NDggLTIuMzg3MywtNC4xMzU0IGwgLThlLTQsLTRlLTQgLTEwLjcyNjEsLTYuMTkyMiBjIC0wLjczNzQsLTAuNDI1NiAtMS41NjI3LC0wLjYzODQgLTIuMzg4MSwtMC42Mzg0IHogbSAwLDMuNzM5NyBjIDAuMTc3NCwwIDAuMzU0NiwwLjA0NyAwLjUxNjcsMC4xNDA2IGwgMTAuNzI3Niw2LjE5MjUgNGUtNCw0ZS00IGMgMC4zMTkzLDAuMTg0IDAuNTE0MywwLjUyMDMgMC41MTQzLDAuODkzMiB2IDEyLjM4NDcgYyAwLDAuMzcyMSAtMC4xOTI3LDAuNzA3MyAtMC41MTU1LDAuODkzNiBsIC0xMC43MjY4LDYuMTkyMiBjIC0wLjMyNDMsMC4xODcyIC0wLjcwOTEsMC4xODcyIC0xLjAzMzQsMCBsIC0xMC43MjcyLC02LjE5MjYgLThlLTQsLTRlLTQgQyA0MC4wNjU3LDY3LjEzNjcgMzkuODcwNyw2Ni44MDA3IDM5Ljg3MDcsNjYuNDI3OCBWIDU0LjA0MzEgYyAwLC0wLjM3MiAwLjE5MjcsLTAuNzA3NyAwLjUxNTUsLTAuODk0IEwgNTEuMTEzLDQ2Ljk1NyBjIDAuMTYyMSwtMC4wOTQgMC4zMzkzLC0wLjE0MDYgMC41MTY3LC0wLjE0MDYgeiIKICAgICBpZD0icGF0aDEyMiIgLz48cGF0aAogICAgIGlkPSJwYXRoMTI0IgogICAgIHN0eWxlPSJmaWxsOiM0ZDRkNGQ7ZmlsbC1vcGFjaXR5OjE7c3Ryb2tlOiM0ZDRkNGQ7c3Ryb2tlLXdpZHRoOjIuMzQyOTk7c3Ryb2tlLWxpbmVjYXA6cm91
bmQ7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lIgogICAgIGQ9Im0gNjMuMzg5MDE4LDM0LjgxOTk1OCB2IDIyLjM0NDE3NSBhIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIDEuODcxNTQxLDEuODcxNTQxIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIDEuODcxNTQxLC0xLjg3MTU0MSBWIDMyLjY1ODY0NyBaIiAvPjxwYXRoCiAgICAgc3R5bGU9ImZpbGw6IzdmMzE3ZjtmaWxsLW9wYWNpdHk6MTtzdHJva2U6IzdmMzE3ZjtzdHJva2Utd2lkdGg6Mi4yNDM7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lO3N0cm9rZS1vcGFjaXR5OjEiCiAgICAgZD0ibSAzNi43MzQyLDI4LjIzNDggYyAwLjQwOTcsMC43MTY1IDEuMDA0MiwxLjMyNzMgMS43Mzk4LDEuNzU2MSBsIDEwLjcwMSw2LjIzNzIgYyAxLjQ3MjcsMC44NTg0IDMuMjk4NCwwLjg2MzcgNC43NzUsMC4wMTkgbCAxMC43NTA2LC02LjE0ODUgYyAxLjQ3OTMsLTAuODQ2IDIuMzk4NywtMi40MjM0IDIuNDA0NCwtNC4xMjY3IGwgMC4wNSwtMTIuMzg0NCBjIDAuMDEsLTEuNzAyOSAtMC45LC0zLjI4ODYgLTIuMzcxMiwtNC4xNDYxIEwgNTQuMDgzMiwzLjIwNCBDIDUyLjYxMDUsMi4zNDU1IDUwLjc4NDcsMi4zNDAyIDQ5LjMwODIsMy4xODUgTCAzOC41NTc1LDkuMzMzNSBjIC0xLjQ3ODksMC44NDU4IC0yLjM5ODQsMi40MjI3IC0yLjQwNDYsNC4xMjU0IGwgMTBlLTUsOGUtNCAtMC4wNSwxMi4zODUgYyAwLDAuODUxNSAwLjIyMTYsMS42NzM1IDAuNjMxNCwyLjM5IHogbSAzLjI0NjMsLTEuODU2NiBjIC0wLjA4OCwtMC4xNTQgLTAuMTM1MywtMC4zMzExIC0wLjEzNDUsLTAuNTE4MyBsIDAuMDUsLTEyLjM4NjYgMmUtNCwtNmUtNCBjIDAsLTAuMzY4NCAwLjE5NjMsLTAuNzA0NyAwLjUyLC0wLjg4OTkgTCA1MS4xNjY5LDYuNDM0MyBjIDAuMzIyOSwtMC4xODQ3IDAuNzA5NywtMC4xODM4IDEuMDMxNiwwIGwgMTAuNzAwNiw2LjIzNzQgYyAwLjMyMzUsMC4xODg1IDAuNTE0NSwwLjUyMjYgMC41MTMsMC44OTcgbCAtMC4wNSwxMi4zODYyIHYgOWUtNCBjIDAsMC4zNjg0IC0wLjE5NiwwLjcwNDUgLTAuNTE5NywwLjg4OTYgbCAtMTAuNzUwNiw2LjE0ODUgYyAtMC4zMjMsMC4xODQ3IC0wLjcxMDEsMC4xODQgLTEuMDMyLDAgTCA0MC4zNTkyLDI2Ljc1NjcgYyAtMC4xNjE3LC0wLjA5NCAtMC4yOTA1LC0wLjIyNDggLTAuMzc4NSwtMC4zNzg4IHoiCiAgICAgaWQ9InBhdGgxMjYiIC8+PHBhdGgKICAgICBpZD0icGF0aDEyOSIKICAgICBzdHlsZT0iZmlsbDojN2YzMTdmO2ZpbGwtb3BhY2l0eToxO3N0cm9rZTojN2YzMTdmO3N0cm9rZS13aWR0aDoyLjI0MztzdHJva2UtbGluZWNhcDpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoxMDtzdHJva2UtZGFzaGFycmF5Om5vbmU7c3Ryb2tlLW9wYWNpdHk6MSIKICAgICBkPSJNIDIzLjcyODgzNSwyMi4xMjYxODUgNDMuMTI0OTI0LDExLjAzMzIyIEEgMS44NzE1NDMsMS44NzE1NDMgMCAwIDAgNDMuODIwMzkx
LDguNDc5NDY2NiAxLjg3MTU0MywxLjg3MTU0MyAwIDAgMCA0MS4yNjY2MzcsNy43ODM5OTk4IEwgMTkuOTk0NDAxLDE5Ljk0OTk2NyBaIiAvPjxwYXRoCiAgICAgc3R5bGU9ImZpbGw6IzdmMzE3ZjtmaWxsLW9wYWNpdHk6MTtzdHJva2U6IzdmMzE3ZjtzdHJva2Utd2lkdGg6Mi4yNDM7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lO3N0cm9rZS1vcGFjaXR5OjEiCiAgICAgZD0ibSAzMS40NzY2LDQ4LjQ1MDQgYyAwLjQxNDUsLTAuNzEzOCAwLjY0NSwtMS41MzQ0IDAuNjQ3MiwtMi4zODU4IGwgMC4wMzIsLTEyLjM4NiBjIDAsLTEuNzA0NiAtMC45MDY0LC0zLjI4NyAtMi4zNzczLC00LjE0MTIgTCAxOS4wNjg4LDIzLjMxOCBjIC0xLjQ3MzcsLTAuODU1OCAtMy4yOTk1LC0wLjg2MDUgLTQuNzc2LC0wLjAxMSBMIDMuNTUyMSwyOS40NzI3IGMgLTEuNDc2OCwwLjg0NzggLTIuMzk0MiwyLjQyNzUgLTIuMzk4Niw0LjEzMDQgbCAtMC4wMzIsMTIuMzg1NyBjIDAsMS43MDQ3IDAuOTA2MywzLjI4NzEgMi4zNzcyLDQuMTQxMiBsIDEwLjcwOTgsNi4yMTk1IGMgMS40NzMyLDAuODU1NSAzLjI5ODcsMC44NjA2IDQuNzc1LDAuMDEyIGwgNmUtNCwtNGUtNCAxMC43NDEyLC02LjE2NTggYyAwLjczODUsLTAuNDIzOSAxLjMzNjksLTEuMDMwOCAxLjc1MTUsLTEuNzQ0NSB6IG0gLTMuMjM0LC0xLjg3ODEgYyAtMC4wODksMC4xNTM0IC0wLjIxODYsMC4yODMxIC0wLjM4MSwwLjM3NjMgbCAtMTAuNzQyMyw2LjE2NyAtNmUtNCwyZS00IGMgLTAuMzE5NCwwLjE4MzYgLTAuNzA4MiwwLjE4MzQgLTEuMDMwNywwIEwgNS4zNzgyLDQ2Ljg5NjQgQyA1LjA1NjUsNDYuNzA5NiA0Ljg2MzMsNDYuMzc0NSA0Ljg2NDMsNDYuMDAxOSBsIDAuMDMyLC0xMi4zODU4IGMgMCwtMC4zNzQ0IDAuMTk0MiwtMC43MDcyIDAuNTE4OSwtMC44OTM2IGwgMTAuNzQyMiwtNi4xNjY3IDZlLTQsLTRlLTQgYyAwLjMxOTQsLTAuMTgzNyAwLjcwNzgsLTAuMTgzNyAxLjAzMDMsMCBsIDEwLjcwOTgsNi4yMTk0IGMgMC4zMjE3LDAuMTg2OSAwLjUxNTIsMC41MjIxIDAuNTE0MiwwLjg5NDggbCAtMC4wMzIsMTIuMzg1NiBjIC00ZS00LDAuMTg3MiAtMC4wNDksMC4zNjQxIC0wLjEzNzksMC41MTc0IHoiCiAgICAgaWQ9InBhdGgxMzkiIC8+PHBhdGgKICAgICBpZD0icGF0aDE0MSIKICAgICBzdHlsZT0iZmlsbDojN2YzMTdmO2ZpbGwtb3BhY2l0eToxO3N0cm9rZTojN2YzMTdmO3N0cm9rZS13aWR0aDoyLjI0MztzdHJva2UtbGluZWNhcDpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoxMDtzdHJva2UtZGFzaGFycmF5Om5vbmU7c3Ryb2tlLW9wYWNpdHk6MSIKICAgICBkPSJNIDMyLjcxMTI5OSw2Mi43NjU3NDYgMTMuMzg4OTY5LDUxLjU0NDc5OCBhIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIC0yLjU1ODI5NSwwLjY3ODU2OCAxLjg3MTU0MywxLjg3MTU0MyAwIDAgMCAwLjY3ODU2OSwyLjU1ODI5NiBsIDIxLjE5MTM0NCwxMi4zMDYzMyB6IiAvPjwvc3ZnPgo=
description: llm-d is a Kubernetes-native high-performance distributed LLM inference framework
diff --git a/charts/llm-d/README.md b/charts/llm-d/README.md
index 896fa2d..97fa970 100644
--- a/charts/llm-d/README.md
+++ b/charts/llm-d/README.md
@@ -1,7 +1,7 @@
# llm-d Helm Chart
-
+

llm-d is a Kubernetes-native high-performance distributed LLM inference framework
@@ -194,7 +194,7 @@ Kubernetes: `>= 1.30.0-0`
| modelservice.epp.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` |
| modelservice.epp.image.registry | Endpoint picker image registry | string | `"ghcr.io"` |
| modelservice.epp.image.repository | Endpoint picker image repository | string | `"llm-d/llm-d-inference-scheduler"` |
-| modelservice.epp.image.tag | Endpoint picker image tag | string | `"0.0.4"` |
+| modelservice.epp.image.tag | Endpoint picker image tag | string | `"v0.1.0"` |
| modelservice.epp.metrics | Enable metrics gathering via podMonitor / ServiceMonitor | object | `{"enabled":true,"serviceMonitor":{"annotations":{},"interval":"10s","labels":{},"namespaceSelector":{"any":false,"matchNames":[]},"path":"/metrics","port":"metrics","selector":{"matchLabels":{}}}}` |
| modelservice.epp.metrics.enabled | Enable metrics scraping from endpoint picker service | bool | `true` |
| modelservice.epp.metrics.serviceMonitor | Prometheus ServiceMonitor configuration
Ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md | object | See below |
@@ -215,7 +215,7 @@ Kubernetes: `>= 1.30.0-0`
| modelservice.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` |
| modelservice.image.registry | Model Service controller image registry | string | `"ghcr.io"` |
| modelservice.image.repository | Model Service controller image repository | string | `"llm-d/llm-d-model-service"` |
-| modelservice.image.tag | Model Service controller image tag | string | `"0.0.10"` |
+| modelservice.image.tag | Model Service controller image tag | string | `"v0.0.15"` |
| modelservice.inferenceSimulator | llm-d inference simulator container options | object | See below |
| modelservice.inferenceSimulator.containerSecurityContext | Security settings for a Container.
Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container | object | `{}` |
| modelservice.inferenceSimulator.image | llm-d inference simulator image used in ModelService CR presets | object | See below |
@@ -253,12 +253,12 @@ Kubernetes: `>= 1.30.0-0`
| modelservice.replicas | Number of controller replicas | int | `1` |
| modelservice.routingProxy | Routing proxy container options | object | See below |
| modelservice.routingProxy.containerSecurityContext | Security settings for a Container.
Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container | object | `{}` |
-| modelservice.routingProxy.image | Routing proxy image used in ModelService CR presets | object | `{"imagePullPolicy":"IfNotPresent","pullSecrets":[],"registry":"ghcr.io","repository":"llm-d/llm-d-routing-sidecar","tag":"0.0.6"}` |
+| modelservice.routingProxy.image | Routing proxy image used in ModelService CR presets | object | `{"imagePullPolicy":"IfNotPresent","pullSecrets":[],"registry":"ghcr.io","repository":"llm-d/llm-d-routing-sidecar","tag":"0.0.7"}` |
| modelservice.routingProxy.image.imagePullPolicy | Specify a imagePullPolicy | string | `"IfNotPresent"` |
| modelservice.routingProxy.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` |
| modelservice.routingProxy.image.registry | Routing proxy image registry | string | `"ghcr.io"` |
| modelservice.routingProxy.image.repository | Routing proxy image repository | string | `"llm-d/llm-d-routing-sidecar"` |
-| modelservice.routingProxy.image.tag | Routing proxy image tag | string | `"0.0.6"` |
+| modelservice.routingProxy.image.tag | Routing proxy image tag | string | `"0.0.7"` |
| modelservice.service.enabled | Toggle to deploy a Service resource for Model service controller | bool | `true` |
| modelservice.service.port | Port number exposed from Model Service controller | int | `8443` |
| modelservice.service.type | Service type | string | `"ClusterIP"` |
diff --git a/charts/llm-d/crds/modelservice-crd.yaml b/charts/llm-d/crds/modelservice-crd.yaml
index 78571ad..2448c36 100644
--- a/charts/llm-d/crds/modelservice-crd.yaml
+++ b/charts/llm-d/crds/modelservice-crd.yaml
@@ -348,6 +348,16 @@ spec:
description: Image that is used to spawn container if present
will override base config
type: string
+ mountModelVolume:
+ description: |-
+ Boolean to indicate mounting the model artifacts to this container
+ For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
+ For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
+ and the value is mounted to an environment variable called HF_TOKEN
+ For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ is created and mounted with the mountPath oci-dir
+ default:false
+ type: boolean
name:
description: |-
Name of the container specified as a DNS_LABEL.
@@ -639,6 +649,16 @@ spec:
description: Image that is used to spawn container if present
will override base config
type: string
+ mountModelVolume:
+ description: |-
+ Boolean to indicate mounting the model artifacts to this container
+ For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
+ For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
+ and the value is mounted to an environment variable called HF_TOKEN
+ For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ is created and mounted with the mountPath oci-dir
+ default:false
+ type: boolean
name:
description: |-
Name of the container specified as a DNS_LABEL.
@@ -965,6 +985,16 @@ spec:
description: Image that is used to spawn container if present
will override base config
type: string
+ mountModelVolume:
+ description: |-
+ Boolean to indicate mounting the model artifacts to this container
+ For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
+ For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
+ and the value is mounted to an environment variable called HF_TOKEN
+ For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ is created and mounted with the mountPath oci-dir
+ default:false
+ type: boolean
name:
description: |-
Name of the container specified as a DNS_LABEL.
@@ -1256,6 +1286,16 @@ spec:
description: Image that is used to spawn container if present
will override base config
type: string
+ mountModelVolume:
+ description: |-
+ Boolean to indicate mounting the model artifacts to this container
+ For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
+ For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
+ and the value is mounted to an environment variable called HF_TOKEN
+ For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ is created and mounted with the mountPath oci-dir
+ default:false
+ type: boolean
name:
description: |-
Name of the container specified as a DNS_LABEL.
@@ -1610,6 +1650,16 @@ spec:
description: Image that is used to spawn container if present
will override base config
type: string
+ mountModelVolume:
+ description: |-
+ Boolean to indicate mounting the model artifacts to this container
+ For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
+ For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
+ and the value is mounted to an environment variable called HF_TOKEN
+ For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ is created and mounted with the mountPath oci-dir
+ default:false
+ type: boolean
name:
description: |-
Name of the container specified as a DNS_LABEL.
@@ -1901,6 +1951,16 @@ spec:
description: Image that is used to spawn container if present
will override base config
type: string
+ mountModelVolume:
+ description: |-
+ Boolean to indicate mounting the model artifacts to this container
+ For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache
+ For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference,
+ and the value is mounted to an environment variable called HF_TOKEN
+ For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/)
+ is created and mounted with the mountPath oci-dir
+ default:false
+ type: boolean
name:
description: |-
Name of the container specified as a DNS_LABEL.
@@ -2002,6 +2062,226 @@ spec:
description: Routing provides information needed to create configuration
for routing
properties:
+ gatewayRefs:
+ description: |-
+ GatewayRef is merged to baseconfig based on the Name field.
+ Directly from Gateway API: https://gateway-api.sigs.k8s.io/reference/spec/#commonroutespec
+ ParentRefs references the resources (usually Gateways) that a Route wants
+ to be attached to. Note that the referenced parent resource needs to
+ allow this for the attachment to be complete. For Gateways, that means
+ the Gateway needs to allow attachment from Routes of this kind and
+ namespace. For Services, that means the Service must either be in the same
+ namespace for a "producer" route, or the mesh implementation must support
+ and allow "consumer" routes for the referenced Service. ReferenceGrant is
+ not applicable for governing ParentRefs to Services - it is not possible to
+ create a "producer" route for a Service in a different namespace from the
+ Route.
+
+ There are two kinds of parent resources with "Core" support:
+
+ * Gateway (Gateway conformance profile)
+ * Service (Mesh conformance profile, ClusterIP Services only)
+
+ This API may be extended in the future to support additional kinds of parent
+ resources.
+
+ ParentRefs must be _distinct_. This means either that:
+
+ * They select different objects. If this is the case, then parentRef
+ entries are distinct. In terms of fields, this means that the
+ multi-part key defined by `group`, `kind`, `namespace`, and `name` must
+ be unique across all parentRef entries in the Route.
+ * They do not select different objects, but for each optional field used,
+ each ParentRef that selects the same object must set the same set of
+ optional fields to different values. If one ParentRef sets a
+ combination of optional fields, all must set the same combination.
+
+ Some examples:
+
+ * If one ParentRef sets `sectionName`, all ParentRefs referencing the
+ same object must also set `sectionName`.
+ * If one ParentRef sets `port`, all ParentRefs referencing the same
+ object must also set `port`.
+ * If one ParentRef sets `sectionName` and `port`, all ParentRefs
+ referencing the same object must also set `sectionName` and `port`.
+
+ It is possible to separately reference multiple distinct objects that may
+ be collapsed by an implementation. For example, some implementations may
+ choose to merge compatible Gateway Listeners together. If that is the
+ case, the list of routes attached to those resources should also be
+ merged.
+
+ Note that for ParentRefs that cross namespace boundaries, there are specific
+ rules. Cross-namespace references are only valid if they are explicitly
+ allowed by something in the namespace they are referring to. For example,
+ Gateway has the AllowedRoutes field, and ReferenceGrant provides a
+ generic way to enable other kinds of cross-namespace reference.
+
+
+ ParentRefs from a Route to a Service in the same namespace are "producer"
+ routes, which apply default routing rules to inbound connections from
+ any namespace to the Service.
+
+ ParentRefs from a Route to a Service in a different namespace are
+ "consumer" routes, and these routing rules are only applied to outbound
+ connections originating from the same namespace as the Route, for which
+ the intended destination of the connections are a Service targeted as a
+ ParentRef of the Route.
+
+
+
+
+
+
+ items:
+ description: |-
+ ParentReference identifies an API object (usually a Gateway) that can be considered
+ a parent of this resource (usually a route). There are two kinds of parent resources
+ with "Core" support:
+
+ * Gateway (Gateway conformance profile)
+ * Service (Mesh conformance profile, ClusterIP Services only)
+
+ This API may be extended in the future to support additional kinds of parent
+ resources.
+
+ The API object must be valid in the cluster; the Group and Kind must
+ be registered in the cluster for this reference to be valid.
+ properties:
+ group:
+ default: gateway.networking.k8s.io
+ description: |-
+ Group is the group of the referent.
+ When unspecified, "gateway.networking.k8s.io" is inferred.
+ To set the core API group (such as for a "Service" kind referent),
+ Group must be explicitly set to "" (empty string).
+
+ Support: Core
+ maxLength: 253
+ pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
+ type: string
+ kind:
+ default: Gateway
+ description: |-
+ Kind is kind of the referent.
+
+ There are two kinds of parent resources with "Core" support:
+
+ * Gateway (Gateway conformance profile)
+ * Service (Mesh conformance profile, ClusterIP Services only)
+
+ Support for other resources is Implementation-Specific.
+ maxLength: 63
+ minLength: 1
+ pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$
+ type: string
+ name:
+ description: |-
+ Name is the name of the referent.
+
+ Support: Core
+ maxLength: 253
+ minLength: 1
+ type: string
+ namespace:
+ description: |-
+ Namespace is the namespace of the referent. When unspecified, this refers
+ to the local namespace of the Route.
+
+ Note that there are specific rules for ParentRefs which cross namespace
+ boundaries. Cross-namespace references are only valid if they are explicitly
+ allowed by something in the namespace they are referring to. For example:
+ Gateway has the AllowedRoutes field, and ReferenceGrant provides a
+ generic way to enable any other kind of cross-namespace reference.
+
+
+ ParentRefs from a Route to a Service in the same namespace are "producer"
+ routes, which apply default routing rules to inbound connections from
+ any namespace to the Service.
+
+ ParentRefs from a Route to a Service in a different namespace are
+ "consumer" routes, and these routing rules are only applied to outbound
+ connections originating from the same namespace as the Route, for which
+ the intended destination of the connections are a Service targeted as a
+ ParentRef of the Route.
+
+
+ Support: Core
+ maxLength: 63
+ minLength: 1
+ pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$
+ type: string
+ port:
+ description: |-
+ Port is the network port this Route targets. It can be interpreted
+ differently based on the type of parent resource.
+
+ When the parent resource is a Gateway, this targets all listeners
+ listening on the specified port that also support this kind of Route(and
+ select this Route). It's not recommended to set `Port` unless the
+ networking behaviors specified in a Route must apply to a specific port
+ as opposed to a listener(s) whose port(s) may be changed. When both Port
+ and SectionName are specified, the name and port of the selected listener
+ must match both specified values.
+
+
+ When the parent resource is a Service, this targets a specific port in the
+ Service spec. When both Port (experimental) and SectionName are specified,
+ the name and port of the selected port must match both specified values.
+
+
+ Implementations MAY choose to support other parent resources.
+ Implementations supporting other types of parent resources MUST clearly
+ document how/if Port is interpreted.
+
+ For the purpose of status, an attachment is considered successful as
+ long as the parent resource accepts it partially. For example, Gateway
+ listeners can restrict which Routes can attach to them by Route kind,
+ namespace, or hostname. If 1 of 2 Gateway listeners accept attachment
+ from the referencing Route, the Route MUST be considered successfully
+ attached. If no Gateway listeners accept attachment from this Route,
+ the Route MUST be considered detached from the Gateway.
+
+ Support: Extended
+ format: int32
+ maximum: 65535
+ minimum: 1
+ type: integer
+ sectionName:
+ description: |-
+ SectionName is the name of a section within the target resource. In the
+ following resources, SectionName is interpreted as the following:
+
+ * Gateway: Listener name. When both Port (experimental) and SectionName
+ are specified, the name and port of the selected listener must match
+ both specified values.
+ * Service: Port name. When both Port (experimental) and SectionName
+ are specified, the name and port of the selected listener must match
+ both specified values.
+
+ Implementations MAY choose to support attaching Routes to other resources.
+ If that is the case, they MUST clearly document how SectionName is
+ interpreted.
+
+ When unspecified (empty string), this will reference the entire resource.
+ For the purpose of status, an attachment is considered successful if at
+ least one section in the parent resource accepts it. For example, Gateway
+ listeners can restrict which Routes can attach to them by Route kind,
+ namespace, or hostname. If 1 of 2 Gateway listeners accept attachment from
+ the referencing Route, the Route MUST be considered successfully
+ attached. If no Gateway listeners accept attachment from this Route, the
+ Route MUST be considered detached from the Gateway.
+
+ Support: Core
+ maxLength: 253
+ minLength: 1
+ pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
+ type: string
+ required:
+ - name
+ type: object
+ maxItems: 32
+ type: array
modelName:
description: |-
// CreateInferencePool indicates if inference pool resource will be created
@@ -2168,6 +2448,12 @@ spec:
if EppRoleBinding is yet to be created,
this reference will be nil
type: string
+ httpRouteRef:
+ description: |-
+ HTTPRoute identifies the HTTPRoute resource
+ if HTTPRoute is yet to be created,
+ this reference will be nil
+ type: string
inferenceModelRef:
description: |-
InferenceModelRef identifies the inference model resource
diff --git a/charts/llm-d/templates/modelservice/deployment.yaml b/charts/llm-d/templates/modelservice/deployment.yaml
index 2331650..854a1ec 100644
--- a/charts/llm-d/templates/modelservice/deployment.yaml
+++ b/charts/llm-d/templates/modelservice/deployment.yaml
@@ -58,7 +58,11 @@ spec:
{{- include "common.tplvalues.render" ( dict "value" .Values.modelservice.tolerations "context" $) | nindent 8 }}
{{- end }}
containers:
- - args:
+ - name: manager
+ command:
+ - /manager
+ - run
+ args:
- --leader-elect=false
- --health-probe-bind-address=:8081
- --epp-cluster-role
@@ -67,8 +71,6 @@ spec:
- {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.epp.image) "context" $) }}
- --pd-pull-secrets
- {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.vllm.image) "context" $) }}
- command:
- - /manager
image: {{ include "modelservice.image" . }}
imagePullPolicy: {{ .Values.modelservice.image.imagePullPolicy }}
{{- if .Values.modelservice.containerSecurityContext }}
@@ -81,7 +83,6 @@ spec:
port: 8081
initialDelaySeconds: 15
periodSeconds: 20
- name: manager
readinessProbe:
httpGet:
path: /readyz
diff --git a/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml b/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml
index 33425ff..15ff800 100644
--- a/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml
+++ b/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml
@@ -77,6 +77,18 @@ rules:
- get
- list
- watch
+- apiGroups:
+ - gateway.networking.k8s.io
+ resources:
+ - httproutes
+ verbs:
+ - create
+ - delete
+ - get
+ - list
+ - patch
+ - update
+ - watch
- apiGroups:
- inference.networking.x-k8s.io
resources:
diff --git a/charts/llm-d/values.schema.json b/charts/llm-d/values.schema.json
index 4c948a4..6c0f643 100644
--- a/charts/llm-d/values.schema.json
+++ b/charts/llm-d/values.schema.json
@@ -4025,7 +4025,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.4",
+ "default": "v0.1.0",
"description": "Endpoint picker image tag",
"required": [],
"title": "tag"
@@ -4491,7 +4491,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.10",
+ "default": "v0.0.15",
"description": "Model Service controller image tag",
"required": [],
"title": "tag"
@@ -6580,7 +6580,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.6",
+ "default": "0.0.7",
"description": "Routing proxy image tag",
"required": [],
"title": "tag"
diff --git a/charts/llm-d/values.schema.tmpl.json b/charts/llm-d/values.schema.tmpl.json
index 2fd5e3d..dd183e4 100644
--- a/charts/llm-d/values.schema.tmpl.json
+++ b/charts/llm-d/values.schema.tmpl.json
@@ -848,7 +848,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.4",
+ "default": "v0.1.0",
"description": "Endpoint picker image tag",
"required": [],
"title": "tag"
@@ -1018,7 +1018,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.10",
+ "default": "v0.0.15",
"description": "Model Service controller image tag",
"required": [],
"title": "tag"
@@ -1340,7 +1340,7 @@
"title": "repository"
},
"tag": {
- "default": "0.0.6",
+ "default": "0.0.7",
"description": "Routing proxy image tag",
"required": [],
"title": "tag"
diff --git a/charts/llm-d/values.yaml b/charts/llm-d/values.yaml
index 1255c5a..9f40d64 100644
--- a/charts/llm-d/values.yaml
+++ b/charts/llm-d/values.yaml
@@ -372,7 +372,7 @@ modelservice:
repository: llm-d/llm-d-model-service
# -- Model Service controller image tag
- tag: "0.0.10"
+ tag: "v0.0.15"
# -- Specify a imagePullPolicy
imagePullPolicy: "Always"
@@ -449,7 +449,7 @@ modelservice:
repository: llm-d/llm-d-inference-scheduler
# -- Endpoint picker image tag
- tag: 0.0.4
+ tag: "v0.1.0"
# -- Specify a imagePullPolicy
imagePullPolicy: "Always"
@@ -778,7 +778,7 @@ modelservice:
repository: llm-d/llm-d
# -- llm-d image tag
- tag: 0.0.8
+ tag: "0.0.8"
# -- Specify a imagePullPolicy
imagePullPolicy: "IfNotPresent"
@@ -815,7 +815,7 @@ modelservice:
repository: llm-d/llm-d-routing-sidecar
# -- Routing proxy image tag
- tag: "0.0.6"
+ tag: "0.0.7"
# -- Specify a imagePullPolicy
imagePullPolicy: "IfNotPresent"