From 0529b5a262a4ff1f0c1da1538501d522b6c7bba8 Mon Sep 17 00:00:00 2001 From: nicole-lihui Date: Thu, 17 Jul 2025 21:35:39 +0800 Subject: [PATCH] bump llm-d-modelservice to v0.0.15, epp v0.1.0, routing 0.0.7 && add sync tag cmd Signed-off-by: nicole-lihui --- Makefile | 40 ++- charts/llm-d/Chart.yaml | 2 +- charts/llm-d/README.md | 10 +- charts/llm-d/crds/modelservice-crd.yaml | 286 ++++++++++++++++++ .../templates/modelservice/deployment.yaml | 9 +- .../rbac/manager_clusterrole.yaml | 12 + charts/llm-d/values.schema.json | 6 +- charts/llm-d/values.schema.tmpl.json | 6 +- charts/llm-d/values.yaml | 8 +- 9 files changed, 356 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 430e490..e2f78b2 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,11 @@ SHELL := /usr/bin/env bash NAMESPACE ?= hc4ai-operator CHART ?= charts/llm-d +MS_VERSION ?= v0.0.15 +EPP_VERSION ?= v0.1.0 +VLLM_VERSION ?= 0.0.8 +ROUTING_PROXY_VERSION ?= 0.0.7 +INFERENCE_SIM_VERSION ?= 0.0.4 .PHONY: help help: ## Print help @@ -45,10 +50,39 @@ helm-uninstall: ## Uninstall the Helm release .Phony: bump-modelservice-crd bump-modelservice-crd: - git clone git@github.com:llm-d/llm-d-model-service.git + git clone git@github.com:llm-d/llm-d-model-service.git -b $(MS_VERSION) --depth=1 kustomize build llm-d-model-service/config/crd > charts/llm-d/crds/modelservice-crd.yaml rm -rf llm-d-model-service -.Phony: bump-chart-version +# Setting SED allows macos users to install GNU sed and use the latter +# instead of the default BSD sed. +ifeq ($(shell command -v gsed 2>/dev/null),) + SED ?= $(shell command -v sed) +else + SED ?= $(shell command -v gsed) +endif +ifeq ($(shell ${SED} --version 2>&1 | grep -q GNU; echo $$?),1) + $(error !!! GNU sed is required. If on OS X, use 'brew install gnu-sed'.) +endif + +VALUES_FILE := charts/llm-d/values.yaml + +.PHONY: bump-image-tags +bump-image-tags: + @echo "Updating image tags in $(VALUES_FILE)..." 
+ # Update modelservice.image.tag + $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(MS_VERSION)"/; } }' $(VALUES_FILE) + # Update modelservice.epp.image.tag + $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ epp:/,/^ [a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(EPP_VERSION)"/; } } }' $(VALUES_FILE) + # Update modelservice.vllm.image.tag + $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ vllm:/,/^ [a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(VLLM_VERSION)"/; } } }' $(VALUES_FILE) + # Update modelservice.routingProxy.image.tag + $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ routingProxy:/,/^ [a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(ROUTING_PROXY_VERSION)"/; } } }' $(VALUES_FILE) + # Update modelservice.inferenceSimulator.image.tag + $(SED) -i '/^modelservice:/,/^[a-zA-Z]/ { /^ inferenceSimulator:/,/^ [a-zA-Z]/ { /^ image:/,/^ [a-zA-Z]/ { s/^\( tag: \).*$$/\1"$(INFERENCE_SIM_VERSION)"/; } } }' $(VALUES_FILE) + @echo "Image tags updated successfully!" 
+ +.PHONY: bump-chart-version +# Bump Helm chart version, usage: make bump-chart-version bump_type=[patch|minor|major] bump-chart-version: - helpers/scripts/increment-chart-version.sh + helpers/scripts/increment-chart-version.sh $(bump_type) diff --git a/charts/llm-d/Chart.yaml b/charts/llm-d/Chart.yaml index 4e4aea0..1a00ac0 100644 --- a/charts/llm-d/Chart.yaml +++ b/charts/llm-d/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: llm-d type: application -version: 1.0.22 +version: 1.0.23 appVersion: "0.1" icon: data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0iVVRGLTgiIHN0YW5kYWxvbmU9Im5vIj8+CjwhLS0gQ3JlYXRlZCB3aXRoIElua3NjYXBlIChodHRwOi8vd3d3Lmlua3NjYXBlLm9yZy8pIC0tPgoKPHN2ZwogICB3aWR0aD0iODBtbSIKICAgaGVpZ2h0PSI4MG1tIgogICB2aWV3Qm94PSIwIDAgODAuMDAwMDA0IDgwLjAwMDAwMSIKICAgdmVyc2lvbj0iMS4xIgogICBpZD0ic3ZnMSIKICAgeG1sOnNwYWNlPSJwcmVzZXJ2ZSIKICAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogICB4bWxuczpzdmc9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48ZGVmcwogICAgIGlkPSJkZWZzMSIgLz48cGF0aAogICAgIHN0eWxlPSJmaWxsOiM0ZDRkNGQ7ZmlsbC1vcGFjaXR5OjE7c3Ryb2tlOiM0ZDRkNGQ7c3Ryb2tlLXdpZHRoOjIuMzQyOTk7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lIgogICAgIGQ9Im0gNTEuNjI5Nyw0My4wNzY3IGMgLTAuODI1NCwwIC0xLjY1MDgsMC4yMTI4IC0yLjM4ODEsMC42Mzg0IGwgLTEwLjcyNjksNi4xOTI2IGMgLTEuNDc2MywwLjg1MjIgLTIuMzg3MywyLjQzNDUgLTIuMzg3Myw0LjEzNTQgdiAxMi4zODQ3IGMgMCwxLjcwNDEgMC45MTI4LDMuMjg1NCAyLjM4ODUsNC4xMzU4IGwgMTAuNzI1Nyw2LjE5MTggYyAxLjQ3NDcsMC44NTEzIDMuMzAxNSwwLjg1MTMgNC43NzYyLDAgTCA2NC43NDQ3LDcwLjU2MzIgQyA2Ni4yMjEsNjkuNzExIDY3LjEzMiw2OC4xMjg4IDY3LjEzMiw2Ni40Mjc4IFYgNTQuMDQzMSBjIDAsLTEuNzAzNiAtMC45MTIzLC0zLjI4NDggLTIuMzg3MywtNC4xMzU0IGwgLThlLTQsLTRlLTQgLTEwLjcyNjEsLTYuMTkyMiBjIC0wLjczNzQsLTAuNDI1NiAtMS41NjI3LC0wLjYzODQgLTIuMzg4MSwtMC42Mzg0IHogbSAwLDMuNzM5NyBjIDAuMTc3NCwwIDAuMzU0NiwwLjA0NyAwLjUxNjcsMC4xNDA2IGwgMTAuNzI3Niw2LjE5MjUgNGUtNCw0ZS00IGMgMC4zMTkzLDAuMTg0IDAuNTE0MywwLjUyMDMgMC41MTQzLDAuODkzMiB2IDEyLjM4NDcgYyAwLDAuMzcyMSAtMC4xOTI3LDAuNzA3MyAtMC41MTU1LDAuODkzNiBsIC0xMC43MjY4LDYu
MTkyMiBjIC0wLjMyNDMsMC4xODcyIC0wLjcwOTEsMC4xODcyIC0xLjAzMzQsMCBsIC0xMC43MjcyLC02LjE5MjYgLThlLTQsLTRlLTQgQyA0MC4wNjU3LDY3LjEzNjcgMzkuODcwNyw2Ni44MDA3IDM5Ljg3MDcsNjYuNDI3OCBWIDU0LjA0MzEgYyAwLC0wLjM3MiAwLjE5MjcsLTAuNzA3NyAwLjUxNTUsLTAuODk0IEwgNTEuMTEzLDQ2Ljk1NyBjIDAuMTYyMSwtMC4wOTQgMC4zMzkzLC0wLjE0MDYgMC41MTY3LC0wLjE0MDYgeiIKICAgICBpZD0icGF0aDEyMiIgLz48cGF0aAogICAgIGlkPSJwYXRoMTI0IgogICAgIHN0eWxlPSJmaWxsOiM0ZDRkNGQ7ZmlsbC1vcGFjaXR5OjE7c3Ryb2tlOiM0ZDRkNGQ7c3Ryb2tlLXdpZHRoOjIuMzQyOTk7c3Ryb2tlLWxpbmVjYXA6cm91bmQ7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lIgogICAgIGQ9Im0gNjMuMzg5MDE4LDM0LjgxOTk1OCB2IDIyLjM0NDE3NSBhIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIDEuODcxNTQxLDEuODcxNTQxIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIDEuODcxNTQxLC0xLjg3MTU0MSBWIDMyLjY1ODY0NyBaIiAvPjxwYXRoCiAgICAgc3R5bGU9ImZpbGw6IzdmMzE3ZjtmaWxsLW9wYWNpdHk6MTtzdHJva2U6IzdmMzE3ZjtzdHJva2Utd2lkdGg6Mi4yNDM7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lO3N0cm9rZS1vcGFjaXR5OjEiCiAgICAgZD0ibSAzNi43MzQyLDI4LjIzNDggYyAwLjQwOTcsMC43MTY1IDEuMDA0MiwxLjMyNzMgMS43Mzk4LDEuNzU2MSBsIDEwLjcwMSw2LjIzNzIgYyAxLjQ3MjcsMC44NTg0IDMuMjk4NCwwLjg2MzcgNC43NzUsMC4wMTkgbCAxMC43NTA2LC02LjE0ODUgYyAxLjQ3OTMsLTAuODQ2IDIuMzk4NywtMi40MjM0IDIuNDA0NCwtNC4xMjY3IGwgMC4wNSwtMTIuMzg0NCBjIDAuMDEsLTEuNzAyOSAtMC45LC0zLjI4ODYgLTIuMzcxMiwtNC4xNDYxIEwgNTQuMDgzMiwzLjIwNCBDIDUyLjYxMDUsMi4zNDU1IDUwLjc4NDcsMi4zNDAyIDQ5LjMwODIsMy4xODUgTCAzOC41NTc1LDkuMzMzNSBjIC0xLjQ3ODksMC44NDU4IC0yLjM5ODQsMi40MjI3IC0yLjQwNDYsNC4xMjU0IGwgMTBlLTUsOGUtNCAtMC4wNSwxMi4zODUgYyAwLDAuODUxNSAwLjIyMTYsMS42NzM1IDAuNjMxNCwyLjM5IHogbSAzLjI0NjMsLTEuODU2NiBjIC0wLjA4OCwtMC4xNTQgLTAuMTM1MywtMC4zMzExIC0wLjEzNDUsLTAuNTE4MyBsIDAuMDUsLTEyLjM4NjYgMmUtNCwtNmUtNCBjIDAsLTAuMzY4NCAwLjE5NjMsLTAuNzA0NyAwLjUyLC0wLjg4OTkgTCA1MS4xNjY5LDYuNDM0MyBjIDAuMzIyOSwtMC4xODQ3IDAuNzA5NywtMC4xODM4IDEuMDMxNiwwIGwgMTAuNzAwNiw2LjIzNzQgYyAwLjMyMzUsMC4xODg1IDAuNTE0NSwwLjUyMjYgMC41MTMsMC44OTcgbCAtMC4wNSwxMi4zODYyIHYgOWUtNCBjIDAsMC4zNjg0IC0wLjE5NiwwLjcwNDUgLTAuNTE5NywwLjg4OTYgbCAtMTAuNzUwNiw2LjE0ODUgYyAtMC4zMjMsMC4x
ODQ3IC0wLjcxMDEsMC4xODQgLTEuMDMyLDAgTCA0MC4zNTkyLDI2Ljc1NjcgYyAtMC4xNjE3LC0wLjA5NCAtMC4yOTA1LC0wLjIyNDggLTAuMzc4NSwtMC4zNzg4IHoiCiAgICAgaWQ9InBhdGgxMjYiIC8+PHBhdGgKICAgICBpZD0icGF0aDEyOSIKICAgICBzdHlsZT0iZmlsbDojN2YzMTdmO2ZpbGwtb3BhY2l0eToxO3N0cm9rZTojN2YzMTdmO3N0cm9rZS13aWR0aDoyLjI0MztzdHJva2UtbGluZWNhcDpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoxMDtzdHJva2UtZGFzaGFycmF5Om5vbmU7c3Ryb2tlLW9wYWNpdHk6MSIKICAgICBkPSJNIDIzLjcyODgzNSwyMi4xMjYxODUgNDMuMTI0OTI0LDExLjAzMzIyIEEgMS44NzE1NDMsMS44NzE1NDMgMCAwIDAgNDMuODIwMzkxLDguNDc5NDY2NiAxLjg3MTU0MywxLjg3MTU0MyAwIDAgMCA0MS4yNjY2MzcsNy43ODM5OTk4IEwgMTkuOTk0NDAxLDE5Ljk0OTk2NyBaIiAvPjxwYXRoCiAgICAgc3R5bGU9ImZpbGw6IzdmMzE3ZjtmaWxsLW9wYWNpdHk6MTtzdHJva2U6IzdmMzE3ZjtzdHJva2Utd2lkdGg6Mi4yNDM7c3Ryb2tlLW1pdGVybGltaXQ6MTA7c3Ryb2tlLWRhc2hhcnJheTpub25lO3N0cm9rZS1vcGFjaXR5OjEiCiAgICAgZD0ibSAzMS40NzY2LDQ4LjQ1MDQgYyAwLjQxNDUsLTAuNzEzOCAwLjY0NSwtMS41MzQ0IDAuNjQ3MiwtMi4zODU4IGwgMC4wMzIsLTEyLjM4NiBjIDAsLTEuNzA0NiAtMC45MDY0LC0zLjI4NyAtMi4zNzczLC00LjE0MTIgTCAxOS4wNjg4LDIzLjMxOCBjIC0xLjQ3MzcsLTAuODU1OCAtMy4yOTk1LC0wLjg2MDUgLTQuNzc2LC0wLjAxMSBMIDMuNTUyMSwyOS40NzI3IGMgLTEuNDc2OCwwLjg0NzggLTIuMzk0MiwyLjQyNzUgLTIuMzk4Niw0LjEzMDQgbCAtMC4wMzIsMTIuMzg1NyBjIDAsMS43MDQ3IDAuOTA2MywzLjI4NzEgMi4zNzcyLDQuMTQxMiBsIDEwLjcwOTgsNi4yMTk1IGMgMS40NzMyLDAuODU1NSAzLjI5ODcsMC44NjA2IDQuNzc1LDAuMDEyIGwgNmUtNCwtNGUtNCAxMC43NDEyLC02LjE2NTggYyAwLjczODUsLTAuNDIzOSAxLjMzNjksLTEuMDMwOCAxLjc1MTUsLTEuNzQ0NSB6IG0gLTMuMjM0LC0xLjg3ODEgYyAtMC4wODksMC4xNTM0IC0wLjIxODYsMC4yODMxIC0wLjM4MSwwLjM3NjMgbCAtMTAuNzQyMyw2LjE2NyAtNmUtNCwyZS00IGMgLTAuMzE5NCwwLjE4MzYgLTAuNzA4MiwwLjE4MzQgLTEuMDMwNywwIEwgNS4zNzgyLDQ2Ljg5NjQgQyA1LjA1NjUsNDYuNzA5NiA0Ljg2MzMsNDYuMzc0NSA0Ljg2NDMsNDYuMDAxOSBsIDAuMDMyLC0xMi4zODU4IGMgMCwtMC4zNzQ0IDAuMTk0MiwtMC43MDcyIDAuNTE4OSwtMC44OTM2IGwgMTAuNzQyMiwtNi4xNjY3IDZlLTQsLTRlLTQgYyAwLjMxOTQsLTAuMTgzNyAwLjcwNzgsLTAuMTgzNyAxLjAzMDMsMCBsIDEwLjcwOTgsNi4yMTk0IGMgMC4zMjE3LDAuMTg2OSAwLjUxNTIsMC41MjIxIDAuNTE0MiwwLjg5NDggbCAtMC4wMzIsMTIuMzg1NiBjIC00ZS00LDAuMTg3MiAtMC4wNDksMC4zNjQxIC0wLjEzNzksMC41
MTc0IHoiCiAgICAgaWQ9InBhdGgxMzkiIC8+PHBhdGgKICAgICBpZD0icGF0aDE0MSIKICAgICBzdHlsZT0iZmlsbDojN2YzMTdmO2ZpbGwtb3BhY2l0eToxO3N0cm9rZTojN2YzMTdmO3N0cm9rZS13aWR0aDoyLjI0MztzdHJva2UtbGluZWNhcDpyb3VuZDtzdHJva2UtbWl0ZXJsaW1pdDoxMDtzdHJva2UtZGFzaGFycmF5Om5vbmU7c3Ryb2tlLW9wYWNpdHk6MSIKICAgICBkPSJNIDMyLjcxMTI5OSw2Mi43NjU3NDYgMTMuMzg4OTY5LDUxLjU0NDc5OCBhIDEuODcxNTQzLDEuODcxNTQzIDAgMCAwIC0yLjU1ODI5NSwwLjY3ODU2OCAxLjg3MTU0MywxLjg3MTU0MyAwIDAgMCAwLjY3ODU2OSwyLjU1ODI5NiBsIDIxLjE5MTM0NCwxMi4zMDYzMyB6IiAvPjwvc3ZnPgo= description: llm-d is a Kubernetes-native high-performance distributed LLM inference framework diff --git a/charts/llm-d/README.md b/charts/llm-d/README.md index 896fa2d..97fa970 100644 --- a/charts/llm-d/README.md +++ b/charts/llm-d/README.md @@ -1,7 +1,7 @@ # llm-d Helm Chart -![Version: 1.0.22](https://img.shields.io/badge/Version-1.0.22-informational?style=flat-square) +![Version: 1.0.23](https://img.shields.io/badge/Version-1.0.23-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) llm-d is a Kubernetes-native high-performance distributed LLM inference framework @@ -194,7 +194,7 @@ Kubernetes: `>= 1.30.0-0` | modelservice.epp.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` | | modelservice.epp.image.registry | Endpoint picker image registry | string | `"ghcr.io"` | | modelservice.epp.image.repository | Endpoint picker image repository | string | `"llm-d/llm-d-inference-scheduler"` | -| modelservice.epp.image.tag | Endpoint picker image tag | string | `"0.0.4"` | +| modelservice.epp.image.tag | Endpoint picker image tag | string | `"v0.1.0"` | | modelservice.epp.metrics | Enable metrics gathering via podMonitor / ServiceMonitor | object | 
`{"enabled":true,"serviceMonitor":{"annotations":{},"interval":"10s","labels":{},"namespaceSelector":{"any":false,"matchNames":[]},"path":"/metrics","port":"metrics","selector":{"matchLabels":{}}}}` | | modelservice.epp.metrics.enabled | Enable metrics scraping from endpoint picker service | bool | `true` | | modelservice.epp.metrics.serviceMonitor | Prometheus ServiceMonitor configuration
Ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md | object | See below | @@ -215,7 +215,7 @@ Kubernetes: `>= 1.30.0-0` | modelservice.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` | | modelservice.image.registry | Model Service controller image registry | string | `"ghcr.io"` | | modelservice.image.repository | Model Service controller image repository | string | `"llm-d/llm-d-model-service"` | -| modelservice.image.tag | Model Service controller image tag | string | `"0.0.10"` | +| modelservice.image.tag | Model Service controller image tag | string | `"v0.0.15"` | | modelservice.inferenceSimulator | llm-d inference simulator container options | object | See below | | modelservice.inferenceSimulator.containerSecurityContext | Security settings for a Container.
Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container | object | `{}` | | modelservice.inferenceSimulator.image | llm-d inference simulator image used in ModelService CR presets | object | See below | @@ -253,12 +253,12 @@ Kubernetes: `>= 1.30.0-0` | modelservice.replicas | Number of controller replicas | int | `1` | | modelservice.routingProxy | Routing proxy container options | object | See below | | modelservice.routingProxy.containerSecurityContext | Security settings for a Container.
Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container | object | `{}` | -| modelservice.routingProxy.image | Routing proxy image used in ModelService CR presets | object | `{"imagePullPolicy":"IfNotPresent","pullSecrets":[],"registry":"ghcr.io","repository":"llm-d/llm-d-routing-sidecar","tag":"0.0.6"}` | +| modelservice.routingProxy.image | Routing proxy image used in ModelService CR presets | object | `{"imagePullPolicy":"IfNotPresent","pullSecrets":[],"registry":"ghcr.io","repository":"llm-d/llm-d-routing-sidecar","tag":"0.0.7"}` | | modelservice.routingProxy.image.imagePullPolicy | Specify a imagePullPolicy | string | `"IfNotPresent"` | | modelservice.routingProxy.image.pullSecrets | Optionally specify an array of imagePullSecrets (evaluated as templates) | list | `[]` | | modelservice.routingProxy.image.registry | Routing proxy image registry | string | `"ghcr.io"` | | modelservice.routingProxy.image.repository | Routing proxy image repository | string | `"llm-d/llm-d-routing-sidecar"` | -| modelservice.routingProxy.image.tag | Routing proxy image tag | string | `"0.0.6"` | +| modelservice.routingProxy.image.tag | Routing proxy image tag | string | `"0.0.7"` | | modelservice.service.enabled | Toggle to deploy a Service resource for Model service controller | bool | `true` | | modelservice.service.port | Port number exposed from Model Service controller | int | `8443` | | modelservice.service.type | Service type | string | `"ClusterIP"` | diff --git a/charts/llm-d/crds/modelservice-crd.yaml b/charts/llm-d/crds/modelservice-crd.yaml index 78571ad..2448c36 100644 --- a/charts/llm-d/crds/modelservice-crd.yaml +++ b/charts/llm-d/crds/modelservice-crd.yaml @@ -348,6 +348,16 @@ spec: description: Image that is used to spawn container if present will override base config type: string + mountModelVolume: + description: |- + Boolean to indicate mounting the model artifacts to this container + For URIs 
with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache + For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, + and the value is mounted to an environment variable called HF_TOKEN + For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + is created and mounted with the mountPath oci-dir + default:false + type: boolean name: description: |- Name of the container specified as a DNS_LABEL. @@ -639,6 +649,16 @@ spec: description: Image that is used to spawn container if present will override base config type: string + mountModelVolume: + description: |- + Boolean to indicate mounting the model artifacts to this container + For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache + For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, + and the value is mounted to an environment variable called HF_TOKEN + For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + is created and mounted with the mountPath oci-dir + default:false + type: boolean name: description: |- Name of the container specified as a DNS_LABEL. 
@@ -965,6 +985,16 @@ spec: description: Image that is used to spawn container if present will override base config type: string + mountModelVolume: + description: |- + Boolean to indicate mounting the model artifacts to this container + For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache + For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, + and the value is mounted to an environment variable called HF_TOKEN + For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + is created and mounted with the mountPath oci-dir + default:false + type: boolean name: description: |- Name of the container specified as a DNS_LABEL. @@ -1256,6 +1286,16 @@ spec: description: Image that is used to spawn container if present will override base config type: string + mountModelVolume: + description: |- + Boolean to indicate mounting the model artifacts to this container + For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache + For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, + and the value is mounted to an environment variable called HF_TOKEN + For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + is created and mounted with the mountPath oci-dir + default:false + type: boolean name: description: |- Name of the container specified as a DNS_LABEL. 
@@ -1610,6 +1650,16 @@ spec: description: Image that is used to spawn container if present will override base config type: string + mountModelVolume: + description: |- + Boolean to indicate mounting the model artifacts to this container + For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache + For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, + and the value is mounted to an environment variable called HF_TOKEN + For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + is created and mounted with the mountPath oci-dir + default:false + type: boolean name: description: |- Name of the container specified as a DNS_LABEL. @@ -1901,6 +1951,16 @@ spec: description: Image that is used to spawn container if present will override base config type: string + mountModelVolume: + description: |- + Boolean to indicate mounting the model artifacts to this container + For URIs with pvc:// prefix, a model-storage volume is created and mounted with the mountPath: /cache + For URIs with hf:// prefix, modelArtifact.authSecretName is used as the secret key reference, + and the value is mounted to an environment variable called HF_TOKEN + For URIs with oci:// prefix, an OCI volume with image reference (https://kubernetes.io/blog/2024/08/16/kubernetes-1-31-image-volume-source/) + is created and mounted with the mountPath oci-dir + default:false + type: boolean name: description: |- Name of the container specified as a DNS_LABEL. @@ -2002,6 +2062,226 @@ spec: description: Routing provides information needed to create configuration for routing properties: + gatewayRefs: + description: |- + GatewayRef is merged to baseconfig based on the Name field. 
+ Directly from Gateway API: https://gateway-api.sigs.k8s.io/reference/spec/#commonroutespec + ParentRefs references the resources (usually Gateways) that a Route wants + to be attached to. Note that the referenced parent resource needs to + allow this for the attachment to be complete. For Gateways, that means + the Gateway needs to allow attachment from Routes of this kind and + namespace. For Services, that means the Service must either be in the same + namespace for a "producer" route, or the mesh implementation must support + and allow "consumer" routes for the referenced Service. ReferenceGrant is + not applicable for governing ParentRefs to Services - it is not possible to + create a "producer" route for a Service in a different namespace from the + Route. + + There are two kinds of parent resources with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + This API may be extended in the future to support additional kinds of parent + resources. + + ParentRefs must be _distinct_. This means either that: + + * They select different objects. If this is the case, then parentRef + entries are distinct. In terms of fields, this means that the + multi-part key defined by `group`, `kind`, `namespace`, and `name` must + be unique across all parentRef entries in the Route. + * They do not select different objects, but for each optional field used, + each ParentRef that selects the same object must set the same set of + optional fields to different values. If one ParentRef sets a + combination of optional fields, all must set the same combination. + + Some examples: + + * If one ParentRef sets `sectionName`, all ParentRefs referencing the + same object must also set `sectionName`. + * If one ParentRef sets `port`, all ParentRefs referencing the same + object must also set `port`. 
+ * If one ParentRef sets `sectionName` and `port`, all ParentRefs + referencing the same object must also set `sectionName` and `port`. + + It is possible to separately reference multiple distinct objects that may + be collapsed by an implementation. For example, some implementations may + choose to merge compatible Gateway Listeners together. If that is the + case, the list of routes attached to those resources should also be + merged. + + Note that for ParentRefs that cross namespace boundaries, there are specific + rules. Cross-namespace references are only valid if they are explicitly + allowed by something in the namespace they are referring to. For example, + Gateway has the AllowedRoutes field, and ReferenceGrant provides a + generic way to enable other kinds of cross-namespace reference. + + + ParentRefs from a Route to a Service in the same namespace are "producer" + routes, which apply default routing rules to inbound connections from + any namespace to the Service. + + ParentRefs from a Route to a Service in a different namespace are + "consumer" routes, and these routing rules are only applied to outbound + connections originating from the same namespace as the Route, for which + the intended destination of the connections are a Service targeted as a + ParentRef of the Route. + + + + + + + items: + description: |- + ParentReference identifies an API object (usually a Gateway) that can be considered + a parent of this resource (usually a route). There are two kinds of parent resources + with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + This API may be extended in the future to support additional kinds of parent + resources. + + The API object must be valid in the cluster; the Group and Kind must + be registered in the cluster for this reference to be valid. + properties: + group: + default: gateway.networking.k8s.io + description: |- + Group is the group of the referent. 
+ When unspecified, "gateway.networking.k8s.io" is inferred. + To set the core API group (such as for a "Service" kind referent), + Group must be explicitly set to "" (empty string). + + Support: Core + maxLength: 253 + pattern: ^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + kind: + default: Gateway + description: |- + Kind is kind of the referent. + + There are two kinds of parent resources with "Core" support: + + * Gateway (Gateway conformance profile) + * Service (Mesh conformance profile, ClusterIP Services only) + + Support for other resources is Implementation-Specific. + maxLength: 63 + minLength: 1 + pattern: ^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$ + type: string + name: + description: |- + Name is the name of the referent. + + Support: Core + maxLength: 253 + minLength: 1 + type: string + namespace: + description: |- + Namespace is the namespace of the referent. When unspecified, this refers + to the local namespace of the Route. + + Note that there are specific rules for ParentRefs which cross namespace + boundaries. Cross-namespace references are only valid if they are explicitly + allowed by something in the namespace they are referring to. For example: + Gateway has the AllowedRoutes field, and ReferenceGrant provides a + generic way to enable any other kind of cross-namespace reference. + + + ParentRefs from a Route to a Service in the same namespace are "producer" + routes, which apply default routing rules to inbound connections from + any namespace to the Service. + + ParentRefs from a Route to a Service in a different namespace are + "consumer" routes, and these routing rules are only applied to outbound + connections originating from the same namespace as the Route, for which + the intended destination of the connections are a Service targeted as a + ParentRef of the Route. 
+ + + Support: Core + maxLength: 63 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ + type: string + port: + description: |- + Port is the network port this Route targets. It can be interpreted + differently based on the type of parent resource. + + When the parent resource is a Gateway, this targets all listeners + listening on the specified port that also support this kind of Route(and + select this Route). It's not recommended to set `Port` unless the + networking behaviors specified in a Route must apply to a specific port + as opposed to a listener(s) whose port(s) may be changed. When both Port + and SectionName are specified, the name and port of the selected listener + must match both specified values. + + + When the parent resource is a Service, this targets a specific port in the + Service spec. When both Port (experimental) and SectionName are specified, + the name and port of the selected port must match both specified values. + + + Implementations MAY choose to support other parent resources. + Implementations supporting other types of parent resources MUST clearly + document how/if Port is interpreted. + + For the purpose of status, an attachment is considered successful as + long as the parent resource accepts it partially. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. If 1 of 2 Gateway listeners accept attachment + from the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, + the Route MUST be considered detached from the Gateway. + + Support: Extended + format: int32 + maximum: 65535 + minimum: 1 + type: integer + sectionName: + description: |- + SectionName is the name of a section within the target resource. In the + following resources, SectionName is interpreted as the following: + + * Gateway: Listener name. 
When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. + * Service: Port name. When both Port (experimental) and SectionName + are specified, the name and port of the selected listener must match + both specified values. + + Implementations MAY choose to support attaching Routes to other resources. + If that is the case, they MUST clearly document how SectionName is + interpreted. + + When unspecified (empty string), this will reference the entire resource. + For the purpose of status, an attachment is considered successful if at + least one section in the parent resource accepts it. For example, Gateway + listeners can restrict which Routes can attach to them by Route kind, + namespace, or hostname. If 1 of 2 Gateway listeners accept attachment from + the referencing Route, the Route MUST be considered successfully + attached. If no Gateway listeners accept attachment from this Route, the + Route MUST be considered detached from the Gateway. 
+ + Support: Core + maxLength: 253 + minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string + required: + - name + type: object + maxItems: 32 + type: array modelName: description: |- // CreateInferencePool indicates if inference pool resource will be created @@ -2168,6 +2448,12 @@ spec: if EppRoleBinding is yet to be created, this reference will be nil type: string + httpRouteRef: + description: |- + HTTPRoute identifies the HTTPRoute resource + if HTTPRoute is yet to be created, + this reference will be nil + type: string inferenceModelRef: description: |- InferenceModelRef identifies the inference model resource diff --git a/charts/llm-d/templates/modelservice/deployment.yaml b/charts/llm-d/templates/modelservice/deployment.yaml index 2331650..854a1ec 100644 --- a/charts/llm-d/templates/modelservice/deployment.yaml +++ b/charts/llm-d/templates/modelservice/deployment.yaml @@ -58,7 +58,11 @@ spec: {{- include "common.tplvalues.render" ( dict "value" .Values.modelservice.tolerations "context" $) | nindent 8 }} {{- end }} containers: - - args: + - name: manager + command: + - /manager + - run + args: - --leader-elect=false - --health-probe-bind-address=:8081 - --epp-cluster-role @@ -67,8 +71,6 @@ spec: - {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.epp.image) "context" $) }} - --pd-pull-secrets - {{ include "common.images.renderImagePullSecretsString" (dict "images" (list .Values.modelservice.vllm.image) "context" $) }} - command: - - /manager image: {{ include "modelservice.image" . 
}} imagePullPolicy: {{ .Values.modelservice.image.imagePullPolicy }} {{- if .Values.modelservice.containerSecurityContext }} @@ -81,7 +83,6 @@ spec: port: 8081 initialDelaySeconds: 15 periodSeconds: 20 - name: manager readinessProbe: httpGet: path: /readyz diff --git a/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml b/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml index 33425ff..15ff800 100644 --- a/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml +++ b/charts/llm-d/templates/modelservice/rbac/manager_clusterrole.yaml @@ -77,6 +77,18 @@ rules: - get - list - watch +- apiGroups: + - gateway.networking.k8s.io + resources: + - httproutes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - inference.networking.x-k8s.io resources: diff --git a/charts/llm-d/values.schema.json b/charts/llm-d/values.schema.json index 4c948a4..6c0f643 100644 --- a/charts/llm-d/values.schema.json +++ b/charts/llm-d/values.schema.json @@ -4025,7 +4025,7 @@ "title": "repository" }, "tag": { - "default": "0.0.4", + "default": "v0.1.0", "description": "Endpoint picker image tag", "required": [], "title": "tag" @@ -4491,7 +4491,7 @@ "title": "repository" }, "tag": { - "default": "0.0.10", + "default": "v0.0.15", "description": "Model Service controller image tag", "required": [], "title": "tag" @@ -6580,7 +6580,7 @@ "title": "repository" }, "tag": { - "default": "0.0.6", + "default": "0.0.7", "description": "Routing proxy image tag", "required": [], "title": "tag" diff --git a/charts/llm-d/values.schema.tmpl.json b/charts/llm-d/values.schema.tmpl.json index 2fd5e3d..dd183e4 100644 --- a/charts/llm-d/values.schema.tmpl.json +++ b/charts/llm-d/values.schema.tmpl.json @@ -848,7 +848,7 @@ "title": "repository" }, "tag": { - "default": "0.0.4", + "default": "v0.1.0", "description": "Endpoint picker image tag", "required": [], "title": "tag" @@ -1018,7 +1018,7 @@ "title": "repository" }, "tag": { - 
"default": "0.0.10", + "default": "v0.0.15", "description": "Model Service controller image tag", "required": [], "title": "tag" @@ -1340,7 +1340,7 @@ "title": "repository" }, "tag": { - "default": "0.0.6", + "default": "0.0.7", "description": "Routing proxy image tag", "required": [], "title": "tag" diff --git a/charts/llm-d/values.yaml b/charts/llm-d/values.yaml index 1255c5a..9f40d64 100644 --- a/charts/llm-d/values.yaml +++ b/charts/llm-d/values.yaml @@ -372,7 +372,7 @@ modelservice: repository: llm-d/llm-d-model-service # -- Model Service controller image tag - tag: "0.0.10" + tag: "v0.0.15" # -- Specify a imagePullPolicy imagePullPolicy: "Always" @@ -449,7 +449,7 @@ modelservice: repository: llm-d/llm-d-inference-scheduler # -- Endpoint picker image tag - tag: 0.0.4 + tag: "v0.1.0" # -- Specify a imagePullPolicy imagePullPolicy: "Always" @@ -778,7 +778,7 @@ modelservice: repository: llm-d/llm-d # -- llm-d image tag - tag: 0.0.8 + tag: "0.0.8" # -- Specify a imagePullPolicy imagePullPolicy: "IfNotPresent" @@ -815,7 +815,7 @@ modelservice: repository: llm-d/llm-d-routing-sidecar # -- Routing proxy image tag - tag: "0.0.6" + tag: "0.0.7" # -- Specify a imagePullPolicy imagePullPolicy: "IfNotPresent"