diff --git a/.github/workflows/build-push-images.yml b/.github/workflows/build-push-images.yml
index 9a491385..edfac190 100644
--- a/.github/workflows/build-push-images.yml
+++ b/.github/workflows/build-push-images.yml
@@ -19,6 +19,7 @@ jobs:
           - chat
           - image-analysis
           - flux-image-gen
+          - omni
     permissions:
       contents: read
       id-token: write         # needed for signing the images with GitHub OIDC Token
diff --git a/charts/azimuth-omni-backend/.helmignore b/charts/azimuth-omni-backend/.helmignore
new file mode 100644
index 00000000..1924f397
--- /dev/null
+++ b/charts/azimuth-omni-backend/.helmignore
@@ -0,0 +1,33 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
+
+# Others
+README.md
+Dockerfile
+*kubeconfig.y[a]ml
+venv/
+__pycache__/
+images/
+*.secret
+ci/
diff --git a/charts/azimuth-omni-backend/Chart.yaml b/charts/azimuth-omni-backend/Chart.yaml
new file mode 100644
index 00000000..9ae99ec5
--- /dev/null
+++ b/charts/azimuth-omni-backend/Chart.yaml
@@ -0,0 +1,16 @@
+apiVersion: v2
+name: azimuth-llm-omni-backend
+description: |
+  In-cluster vLLM backends for the Omni multimodal interface
+  (text-to-text / chat, text-to-speech, text-to-image).
+maintainers:
+  - name: "Victor HANG"
+    email: victor@stackhpc.com
+
+type: application
+
+# The version and appVersion are updated by the chart build script
+version: 0.1.0
+appVersion: local
+
+icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg
diff --git a/charts/azimuth-omni-backend/ci/test-values.yaml b/charts/azimuth-omni-backend/ci/test-values.yaml
new file mode 100644
index 00000000..3131f547
--- /dev/null
+++ b/charts/azimuth-omni-backend/ci/test-values.yaml
@@ -0,0 +1,13 @@
+# CI: only exercise the TTT backend with the smallest possible model.
+ttt:
+  enabled: true
+  huggingface:
+    model: HuggingFaceTB/SmolLM2-135M-Instruct
+  api:
+    monitoring:
+      enabled: false
+    gpus: 0
+tts:
+  enabled: false
+tti:
+  enabled: false
diff --git a/charts/azimuth-omni-backend/templates/NOTES.txt b/charts/azimuth-omni-backend/templates/NOTES.txt
new file mode 100644
index 00000000..e11a1eca
--- /dev/null
+++ b/charts/azimuth-omni-backend/templates/NOTES.txt
@@ -0,0 +1,17 @@
+Azimuth Omni backends provide one or more in-cluster vLLM model deployments
+(text-to-text / chat, text-to-speech, text-to-image) for the Omni interface.
+
+Enabled backends in this release:
+{{- range $alias := list "ttt" "tts" "tti" }}
+{{- $sub := index $.Values $alias }}
+{{- if and $sub $sub.enabled }}
+  - {{ $alias }}: {{ $sub.huggingface.model }} (in-cluster at http://{{ $.Release.Name }}-{{ $alias }}.{{ $.Release.Namespace }}.svc)
+{{- else }}
+  - {{ $alias }}: disabled (toggle with `{{ $alias }}.enabled=true`)
+{{- end }}
+{{- end }}
+
+Each enabled backend downloads its model weights from HuggingFace on first
+start, which can take a while.
+
+Release notes: https://github.com/stackhpc/azimuth-llm/releases
diff --git a/charts/azimuth-omni-backend/templates/_backend.tpl b/charts/azimuth-omni-backend/templates/_backend.tpl
new file mode 100644
index 00000000..78d026f1
--- /dev/null
+++ b/charts/azimuth-omni-backend/templates/_backend.tpl
@@ -0,0 +1,338 @@
+{{/*
+Render a full vLLM backend (Deployment + Service + optional
+ingress/httproute/zenith/pdb/servicemonitor) for one modality.
+
+Usage:
+  {{- include "azimuth-omni-backend.backend" (dict "root" . "alias" "ttt") }}
+
+Per-modality config is read from `.Values.<alias>`.
+*/}}
+
+{{/*
+Selector labels for one backend. Expects a dict with "root" (the chart's
+top-level context) and "alias" (the modality, e.g. "ttt"). The modality
+label keeps each backend's Deployment and Service from matching the pods
+of the other backends in the same release.
+*/}}
+{{- define "azimuth-omni-backend.backend-selectorLabels" -}}
+app.kubernetes.io/name: {{ include "azimuth-omni-backend.name" .root }}
+app.kubernetes.io/instance: {{ .root.Release.Name }}
+app.kubernetes.io/component: backend
+azimuth-omni.stackhpc.com/modality: {{ .alias }}
+{{- end }}
+
+{{/*
+Metadata labels for a backend resource; takes a dict with "root" and "alias".
+*/}}
+{{- define "azimuth-omni-backend.backend-labels" -}}
+helm.sh/chart: {{ include "azimuth-omni-backend.chart" .root }}
+{{ include "azimuth-omni-backend.backend-selectorLabels" . }}
+{{- with .root.Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .root.Release.Service }}
+{{- end }}
+
+{{/* Optional `--chat-template` args for one backend; call with (dict "cfg" <backend values>).
+Reads `huggingface.chatTemplate`, the key values.yaml documents, and falls back to the
+top-level `chatTemplate` key this helper used to read. WizardCoder models get a built-in one. */}}
+{{- define "azimuth-omni-backend.chatTemplate" -}}
+{{- $tpl := .cfg.huggingface.chatTemplate | default .cfg.chatTemplate -}}
+{{- if $tpl }}
+- --chat-template
+- {{ quote $tpl }}
+{{- else if contains "WizardCoder" .cfg.huggingface.model }}
+- --chat-template
+- {{ quote "{% for message in messages %}{% if message['role'] == 'system' %}{% endif %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' }}{% endif %}{% if message['role'] == 'assistant' %}{{ '### Response:\n' }}{% endif %}{{ message['content'].strip() }}{% if not loop.last %}{{ '\n\n' }}{% endif %}{% if message['role'] == 'user' and loop.last %}{{ '### Response:\n' }}{% endif %}{% endfor %}" }}
+{{- end -}}
+{{- end }}
+
+{{/* Renders every resource for one modality: Deployment and Service, plus the optional
+ServiceMonitor, Zenith Client/Reservation, HTTPRoute, Ingress and PodDisruptionBudget.
+Call with (dict "root" . "alias" "<ttt|tts|tti>"); renders nothing if `<alias>.enabled` is false. */}}
+{{- define "azimuth-omni-backend.backend" -}}
+{{- $root := .root -}}
+{{- $alias := .alias -}}
+{{- $cfg := index $root.Values $alias -}}
+{{- if not $cfg }}{{- fail (printf "azimuth-omni-backend: missing values block for backend %q" $alias) }}{{- end }}
+{{- if not $cfg.enabled -}}
+{{- /* Backend disabled: emit nothing. */ -}}
+{{- else -}}
+{{- $name := printf "%s-%s" $root.Release.Name $alias -}}
+{{- $labelArgs := dict "root" $root "alias" $alias -}}
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ $name }}
+  labels:
+    {{- include "azimuth-omni-backend.backend-labels" $labelArgs | nindent 4 }}
+spec:
+  replicas: {{ default 1 $cfg.api.replicas }}
+  selector:
+    matchLabels:
+      {{- include "azimuth-omni-backend.backend-selectorLabels" $labelArgs | nindent 6 }}
+  strategy:
+    {{- $cfg.api.updateStrategy | toYaml | nindent 4 }}
+  template:
+    metadata:
+      labels:
+        {{- include "azimuth-omni-backend.backend-selectorLabels" $labelArgs | nindent 8 }}
+    spec:
+      containers:
+      - name: api
+        {{- if eq ($cfg.api.gpus | int) 0 }}
+        image: "ghcr.io/stackhpc/vllm-cpu:{{ $cfg.api.image.version }}"
+        {{- else if $cfg.api.intelXPUsEnabled }}
+        image: "ghcr.io/stackhpc/vllm-xpu:{{ $cfg.api.image.version }}"
+        {{- else }}
+        image: "{{ $cfg.api.image.containerImage }}:{{ $cfg.api.image.version }}"
+        {{- end }}
+        {{- if eq $cfg.api.image.containerImage "vllm/vllm-omni" }}
+        command:
+        - vllm
+        - serve
+        {{- end }}
+        ports:
+        - name: api
+          containerPort: 8000
+        volumeMounts:
+        - name: data
+          mountPath: /root/.cache/huggingface
+        - name: shm
+          mountPath: /dev/shm
+        args:
+          {{- if eq $cfg.api.image.containerImage "vllm/vllm-omni" }}
+          - --omni
+          {{- end }}
+          {{- if semverCompare "<v0.13.0" $cfg.api.image.version }}
+          - --model
+          {{- end }}
+          - {{ $cfg.huggingface.model | quote }}
+          {{- include "azimuth-omni-backend.chatTemplate" (dict "cfg" $cfg) | nindent 10 -}}
+          {{- if $cfg.api.modelMaxContextLength }}
+          - --max-model-len
+          - {{ $cfg.api.modelMaxContextLength | quote }}
+          {{- end }}
+          {{- if and (not (has "--tokenizer-mode" $cfg.api.extraArgs)) (hasPrefix "mistralai/" $cfg.huggingface.model) }}
+          - --tokenizer-mode
+          - mistral
+          {{- end }}
+          {{- with $cfg.api.extraArgs }}
+          {{- toYaml . | nindent 10 }}
+          {{- end }}
+        {{- if $cfg.huggingface.secretName }}
+        envFrom:
+        - secretRef:
+            name: {{ $cfg.huggingface.secretName }}
+        {{- end }}
+        env:
+        - name: DO_NOT_TRACK
+          value: "1"
+        {{- with $cfg.api.extraEnv }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+        {{- if $cfg.huggingface.token }}
+        - name: HUGGING_FACE_HUB_TOKEN
+          value: {{ quote $cfg.huggingface.token }}
+        {{- end }}
+        readinessProbe:
+          httpGet:
+            port: 8000
+            path: /health
+          periodSeconds: 10
+        {{- if gt ($cfg.api.gpus | int) 0 }}
+        resources:
+          limits:
+            {{- if $cfg.api.intelXPUsEnabled }}
+            gpu.intel.com/i915: {{ $cfg.api.gpus | int }}
+            {{- else }}
+            nvidia.com/gpu: {{ $cfg.api.gpus | int }}
+            {{- end }}
+        {{- end }}
+      volumes:
+        - name: data
+          {{- $cfg.api.cacheVolume | toYaml | nindent 10 }}
+        - name: shm
+          emptyDir:
+            medium: Memory
+            sizeLimit: 1Gi
+      {{- with $cfg.api }}
+      {{- if .azimuthNodeGroupSelector }}
+      nodeSelector:
+        capi.stackhpc.com/node-group: {{ .azimuthNodeGroupSelector | quote }}
+      {{- else if .nodeSelector }}
+      nodeSelector:
+        {{- toYaml .nodeSelector | nindent 8 }}
+      {{- end }}
+      {{- end }}
+      {{- with $cfg.api.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with $cfg.api.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ $name }}
+  labels:
+    {{- include "azimuth-omni-backend.backend-labels" $labelArgs | nindent 4 }}
+spec:
+  ports:
+  - name: api
+    port: 80
+    protocol: TCP
+    targetPort: api
+  type: {{ $cfg.api.service.type }}
+  selector:
+    {{- include "azimuth-omni-backend.backend-selectorLabels" $labelArgs | nindent 4 }}
+{{- if and $cfg.api.monitoring $cfg.api.monitoring.enabled }}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ $name }}
+  labels:
+    app: {{ $name }}
+    {{- include "azimuth-omni-backend.backend-labels" $labelArgs | nindent 4 }}
+spec:
+  endpoints:
+  - port: api
+    path: /metrics
+  jobLabel: operator
+  namespaceSelector:
+    matchNames:
+    - {{ $root.Release.Namespace }}
+  selector:
+    matchLabels:
+      {{- include "azimuth-omni-backend.backend-selectorLabels" $labelArgs | nindent 6 }}
+{{- end }}
+{{- if and $cfg.api.service.zenith $cfg.api.service.zenith.enabled }}
+---
+apiVersion: zenith.stackhpc.com/v1alpha1
+kind: Client
+metadata:
+  name: {{ $name }}
+  labels:
+    {{- include "azimuth-omni-backend.backend-labels" $labelArgs | nindent 4 }}
+spec:
+  reservationName: {{ $name }}
+  upstream:
+    serviceName: {{ $name }}
+  auth:
+    skip: {{ $cfg.api.service.zenith.skipAuth }}
+---
+apiVersion: zenith.stackhpc.com/v1alpha1
+kind: Reservation
+metadata:
+  name: {{ $name }}
+  labels:
+    {{- include "azimuth-omni-backend.backend-labels" $labelArgs | nindent 4 }}
+  annotations:
+    azimuth.stackhpc.com/service-label: {{ quote $cfg.api.service.zenith.label }}
+    azimuth.stackhpc.com/service-icon-url: {{ quote $cfg.api.service.zenith.iconUrl }}
+    {{- with $cfg.api.service.zenith.description }}
+    azimuth.stackhpc.com/service-description: {{ quote . }}
+    {{- end }}
+spec:
+  credentialSecretName: {{ $name }}-zenith-credential
+{{- end }}
+{{- if and $cfg.api.httpRoute $cfg.api.httpRoute.enabled }}
+---
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: {{ $name }}
+  labels:
+    {{- include "azimuth-omni-backend.backend-labels" $labelArgs | nindent 4 }}
+  {{- with $cfg.api.httpRoute.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with $cfg.api.httpRoute.parentRefs }}
+  parentRefs:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with $cfg.api.httpRoute.hostnames }}
+  hostnames:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  rules:
+    {{- range $cfg.api.httpRoute.rules }}
+    - {{ with .matches }}matches:
+        {{- toYaml . | nindent 8 }}
+      {{ end }}backendRefs:
+        - name: {{ $name }}
+          port: 80
+    {{- end }}
+{{- end }}
+{{- if and $cfg.api.ingress $cfg.api.ingress.enabled }}
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ $name }}
+  labels:
+    {{- include "azimuth-omni-backend.backend-labels" $labelArgs | nindent 4 }}
+  {{- with $cfg.api.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with $cfg.api.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- if $cfg.api.ingress.tls }}
+  tls:
+    {{- range $cfg.api.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range $cfg.api.ingress.hosts }}
+    - host: {{ .host | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- with .pathType }}
+            pathType: {{ . }}
+            {{- end }}
+            backend:
+              service:
+                name: {{ $name }}
+                port:
+                  number: 80
+          {{- end }}
+    {{- end }}
+{{- end }}
+{{- if and $cfg.api.pdb $cfg.api.pdb.enabled }}
+---
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ $name }}
+  labels:
+    {{- include "azimuth-omni-backend.backend-labels" $labelArgs | nindent 4 }}
+spec:
+  {{- with $cfg.api.pdb.minAvailable }}
+  minAvailable: {{ . }}
+  {{- end }}
+  {{- with $cfg.api.pdb.maxUnavailable }}
+  maxUnavailable: {{ . }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "azimuth-omni-backend.backend-selectorLabels" $labelArgs | nindent 6 }}
+{{- end }}
+{{- end }}
+{{- end }}
diff --git a/charts/azimuth-omni-backend/templates/_helpers.tpl b/charts/azimuth-omni-backend/templates/_helpers.tpl
new file mode 100644
index 00000000..ee78b750
--- /dev/null
+++ b/charts/azimuth-omni-backend/templates/_helpers.tpl
@@ -0,0 +1,14 @@
+{{/*
+Base name for labels: `nameOverride` if set, else a fixed string (not .Chart.Name)
+so labels stay stable when aliased as a subchart; cut to 63 chars, trailing "-" dropped.
+*/}}
+{{- define "azimuth-omni-backend.name" -}}
+{{- .Values.nameOverride | default "azimuth-llm-omni-backend" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/* Chart label: "<name>-<version>" with "+" replaced by "_", cut to 63 chars, trailing "-" dropped. */}}
+{{- define "azimuth-omni-backend.chart" -}}
+{{- $chartName := include "azimuth-omni-backend.name" . -}}
+{{- $label := printf "%s-%s" $chartName .Chart.Version -}}
+{{- $label | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
diff --git a/charts/azimuth-omni-backend/templates/backend-tti.yml b/charts/azimuth-omni-backend/templates/backend-tti.yml
new file mode 100644
index 00000000..6264bb0e
--- /dev/null
+++ b/charts/azimuth-omni-backend/templates/backend-tti.yml
@@ -0,0 +1 @@
+{{- $ctx := dict "root" . "alias" "tti" -}}{{- include "azimuth-omni-backend.backend" $ctx }}
diff --git a/charts/azimuth-omni-backend/templates/backend-tts.yml b/charts/azimuth-omni-backend/templates/backend-tts.yml
new file mode 100644
index 00000000..f5e91106
--- /dev/null
+++ b/charts/azimuth-omni-backend/templates/backend-tts.yml
@@ -0,0 +1 @@
+{{- $ctx := dict "root" . "alias" "tts" -}}{{- include "azimuth-omni-backend.backend" $ctx }}
diff --git a/charts/azimuth-omni-backend/templates/backend-ttt.yml b/charts/azimuth-omni-backend/templates/backend-ttt.yml
new file mode 100644
index 00000000..d202ae55
--- /dev/null
+++ b/charts/azimuth-omni-backend/templates/backend-ttt.yml
@@ -0,0 +1 @@
+{{- $ctx := dict "root" . "alias" "ttt" -}}{{- include "azimuth-omni-backend.backend" $ctx }}
diff --git a/charts/azimuth-omni-backend/values.schema.json b/charts/azimuth-omni-backend/values.schema.json
new file mode 100644
index 00000000..1fda2170
--- /dev/null
+++ b/charts/azimuth-omni-backend/values.schema.json
@@ -0,0 +1,88 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "type": "object",
+    "definitions": {
+        "backend": {
+            "type": "object",
+            "properties": {
+                "enabled": {
+                    "type": "boolean",
+                    "title": "Deploy in-cluster backend",
+                    "description": "If true, a vLLM Deployment + Service is created to serve this modality. Disable to omit this backend (e.g. to point the omni UI at an external, off-cluster backend).",
+                    "default": false
+                },
+                "huggingface": {
+                    "type": "object",
+                    "properties": {
+                        "model": {
+                            "type": "string",
+                            "title": "Model",
+                            "description": "The [HuggingFace model](https://huggingface.co/models) to deploy for this modality."
+                        },
+                        "token": {
+                            "type": ["string", "null"],
+                            "title": "Access Token",
+                            "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated)."
+                        }
+                    }
+                },
+                "api": {
+                    "type": "object",
+                    "properties": {
+                        "image": {
+                            "type": "object",
+                            "properties": {
+                                "containerImage": {
+                                    "type": "string",
+                                    "title": "vLLM Container Image",
+                                    "description": "Container to use as API backend. 'vllm/vllm-openai' for text/vision/image models, 'vllm/vllm-omni' for TTS/multimodal-omni models.",
+                                    "default": "vllm/vllm-openai",
+                                    "enum": [
+                                        "vllm/vllm-openai",
+                                        "vllm/vllm-omni"
+                                    ]
+                                },
+                                "version": {
+                                    "type": "string",
+                                    "title": "Backend vLLM version",
+                                    "description": "Image tag: see https://github.com/vllm-project/vllm/tags for vllm-openai, or https://github.com/vllm-project/vllm-omni/tags for vllm-omni.",
+                                    "default": "v0.11.0"
+                                }
+                            }
+                        },
+                        "gpus": {
+                            "type": "integer",
+                            "title": "GPUs",
+                            "description": "Number of GPUs to request per backend pod. Set to 0 to fall back to a CPU vLLM image (testing only).",
+                            "minimum": 0,
+                            "default": 1
+                        },
+                        "modelMaxContextLength": {
+                            "title": "Model Context Length",
+                            "description": "Override the maximum context length, if the model's default is unsuitable."
+                        },
+                        "azimuthNodeGroupSelector": {
+                            "type": "string",
+                            "title": "Node Group",
+                            "description": "(Optional) Require that the model runs on a particular node group."
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "properties": {
+        "ttt": {
+            "title": "Text-to-Text backend (chat)",
+            "allOf": [{ "$ref": "#/definitions/backend" }]
+        },
+        "tts": {
+            "title": "Text-to-Speech backend",
+            "allOf": [{ "$ref": "#/definitions/backend" }]
+        },
+        "tti": {
+            "title": "Text-to-Image backend",
+            "allOf": [{ "$ref": "#/definitions/backend" }]
+        }
+    }
+}
diff --git a/charts/azimuth-omni-backend/values.yaml b/charts/azimuth-omni-backend/values.yaml
new file mode 100644
index 00000000..e4f72b3b
--- /dev/null
+++ b/charts/azimuth-omni-backend/values.yaml
@@ -0,0 +1,239 @@
+# In-cluster vLLM backends for the Omni interface. Each modality (ttt / tts /
+# tti) is its own vLLM Deployment + Service. Enable only the ones you need.
+
+# Text-to-text backend (chat / multimodal understanding).
+ttt:
+  enabled: true
+  huggingface:
+    model: Qwen/Qwen2.5-7B-Instruct
+    # Jinja chat template passed to vLLM via --chat-template.
+    chatTemplate:
+    # Name of an existing Secret, loaded via envFrom, that provides HUGGING_FACE_HUB_TOKEN.
+    secretName:
+    # Or inline the token here (testing only):
+    token:
+  api:
+    replicas: 1
+    image:
+      containerImage: vllm/vllm-openai
+      version: v0.11.0
+    monitoring:
+      enabled: true
+    service:
+      type: ClusterIP
+      zenith:
+        enabled: false
+        skipAuth: false
+        label: TTT (chat) backend
+        iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
+        description: |
+          The OpenAI-compatible API for the TTT (chat) backend.
+    # Paths default to the endpoints this modality serves so the backends can
+    # share an ingress host. Override paths/rules for broader matches like /v1.
+    ingress:
+      enabled: false
+      className: ""
+      annotations: {}
+      hosts:
+        - host: chart-example.local
+          paths:
+            - path: /v1/chat
+              pathType: Prefix
+            - path: /v1/completions
+              pathType: Prefix
+            - path: /v1/models
+              pathType: Prefix
+            - path: /health
+              pathType: Prefix
+      tls: []
+    httpRoute:
+      enabled: false
+      annotations: {}
+      parentRefs:
+        - name: my-gateway
+          namespace: default
+          sectionName: https
+      hostnames:
+        - chart-example.local
+      rules:
+        - matches:
+            - path:
+                type: PathPrefix
+                value: /v1/chat
+            - path:
+                type: PathPrefix
+                value: /v1/completions
+            - path:
+                type: PathPrefix
+                value: /v1/models
+            - path:
+                type: PathPrefix
+                value: /health
+    cacheVolume:
+      hostPath:
+        path: /tmp/llm/huggingface-cache
+    gpus: 1
+    intelXPUsEnabled: false
+    updateStrategy:
+      type: Recreate
+    modelMaxContextLength:
+    extraArgs: []
+    extraEnv: []
+    azimuthNodeGroupSelector: ""
+    nodeSelector: {}
+    tolerations: []
+    affinity: {}
+    pdb:
+      enabled: false
+
+# Text-to-speech backend.
+tts:
+  enabled: false
+  huggingface:
+    model: mistralai/Voxtral-4B-TTS-2603
+    chatTemplate:
+    secretName:
+    token:
+  api:
+    replicas: 1
+    image:
+      containerImage: vllm/vllm-omni
+      version: v0.11.0
+    monitoring:
+      enabled: true
+    service:
+      type: ClusterIP
+      zenith:
+        enabled: false
+        skipAuth: false
+        label: TTS backend
+        iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
+        description: |
+          The OpenAI-compatible API for the TTS backend.
+    # Paths default to the endpoints this modality serves. Override to broaden.
+    ingress:
+      enabled: false
+      className: ""
+      annotations: {}
+      hosts:
+        - host: chart-example.local
+          paths:
+            - path: /v1/audio
+              pathType: Prefix
+            - path: /v1/models
+              pathType: Prefix
+            - path: /health
+              pathType: Prefix
+      tls: []
+    httpRoute:
+      enabled: false
+      annotations: {}
+      parentRefs:
+        - name: my-gateway
+          namespace: default
+          sectionName: https
+      hostnames:
+        - chart-example.local
+      rules:
+        - matches:
+            - path:
+                type: PathPrefix
+                value: /v1/audio
+            - path:
+                type: PathPrefix
+                value: /v1/models
+            - path:
+                type: PathPrefix
+                value: /health
+    cacheVolume:
+      hostPath:
+        path: /tmp/llm/huggingface-cache
+    gpus: 1
+    intelXPUsEnabled: false
+    updateStrategy:
+      type: Recreate
+    modelMaxContextLength:
+    extraArgs: []
+    extraEnv: []
+    azimuthNodeGroupSelector: ""
+    nodeSelector: {}
+    tolerations: []
+    affinity: {}
+    pdb:
+      enabled: false
+
+# Text-to-image backend.
+tti:
+  enabled: false
+  huggingface:
+    model: Qwen/Qwen-Image
+    chatTemplate:
+    secretName:
+    token:
+  api:
+    replicas: 1
+    image:
+      containerImage: vllm/vllm-openai
+      version: v0.11.0
+    monitoring:
+      enabled: true
+    service:
+      type: ClusterIP
+      zenith:
+        enabled: false
+        skipAuth: false
+        label: TTI backend
+        iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
+        description: |
+          The OpenAI-compatible API for the TTI backend.
+    # Paths default to the endpoints this modality serves. Override to broaden.
+    ingress:
+      enabled: false
+      className: ""
+      annotations: {}
+      hosts:
+        - host: chart-example.local
+          paths:
+            - path: /v1/images
+              pathType: Prefix
+            - path: /v1/models
+              pathType: Prefix
+            - path: /health
+              pathType: Prefix
+      tls: []
+    httpRoute:
+      enabled: false
+      annotations: {}
+      parentRefs:
+        - name: my-gateway
+          namespace: default
+          sectionName: https
+      hostnames:
+        - chart-example.local
+      rules:
+        - matches:
+            - path:
+                type: PathPrefix
+                value: /v1/images
+            - path:
+                type: PathPrefix
+                value: /v1/models
+            - path:
+                type: PathPrefix
+                value: /health
+    cacheVolume:
+      hostPath:
+        path: /tmp/llm/huggingface-cache
+    gpus: 1
+    intelXPUsEnabled: false
+    updateStrategy:
+      type: Recreate
+    modelMaxContextLength:
+    extraArgs: []
+    extraEnv: []
+    azimuthNodeGroupSelector: ""
+    nodeSelector: {}
+    tolerations: []
+    affinity: {}
+    pdb:
+      enabled: false
diff --git a/charts/azimuth-omni/.helmignore b/charts/azimuth-omni/.helmignore
new file mode 100644
index 00000000..1924f397
--- /dev/null
+++ b/charts/azimuth-omni/.helmignore
@@ -0,0 +1,33 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
+
+# Others
+README.md
+Dockerfile
+*kubeconfig.y[a]ml
+venv/
+__pycache__/
+images/
+*.secret
+ci/
diff --git a/charts/azimuth-omni/Chart.yaml b/charts/azimuth-omni/Chart.yaml
new file mode 100644
index 00000000..0ba14cfa
--- /dev/null
+++ b/charts/azimuth-omni/Chart.yaml
@@ -0,0 +1,29 @@
+apiVersion: v2
+name: azimuth-llm-omni
+description: |
+  Omni multimodal web interface (chat, text-to-speech, text-to-image)
+  with optional in-cluster vLLM backends for each modality.
+maintainers:
+  - name: "Victor HANG"
+    email: victor@stackhpc.com
+
+type: application
+
+# The version and appVersion are updated by the chart build script
+version: 0.1.0
+appVersion: local
+
+icon: https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.svg
+
+annotations:
+  azimuth.stackhpc.com/label: HuggingFace Omni
+
+dependencies:
+  # The in-cluster vLLM backends (ttt / tts / tti) live in a separate chart and
+  # are pulled in here under the `backend` alias. Disable the whole subchart
+  # (backend.enabled=false) to run the UI against external backends only.
+  - name: azimuth-llm-omni-backend
+    alias: backend
+    version: ">=0-0"
+    repository: "file://../azimuth-omni-backend/"
+    condition: backend.enabled
diff --git a/charts/azimuth-omni/azimuth-ui.schema.yaml b/charts/azimuth-omni/azimuth-ui.schema.yaml
new file mode 100644
index 00000000..85f3fe26
--- /dev/null
+++ b/charts/azimuth-omni/azimuth-ui.schema.yaml
@@ -0,0 +1,87 @@
+controls:
+  /backend/ttt/enabled:
+    type: SwitchControl
+  /backend/ttt/huggingface/model:
+    type: TextControl
+  /backend/ttt/huggingface/token:
+    type: TextControl
+    secret: true
+  /backend/ttt/api/modelMaxContextLength:
+    type: IntegerControl
+    minimum: 100
+    required: false
+
+  /backend/tts/enabled:
+    type: SwitchControl
+  /backend/tts/huggingface/model:
+    type: TextControl
+  /backend/tts/huggingface/token:
+    type: TextControl
+    secret: true
+  /backend/tts/api/modelMaxContextLength:
+    type: IntegerControl
+    minimum: 100
+    required: false
+
+  /backend/tti/enabled:
+    type: SwitchControl
+  /backend/tti/huggingface/model:
+    type: TextControl
+  /backend/tti/huggingface/token:
+    type: TextControl
+    secret: true
+  /backend/tti/api/modelMaxContextLength:
+    type: IntegerControl
+    minimum: 100
+    required: false
+
+  # When a backend is in-cluster the omni UI auto-derives model_name from
+  # the matching huggingface.model in the configmap template; mirror it in
+  # the UI for visibility.
+  /ui/appSettings/ttt/model_name:
+    type: MirrorControl
+    path: /backend/ttt/huggingface/model
+    visuallyHidden: true
+  /ui/appSettings/tts/model_name:
+    type: MirrorControl
+    path: /backend/tts/huggingface/model
+    visuallyHidden: true
+  /ui/appSettings/tti/model_name:
+    type: MirrorControl
+    path: /backend/tti/huggingface/model
+    visuallyHidden: true
+
+sortOrder:
+  - /ui/appSettings/page_title
+  - /ui/appSettings/page_description
+
+  - /backend/ttt/enabled
+  - /backend/ttt/huggingface/model
+  - /backend/ttt/huggingface/token
+  - /backend/ttt/api/azimuthNodeGroupSelector
+  - /backend/ttt/api/image/containerImage
+  - /backend/ttt/api/image/version
+  - /backend/ttt/api/gpus
+  - /backend/ttt/api/modelMaxContextLength
+  - /ui/appSettings/ttt/system_prompt
+  - /ui/appSettings/ttt/backend_url
+
+  - /backend/tts/enabled
+  - /backend/tts/huggingface/model
+  - /backend/tts/huggingface/token
+  - /backend/tts/api/azimuthNodeGroupSelector
+  - /backend/tts/api/image/containerImage
+  - /backend/tts/api/image/version
+  - /backend/tts/api/gpus
+  - /backend/tts/api/modelMaxContextLength
+  - /ui/appSettings/tts/backend_url
+
+  - /backend/tti/enabled
+  - /backend/tti/huggingface/model
+  - /backend/tti/huggingface/token
+  - /backend/tti/api/azimuthNodeGroupSelector
+  - /backend/tti/api/image/containerImage
+  - /backend/tti/api/image/version
+  - /backend/tti/api/gpus
+  - /backend/tti/api/modelMaxContextLength
+  - /ui/appSettings/tti/backend_url
diff --git a/charts/azimuth-omni/ci/test-values.yaml b/charts/azimuth-omni/ci/test-values.yaml
new file mode 100644
index 00000000..ab582e1a
--- /dev/null
+++ b/charts/azimuth-omni/ci/test-values.yaml
@@ -0,0 +1,26 @@
+# CI: only exercise the TTT backend with the smallest possible model.
+backend:
+  enabled: true
+  ttt:
+    enabled: true
+    huggingface:
+      model: HuggingFaceTB/SmolLM2-135M-Instruct
+    api:
+      monitoring:
+        enabled: false
+      gpus: 0
+  tts:
+    enabled: false
+  tti:
+    enabled: false
+ui:
+  service:
+    zenith:
+      enabled: false
+  appSettings:
+    # model_name is auto-derived from backend.ttt.huggingface.model
+    ttt:
+      params:
+        max_tokens: 32
+        temperature: 0.1
+        top_p: 0.15
diff --git a/charts/azimuth-omni/templates/NOTES.txt b/charts/azimuth-omni/templates/NOTES.txt
new file mode 100644
index 00000000..b4cb98d2
--- /dev/null
+++ b/charts/azimuth-omni/templates/NOTES.txt
@@ -0,0 +1,23 @@
+Azimuth Omni provides a single multimodal web interface (chat, text-to-speech, text-to-image) backed by one or more vLLM model deployments.
+
+{{- if .Values.backend.enabled }}
+In-cluster backends (from the azimuth-llm-omni-backend subchart) in this release:
+{{- range $alias := list "ttt" "tts" "tti" }}
+{{- $sub := index $.Values.backend $alias }}
+{{- if and $sub $sub.enabled }}
+  - {{ $alias }}: {{ $sub.huggingface.model }} (in-cluster at http://{{ $.Release.Name }}-{{ $alias }}.{{ $.Release.Namespace }}.svc)
+{{- else }}
+  - {{ $alias }}: disabled (toggle with `backend.{{ $alias }}.enabled=true` or point `ui.appSettings.{{ $alias }}.backend_url` at an external backend)
+{{- end }}
+{{- end }}
+
+Each enabled backend downloads its model weights from HuggingFace on first start, which can take a while.
+{{- else }}
+In-cluster backends are disabled (`backend.enabled=false`). The UI will only
+expose tabs for modalities whose `ui.appSettings.<alias>.backend_url` +
+`model_name` point at an external backend.
+{{- end }}
+
+If `ui.service.zenith.enabled` is true the omni UI is exposed through Zenith; otherwise enable `ui.ingress` or `ui.httpRoute` to expose it via standard Kubernetes networking.
+
+Release notes: https://github.com/stackhpc/azimuth-llm/releases
diff --git a/charts/azimuth-omni/templates/_helpers.tpl b/charts/azimuth-omni/templates/_helpers.tpl
new file mode 100644
index 00000000..be2e1b28
--- /dev/null
+++ b/charts/azimuth-omni/templates/_helpers.tpl
@@ -0,0 +1,58 @@
+{{/*
+Chart name: nameOverride if set, else .Chart.Name; cut to 63 characters, then a trailing "-" is trimmed.
+*/}}
+{{- define "azimuth-omni.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/* Fully qualified app name: fullnameOverride if set; otherwise the release
+name, suffixed with "-<name>" (nameOverride or the chart name) unless it
+already contains <name>. Every result is truncated to 63 characters. */}}
+{{- define "azimuth-omni.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Chart label value: "<chart name>-<chart version>" with every "+" replaced by "_", cut to 63 characters.
+*/}}
+{{- define "azimuth-omni.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/* Common labels: the chart label, the app selector labels, the app version
+(only when .Chart.AppVersion is set, emitted quoted) and the managing service
+taken from .Release.Service. */}}
+{{- define "azimuth-omni.labels" -}}
+helm.sh/chart: {{ include "azimuth-omni.chart" . }}
+{{ include "azimuth-omni.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+App selector labels: app.kubernetes.io/name (from the chart name helper) and app.kubernetes.io/instance (release name).
+*/}}
+{{- define "azimuth-omni.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "azimuth-omni.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+UI component selector labels: the app selector labels plus app.kubernetes.io/component set to "ui".
+*/}}
+{{- define "azimuth-omni.ui-selectorLabels" -}}
+{{ include "azimuth-omni.selectorLabels" . }}
+app.kubernetes.io/component: ui
+{{- end }}
+
diff --git a/charts/azimuth-omni/templates/app-config-map.yml b/charts/azimuth-omni/templates/app-config-map.yml
new file mode 100644
index 00000000..0c1155d5
--- /dev/null
+++ b/charts/azimuth-omni/templates/app-config-map.yml
@@ -0,0 +1,35 @@
+{{- if .Values.ui.enabled -}}
+{{/*
+Build the omni overrides.yml. For each modality (ttt/tts/tti): if its backend
+is enabled, fill in backend_url + model_name (non-empty user values win). If
+it is disabled, keep the block only when the user set a non-empty backend_url
+and model_name (empty counts as unset); otherwise drop it to hide its tab.
+*/}}
+{{- $appSettings := deepCopy .Values.ui.appSettings -}}
+{{- $backend := default (dict) .Values.backend -}}
+{{- $backendEnabled := and $backend (ne $backend.enabled false) -}}
+{{- range $alias := list "ttt" "tts" "tti" }}
+  {{- $cfg := index $backend $alias -}}
+  {{- $userBlock := default (dict) (index $appSettings $alias) -}}
+  {{- if and $backendEnabled $cfg $cfg.enabled }}
+    {{- $defaults := dict
+        "backend_url" (printf "http://%s-%s.%s.svc" $.Release.Name $alias $.Release.Namespace)
+        "model_name"  $cfg.huggingface.model -}}
+    {{- $merged := merge $userBlock $defaults -}}
+    {{- $_ := set $appSettings $alias $merged -}}
+  {{- else }}
+    {{- if not (and (get $userBlock "backend_url") (get $userBlock "model_name")) }}
+      {{- $_ := unset $appSettings $alias -}}
+    {{- end }}
+  {{- end }}
+{{- end }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-omni-web-app
+  labels:
+    {{- include "azimuth-omni.labels" . | nindent 4 }}
+data:
+  overrides.yml: |
+    {{- $appSettings | toYaml | nindent 4 }}
+{{- end -}}
diff --git a/charts/azimuth-omni/templates/deployment.yml b/charts/azimuth-omni/templates/deployment.yml
new file mode 100644
index 00000000..8e9feb12
--- /dev/null
+++ b/charts/azimuth-omni/templates/deployment.yml
@@ -0,0 +1,62 @@
+{{- if .Values.ui.enabled -}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ .Release.Name }}-omni-ui
+  labels:
+    {{- include "azimuth-omni.labels" . | nindent 4 }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      {{- include "azimuth-omni.ui-selectorLabels" . | nindent 6 }}
+  strategy:
+    {{- .Values.ui.updateStrategy | toYaml | nindent 4 }}
+  template:
+    metadata:
+      labels:
+        {{- include "azimuth-omni.ui-selectorLabels" . | nindent 8 }}
+      # Restart deployment when settings config map changes
+      # https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
+      annotations:
+        checksum/config: {{ include (print $.Template.BasePath "/app-config-map.yml") . | sha256sum }}
+    spec:
+      containers:
+      - name: omni-ui
+        {{- with .Values.ui.image }}
+        image: {{ printf "%s:%s" .repository (default $.Chart.AppVersion .tag) }}
+        {{- if .imagePullPolicy }}
+        imagePullPolicy: {{ .imagePullPolicy }}
+        {{- end -}}
+        {{- end }}
+        ports:
+        - name: ui
+          containerPort: 7860
+        volumeMounts:
+        - name: app
+          mountPath: /etc/web-app
+        env:
+        - name: PYTHONUNBUFFERED
+          value: "1"
+        tty: true # Make stdout from python visible in k8s logs
+        readinessProbe:
+          tcpSocket:
+            port: 7860
+          periodSeconds: 5
+      volumes:
+        - name: app
+          configMap:
+            name: {{ .Release.Name }}-omni-web-app
+      {{- with $.Values.ui.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with $.Values.ui.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with $.Values.ui.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+{{- end -}}
diff --git a/charts/azimuth-omni/templates/httproute.yml b/charts/azimuth-omni/templates/httproute.yml
new file mode 100644
index 00000000..26ac1a1c
--- /dev/null
+++ b/charts/azimuth-omni/templates/httproute.yml
@@ -0,0 +1,29 @@
+{{- if and .Values.ui.enabled .Values.ui.httpRoute.enabled -}}
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: {{ printf "%s-omni-ui" .Release.Name }}
+  labels:
+    {{- include "azimuth-omni.labels" . | nindent 4 }}
+  {{- with .Values.ui.httpRoute.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ui.httpRoute.parentRefs }}
+  parentRefs:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  {{- with .Values.ui.httpRoute.hostnames }}
+  hostnames:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  rules:
+    {{- range .Values.ui.httpRoute.rules }}
+    - {{ with .matches }}matches:
+        {{- toYaml . | nindent 8 }}
+      {{ end }}backendRefs:
+        - name: {{ $.Release.Name }}-omni-ui
+          port: 80
+    {{- end }}
+{{- end }}
diff --git a/charts/azimuth-omni/templates/ingress.yml b/charts/azimuth-omni/templates/ingress.yml
new file mode 100644
index 00000000..62fe9529
--- /dev/null
+++ b/charts/azimuth-omni/templates/ingress.yml
@@ -0,0 +1,43 @@
+{{- if and .Values.ui.enabled .Values.ui.ingress.enabled -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ printf "%s-omni-ui" .Release.Name }}
+  labels:
+    {{- include "azimuth-omni.labels" . | nindent 4 }}
+  {{- with .Values.ui.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ui.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- if .Values.ui.ingress.tls }}
+  tls:
+    {{- range .Values.ui.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range .Values.ui.ingress.hosts }}
+    - host: {{ .host | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- with .pathType }}
+            pathType: {{ . }}
+            {{- end }}
+            backend:
+              service:
+                name: {{ $.Release.Name }}-omni-ui
+                port:
+                  number: 80
+          {{- end }}
+    {{- end }}
+{{- end }}
diff --git a/charts/azimuth-omni/templates/pod-disruption-budget.yml b/charts/azimuth-omni/templates/pod-disruption-budget.yml
new file mode 100644
index 00000000..59d7b520
--- /dev/null
+++ b/charts/azimuth-omni/templates/pod-disruption-budget.yml
@@ -0,0 +1,23 @@
+{{- if and .Values.ui.enabled .Values.ui.pdb.enabled }}
+{{- /*
+PodDisruptionBudget for the omni UI pods. minAvailable / maxUnavailable are
+emitted whenever they are set, including an explicit 0 (which `with` would
+treat as empty and silently drop).
+*/}}
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ .Release.Name }}-omni-ui
+  labels:
+    {{- include "azimuth-omni.labels" . | nindent 4 }}
+spec:
+  {{- if not (kindIs "invalid" .Values.ui.pdb.minAvailable) }}
+  minAvailable: {{ .Values.ui.pdb.minAvailable }}
+  {{- end }}
+  {{- if not (kindIs "invalid" .Values.ui.pdb.maxUnavailable) }}
+  maxUnavailable: {{ .Values.ui.pdb.maxUnavailable }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "azimuth-omni.ui-selectorLabels" . | nindent 6 }}
+{{- end }}
diff --git a/charts/azimuth-omni/templates/service.yml b/charts/azimuth-omni/templates/service.yml
new file mode 100644
index 00000000..4ff01615
--- /dev/null
+++ b/charts/azimuth-omni/templates/service.yml
@@ -0,0 +1,17 @@
+{{- if .Values.ui.enabled -}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Release.Name }}-omni-ui
+  labels:
+    {{- include "azimuth-omni.labels" . | nindent 4 }}
+spec:
+  ports:
+  - name: ui
+    port: 80
+    protocol: TCP
+    targetPort: ui
+  type: {{ .Values.ui.service.type }}
+  selector:
+    {{- include "azimuth-omni.ui-selectorLabels" . | nindent 4 }}
+{{- end -}}
diff --git a/charts/azimuth-omni/templates/ui-zenith-client.yml b/charts/azimuth-omni/templates/ui-zenith-client.yml
new file mode 100644
index 00000000..0f8acca7
--- /dev/null
+++ b/charts/azimuth-omni/templates/ui-zenith-client.yml
@@ -0,0 +1,16 @@
+{{- if .Values.ui.enabled -}}
+{{- if .Values.ui.service.zenith.enabled -}}
+apiVersion: zenith.stackhpc.com/v1alpha1
+kind: Client
+metadata:
+  name: {{ .Release.Name }}-omni-ui
+  labels:
+    {{- include "azimuth-omni.labels" . | nindent 4 }}
+spec:
+  reservationName: {{ .Release.Name }}-omni-ui
+  upstream:
+    serviceName: {{ .Release.Name }}-omni-ui
+  auth:
+    skip: {{ .Values.ui.service.zenith.skipAuth }}
+{{- end -}}
+{{- end -}}
diff --git a/charts/azimuth-omni/templates/ui-zenith-reservation.yml b/charts/azimuth-omni/templates/ui-zenith-reservation.yml
new file mode 100644
index 00000000..1ff7e2b3
--- /dev/null
+++ b/charts/azimuth-omni/templates/ui-zenith-reservation.yml
@@ -0,0 +1,22 @@
+{{- if .Values.ui.enabled -}}
+{{- if .Values.ui.service.zenith.enabled -}}
+{{- /*
+Zenith Reservation for the omni UI. Every annotation value is passed through
+`quote` so a configured value always renders as a YAML string.
+*/ -}}
+apiVersion: zenith.stackhpc.com/v1alpha1
+kind: Reservation
+metadata:
+  name: {{ .Release.Name }}-omni-ui
+  labels:
+    {{- include "azimuth-omni.labels" . | nindent 4 }}
+  annotations:
+    azimuth.stackhpc.com/service-label: {{ quote .Values.ui.service.zenith.label }}
+    azimuth.stackhpc.com/service-icon-url: {{ quote .Values.ui.service.zenith.iconUrl }}
+    {{- with .Values.ui.service.zenith.description }}
+    azimuth.stackhpc.com/service-description: {{ quote . }}
+    {{- end }}
+spec:
+  credentialSecretName: {{ .Release.Name }}-omni-ui-zenith-credential
+{{- end -}}
+{{- end -}}
diff --git a/charts/azimuth-omni/values.schema.json b/charts/azimuth-omni/values.schema.json
new file mode 100644
index 00000000..8d15bb77
--- /dev/null
+++ b/charts/azimuth-omni/values.schema.json
@@ -0,0 +1,174 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "type": "object",
+    "definitions": {
+        "backend": {
+            "type": "object",
+            "properties": {
+                "enabled": {
+                    "type": "boolean",
+                    "title": "Deploy in-cluster backend",
+                    "description": "If true, a vLLM Deployment + Service is created to serve this modality. Disable to omit the tab or to point the omni UI at an external (off-cluster) backend.",
+                    "default": false
+                },
+                "huggingface": {
+                    "type": "object",
+                    "properties": {
+                        "model": {
+                            "type": "string",
+                            "title": "Model",
+                            "description": "The [HuggingFace model](https://huggingface.co/models) to deploy for this modality."
+                        },
+                        "token": {
+                            "type": ["string", "null"],
+                            "title": "Access Token",
+                            "description": "A HuggingFace [access token](https://huggingface.co/docs/hub/security-tokens). Required for [gated models](https://huggingface.co/docs/hub/en/models-gated)."
+                        }
+                    }
+                },
+                "api": {
+                    "type": "object",
+                    "properties": {
+                        "image": {
+                            "type": "object",
+                            "properties": {
+                                "containerImage": {
+                                    "type": "string",
+                                    "title": "vLLM Container Image",
+                                    "description": "Container to use as API backend. 'vllm/vllm-openai' for text/vision/image models, 'vllm/vllm-omni' for TTS/multimodal-omni models.",
+                                    "default": "vllm/vllm-openai",
+                                    "enum": [
+                                        "vllm/vllm-openai",
+                                        "vllm/vllm-omni"
+                                    ]
+                                },
+                                "version": {
+                                    "type": "string",
+                                    "title": "Backend vLLM version",
+                                    "description": "Tag from https://github.com/vllm-project/vllm/tags (or vllm-omni/tags).",
+                                    "default": "v0.11.0"
+                                }
+                            }
+                        },
+                        "gpus": {
+                            "type": "integer",
+                            "title": "GPUs",
+                            "description": "Number of GPUs to request per backend pod. Set to 0 to fall back to a CPU vLLM image (testing only).",
+                            "minimum": 0,
+                            "default": 1
+                        },
+                        "modelMaxContextLength": {
+                            "title": "Model Context Length",
+                            "description": "Override the maximum context length, if the model's default is unsuitable."
+                        },
+                        "azimuthNodeGroupSelector": {
+                            "type": "string",
+                            "title": "Node Group",
+                            "description": "(Optional) Require that the model runs on a particular node group."
+                        }
+                    }
+                }
+            }
+        }
+    },
+    "properties": {
+        "backend": {
+            "type": "object",
+            "title": "In-cluster vLLM backends",
+            "description": "Settings forwarded to the azimuth-llm-omni-backend subchart.",
+            "properties": {
+                "enabled": {
+                    "type": "boolean",
+                    "title": "Deploy in-cluster backends",
+                    "description": "If false, no in-cluster backends are deployed and the UI must target external backends.",
+                    "default": true
+                },
+                "ttt": {
+                    "title": "Text-to-Text backend (chat)",
+                    "allOf": [{ "$ref": "#/definitions/backend" }]
+                },
+                "tts": {
+                    "title": "Text-to-Speech backend",
+                    "allOf": [{ "$ref": "#/definitions/backend" }]
+                },
+                "tti": {
+                    "title": "Text-to-Image backend",
+                    "allOf": [{ "$ref": "#/definitions/backend" }]
+                }
+            }
+        },
+        "ui": {
+            "type": "object",
+            "properties": {
+                "appSettings": {
+                    "type": "object",
+                    "properties": {
+                        "page_title": {
+                            "type": "string",
+                            "title": "Page Title",
+                            "description": "The title shown at the top of the omni interface.",
+                            "default": "Omni Interface"
+                        },
+                        "page_description": {
+                            "type": "string",
+                            "title": "Page Description",
+                            "description": "Subtitle markdown shown under the page title.",
+                            "default": "A unified interface for multimodal AI."
+                        },
+                        "ttt": {
+                            "type": "object",
+                            "title": "Chat (TTT) UI settings",
+                            "properties": {
+                                "model_name": {
+                                    "type": "string",
+                                    "title": "Model name",
+                                    "description": "Model identifier sent to the chat backend. Mirrors backend.ttt.huggingface.model when ttt is in-cluster."
+                                },
+                                "system_prompt": {
+                                    "type": "string",
+                                    "title": "System prompt",
+                                    "description": "Initial system message. `{date}` is substituted at request time."
+                                },
+                                "backend_url": {
+                                    "type": "string",
+                                    "title": "External backend URL",
+                                    "description": "Only needed when backend.ttt.enabled is false. OpenAI-compatible base URL without /v1."
+                                }
+                            }
+                        },
+                        "tts": {
+                            "type": "object",
+                            "title": "TTS UI settings",
+                            "properties": {
+                                "model_name": {
+                                    "type": "string",
+                                    "title": "Model name"
+                                },
+                                "backend_url": {
+                                    "type": "string",
+                                    "title": "External backend URL",
+                                    "description": "Only needed when backend.tts.enabled is false."
+                                }
+                            }
+                        },
+                        "tti": {
+                            "type": "object",
+                            "title": "Image (TTI) UI settings",
+                            "properties": {
+                                "model_name": {
+                                    "type": "string",
+                                    "title": "Model name"
+                                },
+                                "backend_url": {
+                                    "type": "string",
+                                    "title": "External backend URL",
+                                    "description": "Only needed when backend.tti.enabled is false."
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/charts/azimuth-omni/values.yaml b/charts/azimuth-omni/values.yaml
new file mode 100644
index 00000000..6097038f
--- /dev/null
+++ b/charts/azimuth-omni/values.yaml
@@ -0,0 +1,108 @@
+# In-cluster vLLM backends, provided by the azimuth-llm-omni-backend subchart
+# aliased here under `backend`. Values set here pass through to that subchart
+# (see charts/azimuth-omni-backend/values.yaml for per-modality options).
+# Set backend.enabled: false to use external backends via ui.appSettings.<alias>.
+backend:
+  enabled: true
+  # Text-to-text backend (chat / multimodal understanding).
+  ttt:
+    enabled: true
+  # Text-to-speech backend.
+  tts:
+    enabled: false
+  # Text-to-image backend.
+  tti:
+    enabled: false
+
+# Omni UI - the web app that talks to the enabled backends above (or to
+# external URLs you provide).
+ui:
+  enabled: true
+  image:
+    repository: ghcr.io/stackhpc/azimuth-llm-omni-ui
+    # Defaults to chart's appVersion
+    tag:
+    imagePullPolicy:
+
+  # Settings written to /etc/web-app/overrides.yml inside the UI container.
+  # Format matches web-apps/omni/defaults.yml. For each enabled backend the
+  # chart fills in backend_url and model_name, so you only need to set them
+  # when pointing at an external backend.
+  appSettings:
+    page_title: Omni Interface
+    page_description: A unified interface for multimodal AI.
+    # Use local system fonts by default to avoid GDPR issues with Gradio's
+    # default fonts (which require fetching from the Google fonts API).
+    theme_params:
+      font:
+        - sans-serif
+        - Arial
+      font_mono:
+        - sans-serif
+        - Arial
+
+    # Per-backend UI/inference config. To point a tab at an out-of-cluster
+    # backend, disable backend.<alias>.enabled and set backend_url + model_name
+    # here. Other keys (system_prompt, params, ui, ...) are forwarded as-is.
+    ttt:
+      system_prompt: "You are a helpful AI assistant. Today's date is {date}."
+      params:
+        max_tokens: 1024
+        temperature: 0.7
+        top_p: 0.9
+    tts:
+      params:
+        voice: casual_male
+        response_format: wav
+      # ui:
+      #   voice_choices: ["casual_male", "casual_female"]
+      #   format_choices: ["wav", "mp3", "ogg", "aac", "flac"]
+    tti:
+      params:
+        size: 1024x1024
+      ui:
+        size_choices: ["1024x1024", "768x1024", "1024x768"]
+        show_negative_prompt: true
+
+  service:
+    type: ClusterIP
+    zenith:
+      enabled: true
+      skipAuth: false
+      label: Omni Interface
+      iconUrl: https://raw.githubusercontent.com/gradio-app/gradio/5524e590577769b0444a5332b8d444aafb0c5c12/js/app/public/static/img/logo.svg
+      description: |
+        A unified multimodal web interface (chat / text-to-speech / text-to-image).
+  ingress:
+    enabled: false
+    className: ""
+    annotations: {}
+    hosts:
+      - host: chart-example.local
+        paths:
+          - path: /
+            pathType: ImplementationSpecific
+    tls: []
+  httpRoute:
+    enabled: false
+    annotations: {}
+    parentRefs:
+      - name: my-gateway
+        namespace: default
+        sectionName: https
+    hostnames:
+      - chart-example.local
+    rules:
+      - matches:
+          - path:
+              type: PathPrefix
+              value: /
+  updateStrategy:
+    rollingUpdate:
+      maxSurge: 25%
+      maxUnavailable: 25%
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+  pdb:
+    enabled: false
diff --git a/scripts/perf-test/stress.py b/scripts/perf-test/stress.py
index 6af6426f..425f132e 100644
--- a/scripts/perf-test/stress.py
+++ b/scripts/perf-test/stress.py
@@ -16,11 +16,12 @@
 prompts = [
     "Hi, how are you?",
     "What's the weather like with you?",
-    "Who's the best footballer of all time?"
+    "Who's the best footballer of all time?",
 ]
 
 client_count = 3
-request_count = 5 # Requests per client
+request_count = 5  # Requests per client
+
 
 def make_requests(client_id: int):
     client = Client(url)
@@ -32,7 +33,12 @@ def make_requests(client_id: int):
         timings.append(time.time() - start_time)
     return timings
 
-results = list(Parallel(n_jobs=client_count)(delayed(make_requests)(i) for i in range(1, client_count+1)))
+
+results = list(
+    Parallel(n_jobs=client_count)(
+        delayed(make_requests)(i) for i in range(1, client_count + 1)
+    )
+)
 all_timings = []
 for client_timings in results:
     all_timings += client_timings
diff --git a/web-apps/chat/Dockerfile b/web-apps/chat/Dockerfile
index 5b7be0c8..abb137dd 100644
--- a/web-apps/chat/Dockerfile
+++ b/web-apps/chat/Dockerfile
@@ -1,6 +1,11 @@
-FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim
-
-RUN apt-get update && apt-get install -y libssl3=3.0.19-1~deb12u2 openssl=3.0.19-1~deb12u2 && rm -rf /var/lib/apt/lists/*
+FROM ghcr.io/astral-sh/uv:python3.11-trixie-slim
+
+RUN apt-get update && apt-get install -y \
+      libssl3t64=3.5.6-1~deb13u2 \
+      openssl=3.5.6-1~deb13u2 \
+      libsqlite3-0=3.46.1-7+deb13u1 \
+      libgnutls30t64=3.8.9-3+deb13u4 \
+    && rm -rf /var/lib/apt/lists/*
 
 ARG DIR=chat
 
diff --git a/web-apps/chat/app.py b/web-apps/chat/app.py
index 82fff79b..1c84ded2 100644
--- a/web-apps/chat/app.py
+++ b/web-apps/chat/app.py
@@ -62,6 +62,7 @@ class PossibleSystemPromptException(Exception):
     streaming=True,
 )
 
+
 def inference(latest_message, history):
     # Allow mutating global variable
     global BACKEND_INITIALISED
@@ -69,16 +70,18 @@ def inference(latest_message, history):
 
     try:
         context = []
-        model_instruction = settings.model_instruction.replace("{date}", f"{date.today()}")
+        model_instruction = settings.model_instruction.replace(
+            "{date}", f"{date.today()}"
+        )
         if INCLUDE_SYSTEM_PROMPT:
             context.append(SystemMessage(content=model_instruction))
         elif history and len(history) > 0:
             # Mimic system prompt by prepending it to first human message
-            history[0]['content'] = f"{model_instruction}\n\n{history[0]['content']}"
+            history[0]["content"] = f"{model_instruction}\n\n{history[0]['content']}"
 
         for message in history:
-            role = message['role']
-            content = message['content']
+            role = message["role"]
+            content = message["content"]
             if role == "user":
                 context.append(HumanMessage(content=content))
             else:
@@ -102,10 +105,10 @@ def inference(latest_message, history):
             # The "think" tags mark the chatbot's reasoning. Remove the content
             # and replace with "Thinking..." until the closing tag is found.
             content = chunk.content
-            if '<think>' in content or thinking:
+            if "<think>" in content or thinking:
                 thinking = True
                 response = "Thinking..."
-                if '</think>' in content:
+                if "</think>" in content:
                     thinking = False
                     response = ""
             else:
@@ -175,7 +178,7 @@ def inference_wrapper(*args):
     js=settings.custom_javascript,
     title=settings.page_title,
 ) as demo:
-    gr.Markdown('# ' + settings.page_title)
+    gr.Markdown("# " + settings.page_title)
     gr.ChatInterface(
         inference_wrapper,
         type="messages",
@@ -187,10 +190,10 @@ def inference_wrapper(*args):
             sanitize_html=True,
             autoscroll=False,
             latex_delimiters=[
-                {"left": "$$", "right": "$$", "display": True },
-                {"left": "$", "right": "$", "display": False }
-                ],
-            ),
+                {"left": "$$", "right": "$$", "display": True},
+                {"left": "$", "right": "$", "display": False},
+            ],
+        ),
     )
 
 
diff --git a/web-apps/chat/gradio-client-test.py b/web-apps/chat/gradio-client-test.py
index 723852dc..1943da6b 100644
--- a/web-apps/chat/gradio-client-test.py
+++ b/web-apps/chat/gradio-client-test.py
@@ -6,7 +6,7 @@
 gradio_host = sys.argv[1]
 
 retries = 60
-for n in range(1, retries+1):
+for n in range(1, retries + 1):
     try:
         client = Client(gradio_host)
         result = client.predict("Hi", api_name="/chat")
diff --git a/web-apps/chat/requirements.txt b/web-apps/chat/requirements.txt
index a448a54f..013d67a8 100644
--- a/web-apps/chat/requirements.txt
+++ b/web-apps/chat/requirements.txt
@@ -1,7 +1,7 @@
 gradio<6
 gradio_client
 openai
-langchain<1.0
+langchain>=0.3,<1.0
 langchain_openai
 pydantic
 structlog
diff --git a/web-apps/chat/test.py b/web-apps/chat/test.py
index 29bb5738..05e1a480 100644
--- a/web-apps/chat/test.py
+++ b/web-apps/chat/test.py
@@ -7,6 +7,7 @@
 url = os.environ.get("GRADIO_URL", "http://localhost:7860")
 client = Client(url)
 
+
 class TestSuite(unittest.TestCase):
 
     def test_gradio_api(self):
@@ -19,5 +20,6 @@ def test_gradio_api(self):
     #         # mock_response.assert_called_once_with("Hi", [])
     #         self.assertEqual(result, "Mocked")
 
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/web-apps/flux-image-gen/Dockerfile b/web-apps/flux-image-gen/Dockerfile
index c530f4c0..a3a703e8 100644
--- a/web-apps/flux-image-gen/Dockerfile
+++ b/web-apps/flux-image-gen/Dockerfile
@@ -1,13 +1,50 @@
 FROM ghcr.io/astral-sh/uv:python3.11-trixie
 
-# https://stackoverflow.com/questions/55313610/importerror-libgl-so-1-cannot-open-shared-object-file-no-such-file-or-directo
+ARG IMAGEMAGICK_VERSION=8:7.1.1.43+dfsg1-1+deb13u9
+ARG LIBUNBOUND_VERSION=1.22.0-2+deb13u3
+ARG KRB5_VERSION=1.21.3-5+deb13u1
+ARG LIBGCRYPT_VERSION=1.11.0-7+deb13u1
 RUN apt-get update && \
-    apt-get install -y ffmpeg libsm6 libxext6 && \
+    apt-get install -y --no-install-recommends \
+        ffmpeg \
+        libsm6 \
+        libxext6 \
+        "imagemagick=${IMAGEMAGICK_VERSION}" \
+        "imagemagick-7-common=${IMAGEMAGICK_VERSION}" \
+        "imagemagick-7.q16=${IMAGEMAGICK_VERSION}" \
+        "libmagickcore-7-arch-config=${IMAGEMAGICK_VERSION}" \
+        "libmagickcore-7-headers=${IMAGEMAGICK_VERSION}" \
+        "libmagickcore-7.q16-10=${IMAGEMAGICK_VERSION}" \
+        "libmagickcore-7.q16-10-extra=${IMAGEMAGICK_VERSION}" \
+        "libmagickcore-7.q16-dev=${IMAGEMAGICK_VERSION}" \
+        "libmagickcore-dev=${IMAGEMAGICK_VERSION}" \
+        "libmagickwand-7-headers=${IMAGEMAGICK_VERSION}" \
+        "libmagickwand-7.q16-10=${IMAGEMAGICK_VERSION}" \
+        "libmagickwand-7.q16-dev=${IMAGEMAGICK_VERSION}" \
+        "libmagickwand-dev=${IMAGEMAGICK_VERSION}" \
+        "libunbound8=${LIBUNBOUND_VERSION}" \
+        "krb5-multidev=${KRB5_VERSION}" \
+        "libgssapi-krb5-2=${KRB5_VERSION}" \
+        "libgssrpc4t64=${KRB5_VERSION}" \
+        "libk5crypto3=${KRB5_VERSION}" \
+        "libkadm5clnt-mit12=${KRB5_VERSION}" \
+        "libkadm5srv-mit12=${KRB5_VERSION}" \
+        "libkdb5-10t64=${KRB5_VERSION}" \
+        "libkrb5-3=${KRB5_VERSION}" \
+        "libkrb5-dev=${KRB5_VERSION}" \
+        "libkrb5support0=${KRB5_VERSION}" \
+        "libgcrypt20=${LIBGCRYPT_VERSION}" && \
+    apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
 
 ARG DIR=flux-image-gen
 
+RUN uv pip install --system --no-cache-dir --upgrade \
+        pip \
+        setuptools \
+        wheel
+
 COPY $DIR/requirements.txt requirements.txt
 RUN uv pip install --system --no-cache-dir -r requirements.txt
 
diff --git a/web-apps/flux-image-gen/api_server.py b/web-apps/flux-image-gen/api_server.py
index 1d89e3f8..7ba8880c 100644
--- a/web-apps/flux-image-gen/api_server.py
+++ b/web-apps/flux-image-gen/api_server.py
@@ -31,10 +31,12 @@ class ImageGenInput(BaseModel):
     prompt: str
     add_sampling_metadata: bool
 
+
 @app.get("/")
 def health_check():
     return "Server is running"
 
+
 @app.get("/model")
 async def get_model():
     return {"model": model}
@@ -61,7 +63,9 @@ async def generate_image(input: ImageGenInput):
             add_sampling_metadata=input.add_sampling_metadata,
         )
         if not image:
-            return JSONResponse({"error": {"message": msg, "seed": seed}}, status_code=400)
+            return JSONResponse(
+                {"error": {"message": msg, "seed": seed}}, status_code=400
+            )
     # Convert image to bytes response
     buffer = io.BytesIO()
     image.save(buffer, format="jpeg")
diff --git a/web-apps/flux-image-gen/gradio_ui.py b/web-apps/flux-image-gen/gradio_ui.py
index 658b5e4b..1cd84ef6 100644
--- a/web-apps/flux-image-gen/gradio_ui.py
+++ b/web-apps/flux-image-gen/gradio_ui.py
@@ -16,13 +16,13 @@ class Model(BaseModel):
     name: str
     address: HttpUrl
 
+
 class AppSettings(BaseModel):
     models: List[Model]
     example_prompt: str = "Yoda riding a skateboard."
     title: str = "Flux Image Generation Demo"
 
 
-
 settings_path = pathlib.Path("/etc/gradio-app/gradio_config.yaml")
 if not settings_path.exists():
     print("No settings overrides found at", settings_path)
@@ -38,7 +38,14 @@ class AppSettings(BaseModel):
 # Disable analytics for GDPR compliance
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 
-def save_image(model_name: str, prompt: str, seed: int, add_sampling_metadata: bool, image: Image.Image):
+
+def save_image(
+    model_name: str,
+    prompt: str,
+    seed: int,
+    add_sampling_metadata: bool,
+    image: Image.Image,
+):
     filename = f"output/gradio/{uuid.uuid4()}.jpg"
     os.makedirs(os.path.dirname(filename), exist_ok=True)
     exif_data = Image.Exif()
@@ -95,22 +102,43 @@ async def generate_image(
 
         return image, seed, filename, None
 
+
 with gr.Blocks(title=settings.title) as demo:
     gr.Markdown(f"# {settings.title}")
 
     with gr.Row():
         with gr.Column():
-            model = gr.Dropdown(MODEL_NAMES, value=MODEL_NAMES[0], label="Model", interactive=len(MODEL_NAMES) > 1)
+            model = gr.Dropdown(
+                MODEL_NAMES,
+                value=MODEL_NAMES[0],
+                label="Model",
+                interactive=len(MODEL_NAMES) > 1,
+            )
             prompt = gr.Textbox(label="Prompt", value=settings.example_prompt)
 
             with gr.Accordion("Advanced Options", open=False):
                 # TODO: Make min/max slide values configurable
                 width = gr.Slider(128, 8192, 1360, step=16, label="Width")
                 height = gr.Slider(128, 8192, 768, step=16, label="Height")
-                num_steps = gr.Slider(1, 50, 4 if model.value == "flux-schnell" else 50, step=1, label="Number of steps")
-                guidance = gr.Slider(1.0, 10.0, 3.5, step=0.1, label="Guidance", interactive=not model.value == "flux-schnell")
+                num_steps = gr.Slider(
+                    1,
+                    50,
+                    4 if model.value == "flux-schnell" else 50,
+                    step=1,
+                    label="Number of steps",
+                )
+                guidance = gr.Slider(
+                    1.0,
+                    10.0,
+                    3.5,
+                    step=0.1,
+                    label="Guidance",
+                    interactive=not model.value == "flux-schnell",
+                )
                 seed = gr.Textbox("-1", label="Seed (-1 for random)")
-                add_sampling_metadata = gr.Checkbox(label="Add sampling parameters to metadata?", value=True)
+                add_sampling_metadata = gr.Checkbox(
+                    label="Add sampling parameters to metadata?", value=True
+                )
 
             generate_btn = gr.Button("Generate")
 
@@ -122,7 +150,16 @@ async def generate_image(
 
     generate_btn.click(
         fn=generate_image,
-        inputs=[model, width, height, num_steps, guidance, seed, prompt, add_sampling_metadata],
+        inputs=[
+            model,
+            width,
+            height,
+            num_steps,
+            guidance,
+            seed,
+            prompt,
+            add_sampling_metadata,
+        ],
         outputs=[output_image, seed_output, download_btn, warning_text],
     )
     demo.launch(enable_monitoring=False)
diff --git a/web-apps/flux-image-gen/image_gen.py b/web-apps/flux-image-gen/image_gen.py
index 28585d1b..d0cffe37 100644
--- a/web-apps/flux-image-gen/image_gen.py
+++ b/web-apps/flux-image-gen/image_gen.py
@@ -16,14 +16,18 @@
 
 NSFW_THRESHOLD = 0.85
 
+
 def get_models(name: str, device: torch.device, offload: bool, is_schnell: bool):
     t5 = load_t5(device, max_length=256 if is_schnell else 512)
     clip = load_clip(device)
     model = load_flow_model(name, device="cpu" if offload else device)
     ae = load_ae(name, device="cpu" if offload else device)
-    nsfw_classifier = pipeline("image-classification", model="Falconsai/nsfw_image_detection", device=device)
+    nsfw_classifier = pipeline(
+        "image-classification", model="Falconsai/nsfw_image_detection", device=device
+    )
     return model, ae, t5, clip, nsfw_classifier
 
+
 class FluxGenerator:
     def __init__(self, model_name: str, device: str, offload: bool):
         self.device = torch.device(device)
@@ -69,10 +73,14 @@ def generate_image(
 
         if init_image is not None:
             if isinstance(init_image, np.ndarray):
-                init_image = torch.from_numpy(init_image).permute(2, 0, 1).float() / 255.0
+                init_image = (
+                    torch.from_numpy(init_image).permute(2, 0, 1).float() / 255.0
+                )
                 init_image = init_image.unsqueeze(0)
             init_image = init_image.to(self.device)
-            init_image = torch.nn.functional.interpolate(init_image, (opts.height, opts.width))
+            init_image = torch.nn.functional.interpolate(
+                init_image, (opts.height, opts.width)
+            )
             if self.offload:
                 self.ae.encoder.to(self.device)
             init_image = self.ae.encode(init_image.to())
@@ -137,7 +145,7 @@ def generate_image(
         x = rearrange(x[0], "c h w -> h w c")
 
         img = Image.fromarray((127.5 * (x + 1.0)).cpu().byte().numpy())
-        nsfw_score = [x["score"] for x in self.nsfw_classifier(img) if x["label"] == "nsfw"][0] # type: ignore
+        nsfw_score = [x["score"] for x in self.nsfw_classifier(img) if x["label"] == "nsfw"][0]  # type: ignore
 
         if nsfw_score < NSFW_THRESHOLD:
             exif_data = Image.Exif()
@@ -152,4 +160,8 @@ def generate_image(
 
             return img, str(opts.seed), None
         else:
-            return None, str(opts.seed), "Your generated image may contain NSFW content."
+            return (
+                None,
+                str(opts.seed),
+                "Your generated image may contain NSFW content.",
+            )
diff --git a/web-apps/flux-image-gen/requirements.txt b/web-apps/flux-image-gen/requirements.txt
index 069c65c0..d29104a2 100644
--- a/web-apps/flux-image-gen/requirements.txt
+++ b/web-apps/flux-image-gen/requirements.txt
@@ -1,4 +1,6 @@
 flux[gradio] @ git+https://github.com/black-forest-labs/flux@478338d
 fastapi[standard]
 httpx
+urllib3>=2.7.0
+idna>=3.15
 # ../utils
diff --git a/web-apps/flux-image-gen/test_client.py b/web-apps/flux-image-gen/test_client.py
index 21d8a693..7e4b9738 100644
--- a/web-apps/flux-image-gen/test_client.py
+++ b/web-apps/flux-image-gen/test_client.py
@@ -5,16 +5,16 @@
 model = os.environ.get("FLUX_MODEL", "flux-schnell")
 client = Client(address)
 web_page, seed, file_name, err = client.predict(
-		model_name=model,
-		# width=1360,
-		width=3888,
-		# height=768,
-		height=2544,
-		num_steps=4,
-		guidance=3.5,
-		seed="-1",
-		prompt="Yoda riding a skateboard",
-		add_sampling_metadata=True,
-		api_name="/generate_image"
+    model_name=model,
+    # width=1360,
+    width=3888,
+    # height=768,
+    height=2544,
+    num_steps=4,
+    guidance=3.5,
+    seed="-1",
+    prompt="Yoda riding a skateboard",
+    add_sampling_metadata=True,
+    api_name="/generate_image",
 )
-print('Result saved to:', file_name)
+print("Result saved to:", file_name)
diff --git a/web-apps/image-analysis/Dockerfile b/web-apps/image-analysis/Dockerfile
index a5d6d169..78ff66f7 100644
--- a/web-apps/image-analysis/Dockerfile
+++ b/web-apps/image-analysis/Dockerfile
@@ -1,8 +1,11 @@
-FROM ghcr.io/astral-sh/uv:python3.11-bookworm-slim
-
-RUN apt-get update && \
-    apt-get install -y --only-upgrade libssl3=3.0.19-1~deb12u2 openssl=3.0.19-1~deb12u2 && \
-    rm -rf /var/lib/apt/lists/*
+FROM ghcr.io/astral-sh/uv:python3.11-trixie-slim
+
+RUN apt-get update && apt-get install -y \
+      libssl3t64=3.5.6-1~deb13u2 \
+      openssl=3.5.6-1~deb13u2 \
+      libsqlite3-0=3.46.1-7+deb13u1 \
+      libgnutls30t64=3.8.9-3+deb13u4 \
+    && rm -rf /var/lib/apt/lists/*
 
 ARG DIR=image-analysis
 
diff --git a/web-apps/omni/Dockerfile b/web-apps/omni/Dockerfile
new file mode 100644
index 00000000..fb07391c
--- /dev/null
+++ b/web-apps/omni/Dockerfile
@@ -0,0 +1,26 @@
+FROM ghcr.io/astral-sh/uv:python3.11-trixie-slim
+# Install pinned versions of a few system libraries, without recommended extras.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      libssl3t64=3.5.6-1~deb13u2 \
+      openssl=3.5.6-1~deb13u2 \
+      libsqlite3-0=3.46.1-7+deb13u1 \
+      libgnutls30t64=3.8.9-3+deb13u4 \
+    && rm -rf /var/lib/apt/lists/*
+
+ARG DIR=omni
+# requirements.txt refers to ../utils; in the image the package is copied to ./utils.
+COPY $DIR/requirements.txt requirements.txt
+RUN sed -i 's$../utils$./utils$' requirements.txt
+COPY utils utils
+RUN uv pip install --system --no-cache-dir -r requirements.txt
+
+COPY purge-google-fonts.sh purge-google-fonts.sh
+RUN bash purge-google-fonts.sh
+
+WORKDIR /app
+
+COPY $DIR/*.py .
+
+COPY $DIR/defaults.yml .
+
+ENTRYPOINT ["python3", "app.py"]
diff --git a/web-apps/omni/app.py b/web-apps/omni/app.py
new file mode 100644
index 00000000..fa035f72
--- /dev/null
+++ b/web-apps/omni/app.py
@@ -0,0 +1,602 @@
+"""Azimuth Omni: ttt-first UI with optional tts/tti backends."""
+
+import base64
+import gradio as gr
+import httpx
+import io
+import tempfile
+import threading
+import time
+import utils
+
+from datetime import date
+from openai import OpenAI
+from pathlib import Path
+from PIL import Image
+from pydantic import BaseModel, ConfigDict
+from scipy.io import wavfile
+from typing import Dict, List, Any, Optional
+from urllib.parse import urljoin
+
+log = utils.get_logger()
+log.info(f"Gradio version: {gr.__version__}")
+
+
+# Param classes hold the known UI defaults. Extra keys in a backend's params
+# are kept and forwarded to the backend via the OpenAI SDK extra_body.
+
+
+class ChatParams(BaseModel):  # chat defaults; chat_inference overlays ttt.params
+    max_tokens: int = 1024
+    temperature: float = 0.7
+    top_p: float = 0.9
+    model_config = ConfigDict(extra="allow")  # unknown keys are accepted and kept
+
+
+class TTSParams(BaseModel):  # speech defaults; also seeds the TTS tab widgets
+    voice: str = "casual_male"
+    response_format: str = "wav"
+    model_config = ConfigDict(extra="allow")  # unknown keys are accepted and kept
+
+
+class ImageGenParams(BaseModel):  # seeds the Image Generation tab widgets
+    size: str = "1024x1024"
+    style: Optional[str] = None  # None: Style dropdown starts without a value
+    quality: Optional[str] = None  # None: Quality dropdown starts without a value
+    model_config = ConfigDict(extra="allow")  # unknown keys are accepted and kept
+
+
+# Per-backend UI config: dropdown choices and visibility flags. All optional.
+
+
+class ChatUI(BaseModel):  # declares no options; not referenced elsewhere in app.py
+    model_config = ConfigDict(extra="allow")
+
+
+class TTSUI(BaseModel):  # built from tts.ui when the Text-to-Speech tab is created
+    voice_choices: Optional[List[str]] = None  # None/empty: Voice is a free-text box
+    format_choices: List[str] = ["wav", "mp3", "ogg", "aac", "flac"]
+    model_config = ConfigDict(extra="allow")
+
+
+class ImageUI(BaseModel):  # built from tti.ui when the Image Generation tab is created
+    size_choices: List[str] = [
+        "1024x1024",
+        "1024x1792",
+        "1792x1024",
+        "512x512",
+        "256x256",
+    ]
+    style_choices: Optional[List[str]] = None  # Style dropdown is hidden unless set
+    quality_choices: Optional[List[str]] = None  # Quality dropdown is hidden unless set
+    show_negative_prompt: bool = True  # visibility of the negative prompt box
+    model_config = ConfigDict(extra="allow")
+
+
+class BackendConfig(BaseModel):  # settings of one backend (ttt, tts or tti)
+    backend_url: str  # base URL; "v1" is joined onto it when the client is built
+    model_name: str
+    system_prompt: Optional[str] = None  # only read by chat_inference
+    params: Dict[str, Any] = {}  # request defaults; non-native keys go to extra_body
+    ui: Dict[str, Any] = {}  # parsed into TTSUI / ImageUI when the tabs are built
+    # OpenAI client request timeout (seconds). Falls back to AppConfig.
+    request_timeout_s: Optional[float] = None
+    model_config = ConfigDict(extra="allow")
+
+
+class AppConfig(BaseModel):  # app-wide tunables (the `app` settings section)
+    probe_timeout_s: float = 2.0  # timeout of the httpx client used for health probes
+    probe_interval_s: float = 10.0  # pause between probe rounds and UI refresh ticks
+    concurrency_limit: int = 5  # passed to demo.queue(default_concurrency_limit=...)
+    # Default OpenAI client timeout (seconds) for inference requests.
+    request_timeout_s: float = 1800.0
+    model_config = ConfigDict(extra="allow")
+
+
+class AppSettings(BaseModel):  # root settings model, built from load_settings()
+    host_address: str = "0.0.0.0"  # passed to launch() as server_name
+    page_title: str = "Omni Interface"
+    page_description: str = ""  # rendered under the title when non-empty
+    app: AppConfig = AppConfig()
+    ttt: Optional[BackendConfig] = None  # a backend left as None gets no client or tab
+    tts: Optional[BackendConfig] = None
+    tti: Optional[BackendConfig] = None
+    theme_params: Dict[str, Any] = {}  # kwargs for gr.themes.Default(...)
+    theme_params_extended: Dict[str, Any] = {}  # kwargs for theme.set(...), if any
+    css_overrides: Optional[str] = None  # passed to gr.Blocks as `css`
+    custom_javascript: Optional[str] = None  # passed to gr.Blocks as `js`
+    model_config = ConfigDict(protected_namespaces=(), extra="allow")
+
+
+def load_settings() -> dict:
+    """Return defaults.yml, shallow-merged with the first overrides file found."""
+    base_cfg = utils.load_yaml("./defaults.yml")
+    # The k8s mount path is checked before the local fallback; first hit wins.
+    locations = ("/etc/web-app/overrides.yml", "./overrides.yml")
+    chosen = next((loc for loc in locations if Path(loc).exists()), None)
+    return {**base_cfg, **utils.load_yaml(chosen)} if chosen else base_cfg
+
+
+settings = AppSettings(**load_settings())
+
+
+BACKEND_NAMES = ("ttt", "tts", "tti")
+
+clients: Dict[str, OpenAI] = {}  # backend name -> OpenAI SDK client
+probe_urls: Dict[str, str] = {}  # backend name -> URL polled by the health probe
+for name in BACKEND_NAMES:
+    cfg: Optional[BackendConfig] = getattr(settings, name)
+    if cfg is None:
+        continue  # backend not configured: no client and no probe URL
+    base = cfg.backend_url.rstrip("/") + "/"  # exactly one trailing slash for urljoin
+    timeout_s = (
+        cfg.request_timeout_s
+        if cfg.request_timeout_s is not None
+        else settings.app.request_timeout_s
+    )
+    clients[name] = OpenAI(
+        base_url=urljoin(base, "v1"),
+        api_key="not-needed",
+        timeout=timeout_s,
+    )
+    probe_urls[name] = urljoin(base, "v1/models")
+    log.info(f"  {name}: client request_timeout_s={timeout_s}")
+
+enabled = list(clients.keys())  # configured backends, in BACKEND_NAMES order
+log.info(f"Enabled backends: {enabled}")
+if not enabled:
+    raise RuntimeError(
+        f"No backends configured. Set at least one of: {', '.join(BACKEND_NAMES)}"
+    )
+
+
+# A background thread probes GET /v1/models for every backend and writes the
+# results to `health`. UI refresh and inference guards read from it.
+
+PROBE_TIMEOUT_S = settings.app.probe_timeout_s
+PROBE_INTERVAL_S = settings.app.probe_interval_s
+
+_probe_http = httpx.Client(timeout=PROBE_TIMEOUT_S)
+health: Dict[str, bool] = {name: False for name in enabled}
+
+
+def _probe_once() -> None:  # refresh `health` for every enabled backend
+    for name in enabled:
+        try:
+            health[name] = _probe_http.get(probe_urls[name]).is_success
+        except Exception as e:  # any failure means "down"; never kill the probe thread
+            health[name] = False
+            log.debug(f"Health probe failed for {name}: {e}")
+
+
+def _probe_loop() -> None:  # body of the "health-probe" daemon thread; never returns
+    while True:
+        time.sleep(PROBE_INTERVAL_S)  # sleep first: `health` is primed before start
+        _probe_once()
+
+
+# Prime synchronously so the first page load sees real values.
+_probe_once()
+threading.Thread(target=_probe_loop, name="health-probe", daemon=True).start()
+
+
+def _status_markdown(name: str) -> str:
+    """Render the Markdown status line for backend `name` from `health`."""
+    url = getattr(settings, name).backend_url
+    if not health[name]:
+        # Unreachable: also mention the retry cadence and the disabled inputs.
+        retry_note = f"(retrying every {int(PROBE_INTERVAL_S)}s; inputs disabled)"
+        return f"**Status:** unreachable - `{url}` " + retry_note
+    return f"**Status:** reachable - `{url}`"
+
+
+def file_to_base64(file_path: str) -> tuple[str, str]:
+    """Encode a file as a base64 data URI; return (data_uri, mime_type)."""
+    # Suffix -> MIME type; any other suffix maps to application/octet-stream.
+    known_types = {
+        ".png": "image/png",
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".gif": "image/gif",
+        ".webp": "image/webp",
+        ".bmp": "image/bmp",
+        ".wav": "audio/wav",
+        ".mp3": "audio/mpeg",
+        ".ogg": "audio/ogg",
+        ".flac": "audio/flac",
+        ".m4a": "audio/mp4",
+        ".mp4": "video/mp4",
+        ".webm": "video/webm",
+        ".avi": "video/x-msvideo",
+        ".mov": "video/quicktime",
+    }
+    source = Path(file_path)
+    mime_type = known_types.get(source.suffix.lower(), "application/octet-stream")
+    # The whole file is read into memory and embedded in the URI.
+    encoded = base64.b64encode(source.read_bytes()).decode()
+    return f"data:{mime_type};base64,{encoded}", mime_type
+
+
+def build_message_content(text: str, files: List[str]) -> List[Dict]:
+    """Turn text plus attached file paths into a list of chat content parts.
+
+    Images and videos become data-URI URL parts, audio an input_audio part;
+    other files are skipped. An empty result becomes one empty text part.
+    """
+    parts: List[Dict] = [{"type": "text", "text": text}] if text else []
+    # Each supported attachment adds one part after the optional text part.
+    for file_path in files:
+        data_uri, mime = file_to_base64(file_path)
+        if mime.startswith("image/"):
+            parts.append({"type": "image_url", "image_url": {"url": data_uri}})
+        elif mime.startswith("video/"):
+            parts.append({"type": "video_url", "video_url": {"url": data_uri}})
+        elif mime.startswith("audio/"):
+            audio = {
+                "data": data_uri.split(",")[1],  # bare base64 payload
+                "format": mime.split("/")[1],  # MIME subtype, e.g. "wav"
+            }
+            parts.append({"type": "input_audio", "input_audio": audio})
+    return parts or [{"type": "text", "text": ""}]
+
+
+_CHAT_NATIVE_KEYS = {"max_tokens", "temperature", "top_p"}
+
+
+def _split_native(params: Dict[str, Any], native: set) -> tuple[Dict, Dict]:
+    """Split params into (SDK-native kwargs, everything else for extra_body)."""
+    leftover = {key: params[key] for key in params if key not in native}
+    accepted = {key: params[key] for key in params if key in native}
+    return accepted, leftover
+
+
+def chat_inference(message, history):
+    """Stream a chat reply from the ttt backend, yielding the text so far."""
+    cfg = settings.ttt
+    client = clients["ttt"]
+    raw_params = {**ChatParams().model_dump(), **cfg.params}  # config over defaults
+    native_kwargs, extra_body = _split_native(raw_params, _CHAT_NATIVE_KEYS)
+    if not health["ttt"]:
+        yield (
+            "Chat backend is currently unreachable. The status banner above "
+            "will update once it comes back online."
+        )
+        return
+
+    try:
+        messages = []
+        if cfg.system_prompt:
+            sp = cfg.system_prompt.replace("{date}", str(date.today()))
+            messages.append({"role": "system", "content": sp})
+
+        for msg in history:
+            content = msg.get("content", "")
+            if isinstance(content, dict) and "path" in content:
+                content = build_message_content("", [content["path"]])
+            elif isinstance(content, tuple) and content and content[0]:
+                # Gradio may hand earlier uploads back as (path,) or (path, alt).
+                content = build_message_content("", [content[0]])
+            elif not isinstance(content, (str, list)):
+                continue  # nothing that can be forwarded to the backend
+            messages.append({"role": msg.get("role", "user"), "content": content})
+
+        if isinstance(message, dict):
+            text = message.get("text", "") or ""
+            content = build_message_content(text, message.get("files", []))
+            messages.append({"role": "user", "content": content})
+        else:
+            messages.append({"role": "user", "content": str(message)})
+
+        create_kwargs: Dict[str, Any] = dict(
+            model=cfg.model_name,
+            messages=messages,
+            stream=True,
+            **native_kwargs,
+        )
+        if extra_body:
+            create_kwargs["extra_body"] = extra_body
+        stream = client.chat.completions.create(**create_kwargs)
+        response = ""
+        for chunk in stream:
+            if chunk.choices and chunk.choices[0].delta.content:
+                response += chunk.choices[0].delta.content
+                yield response  # Gradio replaces the reply with each yield
+    except Exception as e:
+        log.error(f"Chat error: {e}")
+        yield f"Error: {e}"  # shown in the chat instead of raising
+
+
+_TTS_NATIVE_KEYS = {"voice", "response_format", "speed"}
+
+
+def tts_inference(text: str, voice: str, response_format: str):
+    """Synthesize `text` on the tts backend for the Gradio audio output.
+
+    Returns (sample_rate, samples) for "wav" and the path of a temporary
+    file for any other format; failures are re-raised as gr.Error.
+    """
+    cfg = settings.tts
+    client = clients["tts"]
+    if not health["tts"]:
+        raise gr.Error("TTS backend is currently unreachable.")
+
+    # Precedence: TTSParams defaults < configured params < live UI values.
+    raw_params = {**TTSParams().model_dump(), **cfg.params}
+    raw_params["response_format"] = response_format
+    if not voice:
+        raw_params.pop("voice", None)  # empty voice: omit the field entirely
+    else:
+        raw_params["voice"] = voice
+    native_kwargs, extra_body = _split_native(raw_params, _TTS_NATIVE_KEYS)
+
+    try:
+        request: Dict[str, Any] = dict(
+            model=cfg.model_name, input=text, **native_kwargs
+        )
+        if extra_body:
+            request["extra_body"] = extra_body
+        audio_bytes = client.audio.speech.create(**request).read()
+        if response_format != "wav":
+            # Non-wav audio is returned as a file path; delete=False keeps the file.
+            suffix = f".{response_format}"
+            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as out:
+                out.write(audio_bytes)
+            return out.name
+        sample_rate, samples = wavfile.read(io.BytesIO(audio_bytes))
+        return (sample_rate, samples)
+    except Exception as e:
+        log.error(f"TTS error: {e}")
+        raise gr.Error(f"TTS error: {e}")
+
+
+_IMAGE_NATIVE_KEYS = {"size", "style", "quality", "n", "response_format"}
+
+
+def image_inference(
+    prompt: str,
+    negative_prompt: Optional[str] = None,
+    size: Optional[str] = None,
+    style: Optional[str] = None,
+    quality: Optional[str] = None,
+):
+    """Generate one image on the tti backend and return it as a PIL image.
+
+    Non-empty UI values override the configured params; backend failures and
+    an empty response are raised as gr.Error.
+    """
+    cfg = settings.tti
+    client = clients["tti"]
+    if not health["tti"]:
+        raise gr.Error("Image backend is currently unreachable.")
+
+    # Start from the configured params, then lay non-empty UI values on top.
+    ui_values = {"size": size, "style": style, "quality": quality}
+    raw_params: Dict[str, Any] = {**cfg.params}
+    raw_params.update({key: val for key, val in ui_values.items() if val})
+    raw_params.setdefault("response_format", "b64_json")
+    if negative_prompt:
+        raw_params["negative_prompt"] = negative_prompt
+    native_kwargs, extra_body = _split_native(raw_params, _IMAGE_NATIVE_KEYS)
+
+    try:
+        request: Dict[str, Any] = dict(
+            model=cfg.model_name, prompt=prompt, **native_kwargs
+        )
+        if extra_body:
+            request["extra_body"] = extra_body
+        response = client.images.generate(**request)
+        images = response.data
+        if not (images and images[0].b64_json):
+            raise gr.Error("No image data received from model")
+        return Image.open(io.BytesIO(base64.b64decode(images[0].b64_json)))
+    except gr.Error:
+        raise
+    except Exception as e:
+        log.error(f"Image generation error: {e}")
+        raise gr.Error(f"Image generation error: {e}")
+
+
+theme = gr.themes.Default(**settings.theme_params)
+if settings.theme_params_extended:
+    theme.set(**settings.theme_params_extended)
+
+blocks_kwargs = {
+    "fill_height": True,
+    "title": settings.page_title,
+    "theme": theme,
+    "css": settings.css_overrides,
+    "js": settings.custom_javascript,
+}
+launch_kwargs = {"server_name": settings.host_address}
+
+
+with gr.Blocks(**blocks_kwargs) as demo:
+    gr.Markdown(f"# {settings.page_title}")
+    if settings.page_description:
+        gr.Markdown(settings.page_description)
+
+    # {backend_name: (status_markdown, [widgets to toggle when unreachable])}
+    health_widgets: Dict[str, tuple] = {}
+
+    with gr.Tabs():
+        if settings.ttt:
+            with gr.Tab("Chat"):
+                gr.Markdown(f"**Model:** `{settings.ttt.model_name}`")
+                chat_status = gr.Markdown(_status_markdown("ttt"))  # rewritten by refresh_health
+
+                chatbot = gr.Chatbot(
+                    type="messages",
+                    height="65vh",
+                    resizable=True,
+                    sanitize_html=True,
+                    autoscroll=True,
+                    show_copy_button=True,
+                    allow_tags=False,
+                    latex_delimiters=[
+                        {"left": "$$", "right": "$$", "display": True},
+                        {"left": "$", "right": "$", "display": False},
+                    ],
+                )
+                textbox = gr.MultimodalTextbox(
+                    file_types=["image", "audio", "video"],  # kinds build_message_content handles
+                    file_count="multiple",
+                    placeholder="Type a message or upload files...",
+                    show_label=False,
+                )
+                gr.ChatInterface(
+                    fn=chat_inference,
+                    type="messages",
+                    multimodal=True,
+                    chatbot=chatbot,
+                    textbox=textbox,
+                    analytics_enabled=False,
+                )
+                health_widgets["ttt"] = (chat_status, [textbox])  # textbox follows health["ttt"]
+
+        if settings.tts:
+            tts_defaults = TTSParams(**settings.tts.params)  # configured params over defaults
+            tts_ui = TTSUI(**(settings.tts.ui or {}))
+
+            with gr.Tab("Text-to-Speech"):
+                gr.Markdown(f"**Model:** `{settings.tts.model_name}`")
+                tts_status = gr.Markdown(_status_markdown("tts"))  # rewritten by refresh_health
+
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        tts_input = gr.Textbox(
+                            label="Text to Speak",
+                            placeholder="Enter the text you want to convert to speech...",
+                            lines=5,
+                        )
+                        tts_output = gr.Audio(
+                            label="Generated Audio",
+                            show_download_button=True,
+                        )
+                    with gr.Column(scale=1):
+                        if tts_ui.voice_choices:  # dropdown when choices are configured
+                            tts_voice = gr.Dropdown(
+                                choices=tts_ui.voice_choices,
+                                value=(
+                                    tts_defaults.voice
+                                    if tts_defaults.voice in tts_ui.voice_choices
+                                    else tts_ui.voice_choices[0]
+                                ),
+                                label="Voice",
+                            )
+                        else:  # otherwise a free-text voice name
+                            tts_voice = gr.Textbox(
+                                value=tts_defaults.voice,
+                                label="Voice",
+                                info="Voice name supported by the model",
+                            )
+                        tts_format = gr.Dropdown(
+                            choices=tts_ui.format_choices,
+                            value=(
+                                tts_defaults.response_format
+                                if tts_defaults.response_format in tts_ui.format_choices
+                                else tts_ui.format_choices[0]  # default not offered: first choice
+                            ),
+                            label="Format",
+                        )
+                        tts_btn = gr.Button("Generate Speech", variant="primary")
+
+                tts_btn.click(
+                    tts_inference, [tts_input, tts_voice, tts_format], tts_output
+                )
+                health_widgets["tts"] = (
+                    tts_status,
+                    [tts_input, tts_voice, tts_format, tts_btn],  # all follow health["tts"]
+                )
+
+        if settings.tti:
+            img_defaults = ImageGenParams(**settings.tti.params)  # configured params over defaults
+            img_ui = ImageUI(**(settings.tti.ui or {}))
+
+            with gr.Tab("Image Generation"):
+                gr.Markdown(f"**Model:** `{settings.tti.model_name}`")
+                image_status = gr.Markdown(_status_markdown("tti"))  # rewritten by refresh_health
+
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        img_prompt = gr.Textbox(
+                            label="Prompt", placeholder="Describe the image...", lines=3
+                        )
+                        img_negative = gr.Textbox(
+                            label="Negative Prompt (optional)",
+                            lines=2,
+                            visible=img_ui.show_negative_prompt,
+                        )
+                        img_output = gr.Image(label="Generated Image", height=512)
+                    with gr.Column(scale=1):
+                        img_size = gr.Dropdown(
+                            choices=img_ui.size_choices,
+                            value=(
+                                img_defaults.size
+                                if img_defaults.size in img_ui.size_choices
+                                else (
+                                    img_ui.size_choices[0]
+                                    if img_ui.size_choices
+                                    else None  # no size choices configured at all
+                                )
+                            ),
+                            label="Size",
+                        )
+                        # style/quality hidden unless configured (DALL-E-3-specific).
+                        img_style = gr.Dropdown(
+                            choices=img_ui.style_choices or [],
+                            value=img_defaults.style,
+                            label="Style",
+                            visible=bool(img_ui.style_choices),
+                        )
+                        img_quality = gr.Dropdown(
+                            choices=img_ui.quality_choices or [],
+                            value=img_defaults.quality,
+                            label="Quality",
+                            visible=bool(img_ui.quality_choices),
+                        )
+                        img_btn = gr.Button("Generate Image", variant="primary")
+
+                img_btn.click(
+                    image_inference,
+                    [img_prompt, img_negative, img_size, img_style, img_quality],
+                    img_output,
+                )
+                health_widgets["tti"] = (
+                    image_status,
+                    [  # all follow health["tti"]
+                        img_prompt,
+                        img_negative,
+                        img_size,
+                        img_style,
+                        img_quality,
+                        img_btn,
+                    ],
+                )
+
+    # Order must match refresh_health: status_md then inputs, per backend.
+    health_outputs = [
+        w
+        for name in enabled
+        for w in (health_widgets[name][0], *health_widgets[name][1])
+    ]
+
+    def refresh_health() -> List[Any]:  # one update per health_outputs entry, same order
+        result: List[Any] = []
+        for backend in enabled:
+            result.append(gr.update(value=_status_markdown(backend)))
+            for _widget in health_widgets[backend][1]:
+                result.append(gr.update(interactive=health[backend]))
+        return result
+
+    # Timer is per-session and only drives the UI; probing is global.
+    demo.load(refresh_health, inputs=None, outputs=health_outputs)
+    gr.Timer(PROBE_INTERVAL_S).tick(refresh_health, inputs=None, outputs=health_outputs)
+
+
+if __name__ == "__main__":
+    for name in enabled:  # log model and URL of every enabled backend
+        cfg = getattr(settings, name)
+        log.info(f"  {name}: model={cfg.model_name} url={cfg.backend_url}")
+    demo.queue(default_concurrency_limit=settings.app.concurrency_limit).launch(
+        **launch_kwargs
+    )
diff --git a/web-apps/omni/defaults.yml b/web-apps/omni/defaults.yml
new file mode 100644
index 00000000..8c06df4d
--- /dev/null
+++ b/web-apps/omni/defaults.yml
@@ -0,0 +1,64 @@
+host_address: "0.0.0.0"
+
+page_title: "Omni Interface"
+page_description: "A unified interface for multimodal AI."
+
+# Application-wide tunables
+app:
+  probe_timeout_s: 2.0
+  probe_interval_s: 10.0
+  concurrency_limit: 5
+  # OpenAI client timeout (seconds) for inference calls unless a backend overrides it.
+  request_timeout_s: 1800.0
+
+# Each backend is optional; only configured backends are shown in the UI.
+#
+# Schema per backend:
+#   backend_url:       OpenAI-compatible base URL (without the /v1 suffix).
+#   model_name:        Model identifier accepted by the backend.
+#   system_prompt:     (ttt only) Optional system message; {date} is substituted.
+#   params:            Defaults sent on every request. Unknown keys go via extra_body.
+#   ui:                Optional dropdown choices and slider ranges (see below).
+#   request_timeout_s: Optional per-backend override of app.request_timeout_s.
+
+# TTT (text-to-text) backend - for text chat and multimodal understanding
+# ttt:
+#   backend_url: http://localhost:8000
+#   model_name: Qwen/Qwen2.5-Omni-7B
+#   system_prompt: "You are a helpful AI assistant. Today's date is {date}."
+#   params:
+#     max_tokens: 1024
+#     temperature: 0.7
+#     top_p: 0.9
+#     # Any extras (forwarded via extra_body):
+#     # repetition_penalty: 1.05
+#     # seed: 42
+
+# TTS (text-to-speech) backend
+tts:
+  backend_url: "http://localhost:8000"
+  model_name: "mistralai/Voxtral-4B-TTS-2603"
+  params:
+    voice: "casual_male"
+    response_format: "wav"
+  # ui:
+  #   voice_choices: ["casual_male", "casual_female"]
+  #   format_choices: ["wav", "mp3", "ogg", "aac", "flac"]
+
+# TTI (text-to-image) generation backend
+# tti:
+#   backend_url: http://localhost:8002
+#   model_name: Tongyi-MAI/Z-Image-Turbo
+#   params:
+#     size: 1024x1024
+#   ui:
+#     size_choices: ["1024x1024", "768x1024", "1024x768"]
+#     # style_choices: ["vivid", "natural"]
+#     # quality_choices: ["standard", "hd"]
+#     show_negative_prompt: true
+
+# Gradio theme and CSS/JavaScript overrides (explicit null: no value configured)
+theme_params: {}
+theme_params_extended: {}
+css_overrides: null
+custom_javascript: null
diff --git a/web-apps/omni/requirements.txt b/web-apps/omni/requirements.txt
new file mode 100644
index 00000000..ffe1c299
--- /dev/null
+++ b/web-apps/omni/requirements.txt
@@ -0,0 +1,10 @@
+gradio<6
+gradio_client
+httpx
+openai
+pydantic
+structlog
+pillow
+numpy
+scipy
+../utils
diff --git a/web-apps/utils/utils.py b/web-apps/utils/utils.py
index cb99776b..cf8db30a 100644
--- a/web-apps/utils/utils.py
+++ b/web-apps/utils/utils.py
@@ -24,8 +24,10 @@ def get_logger():
     structlog.configure(wrapper_class=structlog.make_filtering_bound_logger(log_level))
     return structlog.get_logger()
 
+
 log = get_logger()
 
+
 class LLMParams(BaseModel):
     """
     Parameters for vLLM API requests. For details see