Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7a46c9c
Add observability settings to profiles
cjnolan May 5, 2026
c570586
Rename file
cjnolan May 6, 2026
ef4ed4a
Fix typos
cjnolan May 6, 2026
7f86eab
Merge branch 'EMF_HELMDEPLOY_2026_01' into add-o11y-changes
Copilot May 7, 2026
e3321de
Delete post-orch/environments/profile-coder.yaml.gotmpl
sunil-parida May 7, 2026
dd1dcb1
Merge branch 'EMF_HELMDEPLOY_2026_01' into add-o11y-changes
sunil-parida May 7, 2026
adc3474
Merge branch 'EMF_HELMDEPLOY_2026_01' into add-o11y-changes
sunil-parida May 7, 2026
6eedcb6
Add chart definitions for observability charts
cjnolan May 7, 2026
687fb9a
Fix typos
cjnolan May 7, 2026
901745e
Add alerting-monitor config as well as edgenode-observability fixes
cjnolan May 8, 2026
28673a2
Add missing prometheus-crd install
cjnolan May 8, 2026
c8cd6ff
Fix typos
cjnolan May 11, 2026
3de0173
Add workaround for Loki backend pod affinity settings
cjnolan May 11, 2026
42b1c1e
Fix mimir post install job for minio configuration
cjnolan May 11, 2026
0687660
Add pod affinity settings for other loki pods
cjnolan May 12, 2026
ac8fe00
Fix typo for mimir store gateway resources
cjnolan May 12, 2026
1c61695
Disable sre for observability tenant controller
cjnolan May 12, 2026
2e48dfa
Remove duplicated alerting-monitor settings
cjnolan May 12, 2026
d8e5f54
Update observability tenant controller enablement to match argo confi…
cjnolan May 12, 2026
4b07a9b
Merge branch 'EMF_HELMDEPLOY_2026_01' into add-o11y-changes
cjnolan May 12, 2026
c07795b
Address review comments
cjnolan May 13, 2026
e006671
Add separate enable flag for prometheus-crd install
cjnolan May 13, 2026
8a73303
Merge branch 'EMF_HELMDEPLOY_2026_01' into add-o11y-changes
cjnolan May 13, 2026
2874222
Merge branch 'EMF_HELMDEPLOY_2026_01' into add-o11y-changes
cjnolan May 14, 2026
c8d5106
Merge branch 'EMF_HELMDEPLOY_2026_01' into add-o11y-changes
cjnolan May 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions post-orch/environments/defaults-disabled.yaml.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,24 @@ cluster-manager:

# Releases that stay switched off unless a profile explicitly enables them.
intel-infra-provider:
  enabled: false

# Observability stack
observability-crds:
  enabled: false
orchestrator-observability:
  enabled: false
orchestrator-dashboards:
  enabled: false
orchestrator-prometheus-agent:
  enabled: false
alerting-monitor:
  enabled: false
edgenode-observability:
  enabled: false
edgenode-dashboards:
  enabled: false
# only enable if alerting-monitor and edgenode-observability enabled
observability-tenant-controller:
  enabled: false
tempo:
  enabled: false

web-ui-app-orch:
  enabled: false
web-ui-cluster-orch:
Expand Down
8 changes: 4 additions & 4 deletions post-orch/environments/onprem-eim-settings.yaml.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -151,10 +151,10 @@ pxe:
{{- end }}

# ─── O11y replica settings ──────────────────────────────────────────────────
o11y:
alertingMonitor:
alerting-monitor:
minReplicas: {{ env "EMF_ALERTING_MIN_REPLICAS" | default "1" }}
maxReplicas: {{ env "EMF_ALERTING_MAX_REPLICAS" | default "1" }}
alertmanagerReplicas: {{ env "EMF_ALERTMANAGER_REPLICAS" | default "1" }}
smtpInitialize: {{ env "EMF_SMTP_INITIALIZE" | default "false" }}
replicas: {{ env "EMF_ALERTMANAGER_REPLICAS" | default "1" }}
smtp:
initialize: {{ env "EMF_SMTP_INITIALIZE" | default "false" }}

10 changes: 10 additions & 0 deletions post-orch/environments/profile-vpro.yaml.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,13 @@ infra-external:
enabled: false
loca-templates-manager:
enabled: false

# ═══════════════════════════════════════════════════════════════════════════════
# Optional releases (disabled in defaults; set `enabled: true` below if required)
# ═══════════════════════════════════════════════════════════════════════════════
# Observability releases are left off for this profile.
observability-crds:
  enabled: false
edgenode-observability:
  enabled: false
edgenode-dashboards:
  enabled: false
136 changes: 134 additions & 2 deletions post-orch/helmfile.yaml.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ repositories:
{{- end }}
# Stakater chart repository.
- name: stakater
  url: https://stakater.github.io/stakater-charts
# prometheus-community repository; referenced by releases as `prometheus/<chart>`.
- name: prometheus
  url: https://prometheus-community.github.io/helm-charts
- name: cnpg
oci: true
url: ghcr.io/cloudnative-pg/charts
Expand Down Expand Up @@ -211,6 +213,19 @@ releases:
- ns-label/namespace-label


# Prometheus operator CRDs. The observability releases list
# orch-platform/prometheus-crd in their `needs`, so this installs first.
- name: prometheus-crd
  labels:
    # Kept identical to the release name, as in the other releases, so that
    # label selectors on `app` match (was misspelled "promtheus-crd").
    app: prometheus-crd
    wave: "100"
  namespace: orch-platform
  chart: prometheus/prometheus-operator-crds
  wait: true
  version: 24.0.1
  # Controlled by the observability-crds flag, not a per-release flag.
  condition: observability-crds.enabled
  needs:
    - ns-label/namespace-label


#############################################################################
# Wave 105 — Kyverno policies (needs all of Wave 100)
#############################################################################
Expand Down Expand Up @@ -541,7 +556,7 @@ releases:


#############################################################################
# Wave 1000 — Traefik pre-config (needs all of Wave 180)
# Wave 1000 — Traefik pre-config + Observability (needs all of Wave 180)
#############################################################################

- name: traefik-pre
Expand All @@ -561,6 +576,86 @@ releases:
- orch-infra/copy-keycloak-admin-to-infra


# Edge-node observability release; installed into orch-infra once the CA/Keycloak
# secret copies and the Prometheus CRDs are in place.
- name: edgenode-observability
  namespace: orch-infra
  chart: edge-orch/o11y/charts/edgenode-observability
  version: "0.10.21"
  condition: edgenode-observability.enabled
  wait: true
  labels:
    app: edgenode-observability
    wave: "1000"
  values:
    - values/edgenode-observability.yaml.gotmpl
    - values/resource-overrides.yaml
  needs:
    - cattle-system/copy-ca-cert-gateway-to-cattle
    - orch-infra/copy-ca-cert-gateway-to-infra
    - orch-infra/copy-keycloak-admin-to-infra
    - orch-platform/prometheus-crd


# Edge-node dashboards; installed after edgenode-observability.
- name: edgenode-dashboards
  namespace: orch-infra
  chart: edge-orch/o11y/charts/edgenode-dashboards
  version: "0.3.10"
  condition: edgenode-dashboards.enabled
  wait: true
  labels:
    app: edgenode-dashboards
    wave: "1000"
  needs:
    - orch-infra/edgenode-observability


# Orchestrator-side observability release; installed into orch-platform once the
# CA/Keycloak secret copies and the Prometheus CRDs are in place.
- name: orchestrator-observability
  namespace: orch-platform
  chart: edge-orch/o11y/charts/orchestrator-observability
  version: "0.5.18"
  condition: orchestrator-observability.enabled
  wait: true
  labels:
    app: orchestrator-observability
    wave: "1000"
  values:
    - values/orchestrator-observability.yaml.gotmpl
    - values/resource-overrides.yaml
  needs:
    - cattle-system/copy-ca-cert-gateway-to-cattle
    - orch-infra/copy-ca-cert-gateway-to-infra
    - orch-infra/copy-keycloak-admin-to-infra
    - orch-platform/prometheus-crd


# Orchestrator dashboards; installed after orchestrator-observability.
- name: orchestrator-dashboards
  namespace: orch-platform
  chart: edge-orch/o11y/charts/orchestrator-dashboards
  version: "0.3.2"
  condition: orchestrator-dashboards.enabled
  wait: true
  labels:
    app: orchestrator-dashboards
    wave: "1000"
  needs:
    - orch-platform/orchestrator-observability


# Prometheus agent for the orchestrator, deployed from the kube-prometheus-stack
# chart in the `prometheus` repository; installed after orchestrator-observability.
- name: orchestrator-prometheus-agent
  namespace: orch-platform
  chart: prometheus/kube-prometheus-stack
  version: "79.7.1"
  condition: orchestrator-prometheus-agent.enabled
  wait: true
  labels:
    app: orchestrator-prometheus-agent
    wave: "1000"
  values:
    - values/orchestrator-prometheus-agent.yaml.gotmpl
    - values/resource-overrides.yaml
  needs:
    - orch-platform/orchestrator-observability


#############################################################################
# Wave 1100 — Traefik + HAProxy + Kyverno policies (needs all of Wave 1000)
#############################################################################
Expand Down Expand Up @@ -846,6 +941,26 @@ releases:
- orch-secret/token-fs


#############################################################################
# Wave 1900 — alerting-monitor (needs all of Wave 1400)
#############################################################################

# Alerting monitor; installed into orch-infra after edgenode-observability.
- name: alerting-monitor
  namespace: orch-infra
  chart: edge-orch/o11y/charts/alerting-monitor
  version: "1.7.7"
  condition: alerting-monitor.enabled
  wait: true
  labels:
    app: alerting-monitor
    wave: "1900"
  values:
    - values/alerting-monitor.yaml.gotmpl
    - values/resource-overrides.yaml
  needs:
    - orch-infra/edgenode-observability


#############################################################################
# Wave 2000 — Core services (needs all of Wave 1400)
#############################################################################
Expand Down Expand Up @@ -900,7 +1015,7 @@ releases:


#############################################################################
# Wave 2005 — Auth service (needs all of Wave 2000)
# Wave 2005 — Auth service + o11y tenant controller (needs all of Wave 2000)
#############################################################################

- name: auth-service
Expand All @@ -921,6 +1036,23 @@ releases:
- orch-ui/metadata-broker


# Observability tenant controller; depends on both edgenode-observability and
# alerting-monitor being installed first.
- name: observability-tenant-controller
  namespace: orch-platform
  chart: edge-orch/o11y/charts/observability-tenant-controller
  version: "0.7.1"
  condition: observability-tenant-controller.enabled
  wait: true
  labels:
    app: observability-tenant-controller
    wave: "2005"
  values:
    - values/observability-tenant-controller.yaml.gotmpl
    - values/resource-overrides.yaml
  needs:
    - orch-infra/edgenode-observability
    - orch-infra/alerting-monitor


#############################################################################
# Wave 2100 — Infra services (needs all of Wave 2005)
#############################################################################
Expand Down
121 changes: 121 additions & 0 deletions post-orch/values/alerting-monitor.yaml.gotmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# SPDX-FileCopyrightText: 2026 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

---
# Fixed default for the initial rules; not driven by environment values here.
initialRules:
hostAggregationWindowSeconds: 240

# Pins the Open Policy Agent image tag and sets its config profile to "compressed".
openPolicyAgent:
image:
tag: 1.10.1-static
config:
profile: "compressed"

# The istio-proxy stanza is rendered only when .Values.istio.enabled stringifies to "true".
{{- if eq (.Values.istio.enabled | toString) "true" }}
istio-proxy:
securityContext:
readOnlyRootFilesystem: true
{{- end }}

# https://doc.traefik.io/traefik/migrate/v2-to-v3-details/#kubernetes-crds-api-group-traefikcontainous
traefikApiGroup: "traefik.io/v1alpha1"

# Pins the image tag used by alertmanager's configmapReload.
alertmanager:
configmapReload:
image:
tag: v0.89.0

{{- $am := index .Values "alerting-monitor" | default dict }}
# Replica bounds: taken from the alerting-monitor environment values when the
# key exists, otherwise 2 (min) and 5 (max).
minReplicas: {{ dig "minReplicas" 2 $am }}

maxReplicas: {{ dig "maxReplicas" 5 $am }}

# Both registry values are filled from the shared .Values.containerRegistryURL.
image:
registry: {{ .Values.containerRegistryURL }}
management:
registry: {{ .Values.containerRegistryURL }}

# Pull secrets: render the configured list, or an explicit empty list when none
# are set. `if` does not rebind the dot, so the list is named explicitly in
# toYaml (a bare `.` here would serialize the whole root context).
{{- if .Values.imagePullSecrets }}
imagePullSecrets:
  {{- toYaml .Values.imagePullSecrets | nindent 2 }}
{{- else }}
imagePullSecrets: []
{{- end }}

# CA secret reference; the key inside the secret comes from the alerting-monitor
# environment values when set and is ca.crt otherwise.
caSecretName: gateway-ca-cert
caSecretKey: {{ dig "caSecretKey" "ca.crt" $am }}

# SMTP settings are emitted only when the alerting-monitor environment values define `smtp`.
{{- if hasKey $am "smtp" }}
smtp:
# initialize and configSecret are passed through only when present.
{{- if hasKey $am.smtp "initialize" }}
initialize: {{ $am.smtp.initialize }}
{{- end }}
{{- if hasKey $am.smtp "configSecret" }}
configSecret: {{ $am.smtp.configSecret }}
{{- end }}
# dig supplies false when smtp.userPasswordAuth is not set.
userPasswordAuth: {{ dig "smtp" "userPasswordAuth" false $am }}
{{- if hasKey $am.smtp "userPasswordAuth" }}
{{- if eq ($am.smtp.userPasswordAuth | toString) "true" }}
# passwordSecret.name and .key are read without a guard once userPasswordAuth is "true".
passwordSecret:
name: {{ $am.smtp.passwordSecret.name }}
key: {{ $am.smtp.passwordSecret.key }}
{{- end }}
{{- end }}
# dig supplies false when smtp.requireTls is not set.
requireTls: {{ dig "smtp" "requireTls" false $am }}
{{- if hasKey $am.smtp "requireTls" }}
{{- if eq ($am.smtp.requireTls | toString) "true" }}
# insecureSkipVerify is emitted only alongside requireTls "true"; defaults to false.
insecureSkipVerify: {{ dig "smtp" "insecureSkipVerify" false $am }}
{{- end }}
{{- end }}
{{- end }}

# Rule toggles taken from the alerting-monitor environment values; all three
# sub-keys are read without individual guards.
# NOTE(review): `initialRules:` is also emitted near the top of this template; if
# both are top-level keys, the rendered YAML repeats the key - confirm how the
# consumer resolves it.
{{- if hasKey $am "initialRules" }}
initialRules:
hostRules: {{ $am.initialRules.hostRules }}
appDeploymentRules: {{ $am.initialRules.appDeploymentRules }}
clusterRules: {{ $am.initialRules.clusterRules }}
{{- end }}

# OIDC server (keycloak host, realm "master") and UI addresses, all built from .Values.clusterDomain.
authentication:
oidcServer: https://keycloak.{{ .Values.clusterDomain }}:443
oidcServerRealm: master
webUIAddress: "https://web-ui.{{ .Values.clusterDomain }}"
observabilityUIAddress: "https://observability-ui.{{ .Values.clusterDomain }}"

# Traefik routing: the host rule is always set; tlsOption is added only when
# the traefik environment values have enabled set to "true".
traefik:
  matchRoute: Host(`alerting-monitor.{{ .Values.clusterDomain }}`)
  {{- $tr := index .Values "traefik" | default dict }}
  # $tr may be the empty-dict fallback, so its keys are read through dig
  # (with a default) rather than dereferenced directly; a direct field read on
  # a missing key fails when the renderer treats missing keys as errors.
  {{- if eq (dig "enabled" false $tr | toString) "true" }}
  tlsOption: {{ dig "tlsOption" "" $tr | default "" | quote }}
  {{- end }}

# Secret name is built from .Values.database.type; ssl is passed through from .Values.database.ssl.
database:
databaseSecret: alerting-{{ .Values.database.type }}-postgresql
ssl: {{ .Values.database.ssl }}

# Alertmanager overrides, emitted only when the alerting-monitor environment
# values define `commonConfig`.
# NOTE(review): `alertmanager:` is also emitted earlier in this template (the
# configmapReload image tag); when this stanza renders, the output carries the
# key twice - confirm how the consumer resolves it.
# NOTE(review): `replicas` is only honored inside this commonConfig guard -
# confirm that is intended.
{{- if hasKey $am "commonConfig" }}
alertmanager:
  # dig returns 2 when `replicas` is absent instead of dereferencing a missing
  # key; the trailing default keeps the original fallback for empty values.
  replicaCount: {{ dig "replicas" 2 $am | default 2 }}
  {{- if hasKey $am.commonConfig "storageClass" }}
  persistence:
    storageClass: {{ $am.commonConfig.storageClass }}
  {{- end }}
{{- end }}

# Emitted only when the alerting-monitor environment values define `config`;
# config.groupInterval is read without its own guard.
{{- if hasKey $am "config" }}
alertManagerConfig:
groupInterval: {{ $am.config.groupInterval }}
{{- end }}
Loading
Loading