From 01b332f69101702caef80dbb2b1910c484c3ec67 Mon Sep 17 00:00:00 2001 From: Doug Holt Date: Thu, 2 Jul 2026 06:44:06 -0600 Subject: [PATCH 1/2] chore(release): bump component versions for 26.07 --- roles/alertmanager/defaults/main.yml | 2 +- roles/grafana/defaults/main.yml | 2 +- roles/nvidia-gpu-operator/defaults/main.yml | 2 +- roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml | 2 +- roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml | 2 +- roles/nvidia-mig-manager/defaults/main.yml | 4 ++-- roles/nvidia-network-operator/vars/main.yaml | 2 +- roles/prometheus/defaults/main.yml | 2 +- roles/slurm/defaults/main.yml | 2 +- roles/spack/defaults/main.yml | 2 +- scripts/k8s/deploy_monitoring.sh | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/roles/alertmanager/defaults/main.yml b/roles/alertmanager/defaults/main.yml index 8bfdbc49d..7862048f9 100644 --- a/roles/alertmanager/defaults/main.yml +++ b/roles/alertmanager/defaults/main.yml @@ -1,6 +1,6 @@ alertmanager_config_dir: /etc/alertmanager alertmanager_config_src: templates/alertmanager.yml.j2 -alertmanager_container: "prom/alertmanager:v0.32.1" +alertmanager_container: "prom/alertmanager:v0.33.0" alertmanager_svc_name: "docker.alertmanager.service" alertmanager_docker_volume_name: "deepops_alertmanager_metrics" alertmanager_state: started diff --git a/roles/grafana/defaults/main.yml b/roles/grafana/defaults/main.yml index 920a37943..0299dae4c 100644 --- a/roles/grafana/defaults/main.yml +++ b/roles/grafana/defaults/main.yml @@ -2,7 +2,7 @@ grafana_config_dir: /etc/grafana grafana_config_template: templates/grafana.ini.j2 grafana_data_dir: /var/lib/grafana grafana_user_id: 472 -grafana_container: "grafana/grafana:13.0.1" +grafana_container: "grafana/grafana:13.1.0" grafana_svc_name: "docker.grafana.service" grafana_state: started grafana_enabled: yes diff --git a/roles/nvidia-gpu-operator/defaults/main.yml b/roles/nvidia-gpu-operator/defaults/main.yml index ebecc5d60..fb9dc7144 
100644 --- a/roles/nvidia-gpu-operator/defaults/main.yml +++ b/roles/nvidia-gpu-operator/defaults/main.yml @@ -12,7 +12,7 @@ gpu_operator_nvaie_helm_repo: "https://helm.ngc.nvidia.com/nvaie" gpu_operator_nvaie_chart_name: "nvaie/gpu-operator" # NVAIE GPU Operator may require different version, check NGC enterprise collection. -gpu_operator_chart_version: "v26.3.1" +gpu_operator_chart_version: "v26.3.3" k8s_gpu_mig_strategy: "mixed" diff --git a/roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml b/roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml index 021d9a3f2..6b4306af7 100644 --- a/roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml +++ b/roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml @@ -2,6 +2,6 @@ k8s_gpu_plugin_helm_repo: "https://nvidia.github.io/k8s-device-plugin" k8s_gpu_plugin_chart_name: "nvdp/nvidia-device-plugin" k8s_gpu_plugin_release_name: "nvidia-device-plugin" -k8s_gpu_plugin_chart_version: "0.19.1" +k8s_gpu_plugin_chart_version: "0.19.3" k8s_gpu_plugin_init_error: "false" k8s_gpu_mig_strategy: "mixed" diff --git a/roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml b/roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml index 77111b3ff..ec7bbd0bf 100644 --- a/roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml +++ b/roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml @@ -2,5 +2,5 @@ k8s_gpu_feature_discovery_helm_repo: "https://nvidia.github.io/k8s-device-plugin" k8s_gpu_feature_discovery_chart_name: "nvgfd/gpu-feature-discovery" k8s_gpu_feature_discovery_release_name: "gpu-feature-discovery" -k8s_gpu_feature_discovery_chart_version: "0.19.1" +k8s_gpu_feature_discovery_chart_version: "0.19.3" k8s_gpu_mig_strategy: "mixed" diff --git a/roles/nvidia-mig-manager/defaults/main.yml b/roles/nvidia-mig-manager/defaults/main.yml index 80d0c60f4..fde9e0f02 100644 --- a/roles/nvidia-mig-manager/defaults/main.yml +++ b/roles/nvidia-mig-manager/defaults/main.yml @@ -1,3 +1,3 @@ --- -mig_manager_url_deb: 
https://github.com/NVIDIA/mig-parted/releases/download/v0.14.1/nvidia-mig-manager_0.14.1-1_amd64.deb -mig_manager_url_rpm: https://github.com/NVIDIA/mig-parted/releases/download/v0.14.1/nvidia-mig-manager-0.14.1-1.x86_64.rpm +mig_manager_url_deb: https://github.com/NVIDIA/mig-parted/releases/download/v0.14.2/nvidia-mig-manager_0.14.2-1_amd64.deb +mig_manager_url_rpm: https://github.com/NVIDIA/mig-parted/releases/download/v0.14.2/nvidia-mig-manager-0.14.2-1.x86_64.rpm diff --git a/roles/nvidia-network-operator/vars/main.yaml b/roles/nvidia-network-operator/vars/main.yaml index 7be8ee253..f7976df43 100644 --- a/roles/nvidia-network-operator/vars/main.yaml +++ b/roles/nvidia-network-operator/vars/main.yaml @@ -6,7 +6,7 @@ # if_name must match k8s network annotation name # -nvidia_network_operator_version: "26.1.1" +nvidia_network_operator_version: "26.4.0" nvidia_network_operator_image_tag: "network-operator-v{{ nvidia_network_operator_version }}" nvidia_network_operator_namespace: "network-operator" nvidia_network_operator_ipam_type: "nv-ipam" diff --git a/roles/prometheus/defaults/main.yml b/roles/prometheus/defaults/main.yml index 7349da1b3..fe354a7b8 100644 --- a/roles/prometheus/defaults/main.yml +++ b/roles/prometheus/defaults/main.yml @@ -1,7 +1,7 @@ prometheus_config_dir: /etc/prometheus prometheus_config_src: templates/prometheus.yml.j2 prometheus_alert_rules_src: templates/alert_rules.yml.j2 -prometheus_container: "prom/prometheus:v3.11.3" +prometheus_container: "prom/prometheus:v3.13.0" prometheus_svc_name: "docker.prometheus.service" prometheus_docker_volume_name: "deepops_prometheus_metrics" prometheus_state: started diff --git a/roles/slurm/defaults/main.yml b/roles/slurm/defaults/main.yml index 735ae16ee..ac0435c0e 100644 --- a/roles/slurm/defaults/main.yml +++ b/roles/slurm/defaults/main.yml @@ -7,7 +7,7 @@ hwloc_build_dir: /opt/deepops/build/hwloc pmix_build_dir: /opt/deepops/build/pmix slurm_workflow_build: yes -slurm_version: "25.11.6" 
+slurm_version: "26.05.1" slurm_src_url: "https://download.schedmd.com/slurm/slurm-{{ slurm_version }}.tar.bz2" slurm_build_make_clean: no slurm_build_dir_cleanup: no diff --git a/roles/spack/defaults/main.yml b/roles/spack/defaults/main.yml index 76e356c63..63192c63b 100644 --- a/roles/spack/defaults/main.yml +++ b/roles/spack/defaults/main.yml @@ -1,7 +1,7 @@ --- spack_repo: "https://github.com/spack/spack.git" spack_install_dir: "/sw/spack" -spack_version: "v1.1.1" +spack_version: "v1.2.0" spack_user: "root" spack_group: "root" diff --git a/scripts/k8s/deploy_monitoring.sh b/scripts/k8s/deploy_monitoring.sh index 05a5851a7..51bdd7569 100755 --- a/scripts/k8s/deploy_monitoring.sh +++ b/scripts/k8s/deploy_monitoring.sh @@ -24,7 +24,7 @@ if [ ! -d "${DEEPOPS_CONFIG_DIR}" ]; then fi HELM_CHARTS_REPO_PROMETHEUS="${HELM_CHARTS_REPO_PROMETHEUS:-https://prometheus-community.github.io/helm-charts}" -HELM_PROMETHEUS_CHART_VERSION="${HELM_PROMETHEUS_CHART_VERSION:-85.0.3}" +HELM_PROMETHEUS_CHART_VERSION="${HELM_PROMETHEUS_CHART_VERSION:-87.5.1}" ingress_name="ingress-nginx" PROMETHEUS_YAML_CONFIG="${PROMETHEUS_YAML_CONFIG:-${DEEPOPS_CONFIG_DIR}/helm/monitoring.yml}" From 3dfe40ec62844ae747d0a13767b946cc2af55983 Mon Sep 17 00:00:00 2001 From: Doug Holt Date: Thu, 2 Jul 2026 06:44:06 -0600 Subject: [PATCH 2/2] fix: derive DEEPOPS_VERSION for script debug output scripts/common.sh printed an always-empty DEEPOPS_VERSION unless a user happened to set it in config/env.sh. Default it to the checkout's git tag description (git describe --tags --always) with an unknown fallback, keep the env.sh override, and document the variable in config.example/env.sh. 
--- config.example/env.sh | 4 ++++ scripts/common.sh | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/config.example/env.sh b/config.example/env.sh index 288e585db..02ce5b85e 100644 --- a/config.example/env.sh +++ b/config.example/env.sh @@ -4,3 +4,7 @@ # See deepops/scripts/common.sh for implementation details DEEPOPS_EXAMPLE_VAR="" + +# DeepOps version reported in script debug output. +# Defaults to the checkout's git tag description; uncomment and set a non-empty value to override. +# DEEPOPS_VERSION="" diff --git a/scripts/common.sh b/scripts/common.sh index 024f0b690..a08174d38 100644 --- a/scripts/common.sh +++ b/scripts/common.sh @@ -19,5 +19,11 @@ else source ${DEEPOPS_CONFIG_DIR}/env.sh fi +# Determine the DeepOps version for debug output. +# Can be overridden in config/env.sh; defaults to the checkout's git tag description. +if [ -z "${DEEPOPS_VERSION}" ]; then + DEEPOPS_VERSION="$(git -C "${ROOT_DIR}" describe --tags --always 2>/dev/null || echo unknown)" +fi + # Print out base debug echo "Starting '${0}'; DeepOps version '${DEEPOPS_VERSION}'"