Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions config.example/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@
# See deepops/scripts/common.sh for implementation details

DEEPOPS_EXAMPLE_VAR=""

# DeepOps version reported in script debug output.
# Defaults to the checkout's git tag description; uncomment to override.
# DEEPOPS_VERSION=""
2 changes: 1 addition & 1 deletion roles/alertmanager/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
alertmanager_config_dir: /etc/alertmanager
alertmanager_config_src: templates/alertmanager.yml.j2
alertmanager_container: "prom/alertmanager:v0.32.1"
alertmanager_container: "prom/alertmanager:v0.33.0"
alertmanager_svc_name: "docker.alertmanager.service"
alertmanager_docker_volume_name: "deepops_alertmanager_metrics"
alertmanager_state: started
Expand Down
2 changes: 1 addition & 1 deletion roles/grafana/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ grafana_config_dir: /etc/grafana
grafana_config_template: templates/grafana.ini.j2
grafana_data_dir: /var/lib/grafana
grafana_user_id: 472
grafana_container: "grafana/grafana:13.0.1"
grafana_container: "grafana/grafana:13.1.0"
grafana_svc_name: "docker.grafana.service"
grafana_state: started
grafana_enabled: yes
Expand Down
2 changes: 1 addition & 1 deletion roles/nvidia-gpu-operator/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ gpu_operator_nvaie_helm_repo: "https://helm.ngc.nvidia.com/nvaie"
gpu_operator_nvaie_chart_name: "nvaie/gpu-operator"

# The NVAIE GPU Operator may require a different version; check the NGC enterprise collection.
gpu_operator_chart_version: "v26.3.1"
gpu_operator_chart_version: "v26.3.3"

k8s_gpu_mig_strategy: "mixed"

Expand Down
2 changes: 1 addition & 1 deletion roles/nvidia-k8s-gpu-device-plugin/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
k8s_gpu_plugin_helm_repo: "https://nvidia.github.io/k8s-device-plugin"
k8s_gpu_plugin_chart_name: "nvdp/nvidia-device-plugin"
k8s_gpu_plugin_release_name: "nvidia-device-plugin"
k8s_gpu_plugin_chart_version: "0.19.1"
k8s_gpu_plugin_chart_version: "0.19.3"
k8s_gpu_plugin_init_error: "false"
k8s_gpu_mig_strategy: "mixed"
2 changes: 1 addition & 1 deletion roles/nvidia-k8s-gpu-feature-discovery/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
k8s_gpu_feature_discovery_helm_repo: "https://nvidia.github.io/k8s-device-plugin"
k8s_gpu_feature_discovery_chart_name: "nvgfd/gpu-feature-discovery"
k8s_gpu_feature_discovery_release_name: "gpu-feature-discovery"
k8s_gpu_feature_discovery_chart_version: "0.19.1"
k8s_gpu_feature_discovery_chart_version: "0.19.3"
k8s_gpu_mig_strategy: "mixed"
4 changes: 2 additions & 2 deletions roles/nvidia-mig-manager/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
---
mig_manager_url_deb: https://github.com/NVIDIA/mig-parted/releases/download/v0.14.1/nvidia-mig-manager_0.14.1-1_amd64.deb
mig_manager_url_rpm: https://github.com/NVIDIA/mig-parted/releases/download/v0.14.1/nvidia-mig-manager-0.14.1-1.x86_64.rpm
mig_manager_url_deb: https://github.com/NVIDIA/mig-parted/releases/download/v0.14.2/nvidia-mig-manager_0.14.2-1_amd64.deb
mig_manager_url_rpm: https://github.com/NVIDIA/mig-parted/releases/download/v0.14.2/nvidia-mig-manager-0.14.2-1.x86_64.rpm
2 changes: 1 addition & 1 deletion roles/nvidia-network-operator/vars/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# if_name must match k8s network annotation name
#

nvidia_network_operator_version: "26.1.1"
nvidia_network_operator_version: "26.4.0"
nvidia_network_operator_image_tag: "network-operator-v{{ nvidia_network_operator_version }}"
nvidia_network_operator_namespace: "network-operator"
nvidia_network_operator_ipam_type: "nv-ipam"
Expand Down
2 changes: 1 addition & 1 deletion roles/prometheus/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
prometheus_config_dir: /etc/prometheus
prometheus_config_src: templates/prometheus.yml.j2
prometheus_alert_rules_src: templates/alert_rules.yml.j2
prometheus_container: "prom/prometheus:v3.11.3"
prometheus_container: "prom/prometheus:v3.13.0"
prometheus_svc_name: "docker.prometheus.service"
prometheus_docker_volume_name: "deepops_prometheus_metrics"
prometheus_state: started
Expand Down
2 changes: 1 addition & 1 deletion roles/slurm/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ hwloc_build_dir: /opt/deepops/build/hwloc
pmix_build_dir: /opt/deepops/build/pmix

slurm_workflow_build: yes
slurm_version: "25.11.6"
slurm_version: "26.05.1"
slurm_src_url: "https://download.schedmd.com/slurm/slurm-{{ slurm_version }}.tar.bz2"
slurm_build_make_clean: no
slurm_build_dir_cleanup: no
Expand Down
2 changes: 1 addition & 1 deletion roles/spack/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
spack_repo: "https://github.com/spack/spack.git"
spack_install_dir: "/sw/spack"
spack_version: "v1.1.1"
spack_version: "v1.2.0"
spack_user: "root"
spack_group: "root"

Expand Down
6 changes: 6 additions & 0 deletions scripts/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,11 @@ else
source ${DEEPOPS_CONFIG_DIR}/env.sh
fi

# Resolve DEEPOPS_VERSION, which is only used in the debug banner below.
# A non-empty value that is already set (e.g. by the sourced
# ${DEEPOPS_CONFIG_DIR}/env.sh) is kept as-is. Otherwise ask git to describe
# the checkout at ROOT_DIR; if git fails for any reason, its error output is
# discarded and the literal string "unknown" is used instead.
[ -n "${DEEPOPS_VERSION}" ] ||
DEEPOPS_VERSION="$(git -C "${ROOT_DIR}" describe --tags --always 2>/dev/null || echo unknown)"

# Debug banner: name of the running script and the resolved DeepOps version
echo "Starting '${0}'; DeepOps version '${DEEPOPS_VERSION}'"
2 changes: 1 addition & 1 deletion scripts/k8s/deploy_monitoring.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ if [ ! -d "${DEEPOPS_CONFIG_DIR}" ]; then
fi

HELM_CHARTS_REPO_PROMETHEUS="${HELM_CHARTS_REPO_PROMETHEUS:-https://prometheus-community.github.io/helm-charts}"
HELM_PROMETHEUS_CHART_VERSION="${HELM_PROMETHEUS_CHART_VERSION:-85.0.3}"
HELM_PROMETHEUS_CHART_VERSION="${HELM_PROMETHEUS_CHART_VERSION:-87.5.1}"
ingress_name="ingress-nginx"

PROMETHEUS_YAML_CONFIG="${PROMETHEUS_YAML_CONFIG:-${DEEPOPS_CONFIG_DIR}/helm/monitoring.yml}"
Expand Down
Loading