Skip to content

Commit f8396e0

Browse files
authored
Merge pull request #2126 from stackhpc/rl-9.7-caracal
Caracal upgrade to Rocky Linux 9.7
2 parents 9dc7301 + 1e34959 commit f8396e0

File tree

21 files changed

+568
-210
lines changed

21 files changed

+568
-210
lines changed

.github/workflows/stackhpc-all-in-one.yml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,26 @@ jobs:
306306
env:
307307
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}
308308

309+
# Pre-pull the overcloud container images before the service deploy step,
# retrying to ride out transient registry/network failures.
- name: Pull container images
  run: |
    # Try up to 3 times to pull container images.
    # NOTE: docker run is tested directly in the `if` condition. GitHub
    # Actions runs this script with errexit (`bash -e`) by default, so a
    # separate `$?` check would never be reached after a failed attempt and
    # the retry loop would abort on the first failure.
    for attempt in $(seq 3); do
      if docker run -t --rm \
        -v "$(pwd)":/stack/kayobe-automation-env/src/kayobe-config \
        -e KAYOBE_ENVIRONMENT -e KAYOBE_VAULT_PASSWORD -e KAYOBE_AUTOMATION_SSH_PRIVATE_KEY \
        ${{ steps.kayobe_image.outputs.kayobe_image }} \
        /stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/overcloud-container-image-pull.sh; then
        echo "Pulled container images on attempt $attempt"
        exit 0
      fi
      echo "Failed to pull container images on attempt $attempt"
    done
    # All attempts failed; $attempt still holds the last attempt number.
    echo "Failed to pull container images after $attempt attempts"
    exit 1
  env:
    KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}
328+
309329
- name: Service deploy
310330
run: |
311331
docker run -t --rm \

.github/workflows/stackhpc-container-image-build.yml

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,12 @@ on:
3838
type: boolean
3939
required: false
4040
default: true
41-
push-dirty:
41+
sbom:
42+
description: Generate SBOM?
43+
type: boolean
44+
required: false
45+
default: true
46+
push-critical:
4247
description: Push scanned images that have critical vulnerabilities?
4348
type: boolean
4449
required: false
@@ -82,14 +87,14 @@ jobs:
8287
id: openstack_release
8388
run: |
8489
BRANCH=$(awk -F'=' '/defaultbranch/ {print $2}' .gitreview)
85-
echo "openstack_release=${BRANCH}" | sed -E "s,(stable|unmaintained)/,," >> $GITHUB_OUTPUT
90+
echo "openstack_release=${BRANCH}" | sed -E "s,(stable|unmaintained)/,," | tee -a "$GITHUB_OUTPUT"
8691
8792
# Generate a tag to apply to all built container images.
8893
# Without this, each kayobe * container image build command would use a different tag.
8994
- name: Generate container datetime tag
9095
id: datetime_tag
9196
run: |
92-
echo "datetime_tag=$(date +%Y%m%dT%H%M%S)" >> $GITHUB_OUTPUT
97+
echo "datetime_tag=$(date +%Y%m%dT%H%M%S)" | tee -a "$GITHUB_OUTPUT"
9398
9499
# Dynamically define job matrix.
95100
# We need a separate matrix entry for each distribution, when the relevant input is true.
@@ -113,7 +118,7 @@ jobs:
113118
# remove trailing comma
114119
output="${output%,}"
115120
output+="]}"
116-
echo "matrix=$output" >> $GITHUB_OUTPUT
121+
echo "matrix=$output" | tee -a "$GITHUB_OUTPUT"
117122
118123
- name: Display container datetime tag
119124
run: |
@@ -187,7 +192,7 @@ jobs:
187192
188193
- name: Get Kolla tag
189194
id: write-kolla-tag
190-
run: echo "kolla-tag=${{ needs.generate-tag.outputs.openstack_release }}-${{ matrix.distro.name }}-${{ matrix.distro.release }}-${{ needs.generate-tag.outputs.datetime_tag }}" >> $GITHUB_OUTPUT
195+
run: echo "kolla-tag=${{ needs.generate-tag.outputs.openstack_release }}-${{ matrix.distro.name }}-${{ matrix.distro.release }}-${{ needs.generate-tag.outputs.datetime_tag }}" | tee -a "$GITHUB_OUTPUT"
191196

192197
- name: Configure localhost as a seed
193198
run: |
@@ -198,17 +203,6 @@ jobs:
198203
localhost ansible_connection=local ansible_python_interpreter=/usr/bin/python3
199204
EOF
200205
201-
# See etc/kayobe/ansible/roles/pulp_auth_proxy/README.md for details.
202-
# NOTE: We override pulp_auth_proxy_conf_path to a path shared by the
203-
# runner and dind containers.
204-
- name: Deploy an authenticating package repository mirror proxy
205-
run: |
206-
source venvs/kayobe/bin/activate &&
207-
source src/kayobe-config/kayobe-env --environment ci-builder &&
208-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/pulp-auth-proxy.yml -e pulp_auth_proxy_conf_path=/home/runner/_work/pulp_proxy
209-
env:
210-
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD_CI_BUILDER }}
211-
212206
- name: Create build logs output directory
213207
run: mkdir image-build-logs
214208

@@ -230,6 +224,9 @@ jobs:
230224
args="$args -e stackhpc_repo_mirror_auth_proxy_enabled=true"
231225
args="$args -e kolla_build_log_path=$GITHUB_WORKSPACE/image-build-logs/kolla-build-overcloud.log"
232226
args="$args -e base_path=$GITHUB_WORKSPACE/opt/kayobe"
227+
# NOTE: We override pulp_auth_proxy_conf_path to a path shared by the
228+
# runner and dind containers.
229+
args="$args -e pulp_auth_proxy_conf_path=/home/runner/_work/pulp_proxy"
233230
source venvs/kayobe/bin/activate &&
234231
source src/kayobe-config/kayobe-env --environment ci-builder &&
235232
kayobe overcloud container image build $args
@@ -261,20 +258,21 @@ jobs:
261258
if: inputs.seed && matrix.distro.arch == 'amd64'
262259

263260
- name: Get built container images
264-
run: docker image ls --filter "reference=ark.stackhpc.com/stackhpc-dev/*:${{ steps.write-kolla-tag.outputs.kolla-tag }}*" > ${{ matrix.distro.name }}-${{ matrix.distro.release }}-container-images
261+
run: docker image ls --filter "reference=ark.stackhpc.com/stackhpc-dev/*:${{ steps.write-kolla-tag.outputs.kolla-tag }}*" | tee "${{ matrix.distro.name }}-${{ matrix.distro.release }}-container-images"
265262

266263
- name: Fail if no images have been built
267264
run: if [ $(wc -l < ${{ matrix.distro.name }}-${{ matrix.distro.release }}-container-images) -le 1 ]; then exit 1; fi
268265

269266
- name: Scan built container images
270-
run: src/kayobe-config/tools/scan-images.sh ${{ matrix.distro.name }}-${{ matrix.distro.release }} ${{ steps.write-kolla-tag.outputs.kolla-tag }}
267+
# NOTE: `inputs.sbom && '--sbom'` alone renders the literal string "false"
# when sbom is disabled; `|| ''` makes it expand to nothing instead.
run: src/kayobe-config/tools/scan-images.sh ${{ matrix.distro.name }}-${{ matrix.distro.release }} ${{ steps.write-kolla-tag.outputs.kolla-tag }} ${{ inputs.sbom && '--sbom' || '' }}
271268

272269
- name: Move image scan logs to output artifact
273270
run: mv image-scan-output image-build-logs/image-scan-output
271+
if: ${{ !cancelled() }}
274272

275-
- name: Fail if no images have passed scanning
276-
run: if [ $(wc -l < image-build-logs/image-scan-output/critical-images.txt) -gt 0 ]; then exit 1; fi
277-
if: ${{ !inputs.push-dirty }}
273+
- name: Fail if any images have critical vulnerabilities
274+
run: if [ -e image-build-logs/image-scan-output/critical-images.txt ] && [ $(wc -l < image-build-logs/image-scan-output/critical-images.txt) -gt 0 ]; then exit 1; fi
275+
if: ${{ !inputs.push-critical }}
278276

279277
- name: Copy clean images to push-attempt-images list
280278
run: cp image-build-logs/image-scan-output/clean-images.txt image-build-logs/push-attempt-images.txt
@@ -284,13 +282,13 @@ jobs:
284282
# This should be reverted when it's decided to filter high level CVEs as well.
285283
- name: Append dirty images to push list
286284
run: |
287-
cat image-build-logs/image-scan-output/dirty-images.txt >> image-build-logs/push-attempt-images.txt
285+
cat image-build-logs/image-scan-output/high-images.txt >> image-build-logs/push-attempt-images.txt
288286
if: ${{ inputs.push }}
289287

290288
- name: Append images with critical vulnerabilities to push list
291289
run: |
292290
cat image-build-logs/image-scan-output/critical-images.txt >> image-build-logs/push-attempt-images.txt
293-
if: ${{ inputs.push && inputs.push-dirty }}
291+
if: ${{ inputs.push && inputs.push-critical }}
294292

295293
- name: Push images
296294
run: |
@@ -332,19 +330,19 @@ jobs:
332330
if: ${{ steps.build_overcloud_images.outcome == 'failure' || steps.build_seed_images.outcome == 'failure' }}
333331

334332
- name: Fail when images failed to push
335-
run: if [ $(wc -l < image-build-logs/push-failed-images.txt) -gt 0 ]; then cat image-build-logs/push-failed-images.txt && exit 1; fi
336-
if: ${{ !cancelled() }}
333+
run: if [ -e image-build-logs/push-failed-images.txt ] && [ $(wc -l < image-build-logs/push-failed-images.txt) -gt 0 ]; then cat image-build-logs/push-failed-images.txt && exit 1; fi
334+
if: ${{ inputs.push && !cancelled() }}
337335

338336
# NOTE(seunghun1ee): Currently we want to mark the job as failed only when critical CVEs are detected.
339337
# This can be used again instead of "Fail when critical vulnerabilities are found" when it's
340338
# decided to fail the job on detecting high CVEs as well.
341339
# - name: Fail when images failed scanning
342-
# run: if [ $(wc -l < image-build-logs/image-scan-output/dirty-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/dirty-images.txt && exit 1; fi
343-
# if: ${{ !inputs.push-dirty && !cancelled() }}
340+
# run: if [ $(wc -l < image-build-logs/image-scan-output/high-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/high-images.txt && exit 1; fi
341+
# if: ${{ !inputs.push-critical && !cancelled() }}
344342

345343
- name: Fail when critical vulnerabilities are found
346-
run: if [ $(wc -l < image-build-logs/image-scan-output/critical-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/critical-images.txt && exit 1; fi
347-
if: ${{ !inputs.push-dirty && !cancelled() }}
344+
run: if [ -e image-build-logs/image-scan-output/critical-images.txt ] && [ $(wc -l < image-build-logs/image-scan-output/critical-images.txt) -gt 0 ]; then cat image-build-logs/image-scan-output/critical-images.txt && exit 1; fi
345+
if: ${{ !inputs.push-critical && !cancelled() }}
348346

349347
- name: Remove locally built images for this run
350348
if: always() && runner.arch == 'ARM64'

etc/kayobe/ansible/install-doca.yml

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,50 @@
33
become: true
44
hosts: mlnx
55
gather_facts: true
6+
vars:
7+
# we don't build kernel modules for each version, eg 5.14.0-611.13.1 has been built,
8+
# but not 5.14.0-611.20.1
9+
doca_kernel_version: "{{ stackhpc_doca_kernel_version_matrix[stackhpc_pulp_repo_rocky_9_minor_version] }}"
610
tasks:
7-
- name: Get running kernel
8-
ansible.builtin.command:
9-
cmd: "uname -r"
10-
register: kernel
11-
1211
- name: Install kernel repo
1312
ansible.builtin.dnf:
1413
name: doca-kernel-repo
1514
state: latest
1615
update_cache: true
1716

17+
# not the same as doca_kernel_version: some dots changed to underscore or dash
18+
- name: Discover kernel repo filename
19+
ansible.builtin.shell: |
20+
set -o pipefail
21+
rpm -ql doca-kernel-repo | grep /etc/yum.repos.d/
22+
register: kernel_repo_filename
23+
changed_when: false
24+
1825
- name: Ensure correct priority for DOCA modules
1926
ansible.builtin.lineinfile:
2027
line: "priority=-2"
2128
insertafter: EOF
22-
path: "/etc/yum.repos.d/doca-kernel-{{ kernel.stdout }}.repo"
29+
path: "{{ kernel_repo_filename.stdout }}"
30+
31+
# This is required by mlnx-ofa_kernel, and comes from the doca repository.
32+
# It is already present when doca-ofed is installed, but will be upgraded.
33+
- name: Ensure mlnx-tools is installed
34+
ansible.builtin.dnf:
35+
name: mlnx-tools
36+
state: latest
37+
update_cache: true
38+
become: true
39+
40+
- name: Ensure appropriate doca-kernel is installed
41+
ansible.builtin.dnf:
42+
name: "doca-kernel-{{ doca_kernel_version }}"
43+
state: latest
44+
disablerepo: doca
45+
become: true
2346

24-
- name: Install DOCA OFED
47+
- name: Ensure DOCA OFED userspace is installed
2548
ansible.builtin.dnf:
26-
name: doca-ofed
49+
name:
50+
- doca-ofed-userspace
2751
state: latest
2852
update_cache: true
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
---
2+
# This playbook performs the system upgrade from RockyLinux 9.6 to 9.7 on hosts
3+
# using doca ofed kernel modules and utilities.
4+
# It prevents the install of the dkms toolchain and dkms modules that would be
5+
# used instead of our precompiled modules if a direct `dnf upgrade` was run.
6+
#
7+
# It must be run after new package snapshots have been merged and
8+
# `pulp-repo-sync.yml` and `pulp-repo-publish.yml` have been run.
9+
# Also `kayobe overcloud host configure -t dnf` must have been run for the new
10+
# `doca.repo` to be present (the doca version is in the url) on the mlnx hosts.
11+
12+
- name: Prepare upgrade from Rocky Linux 9.6 to 9.7
13+
hosts: mlnx
14+
serial: "{{ lookup('env', 'ANSIBLE_SERIAL') | default(1, true) }}"
15+
gather_facts: true
16+
tags: pre
17+
vars:
18+
# we don't build kernel modules for each version, eg 5.14.0-611.13.1 has been built,
19+
# but not 5.14.0-611.20.1.
20+
doca_kernel_version: "{{ stackhpc_doca_kernel_version_matrix[stackhpc_pulp_repo_rocky_9_minor_version] }}"
21+
tasks:
22+
- name: Assert that hosts are running Rocky Linux 9.6
23+
ansible.builtin.assert:
24+
that:
25+
- ansible_facts.distribution == 'Rocky'
26+
- ansible_facts.distribution_version == '9.6'
27+
- os_distribution == 'rocky'
28+
fail_msg: >-
29+
This playbook is only designed for Rocky Linux 9.6 hosts. Ensure
30+
that you are limiting it to only run on Rocky Linux 9.6 hosts and
31+
os_distribution is set to rocky.
32+
33+
- name: Ensure doca kernel repo is up to date
34+
ansible.builtin.dnf:
35+
name: doca-kernel-repo
36+
state: latest
37+
update_cache: true
38+
become: true
39+
40+
# This is required by mlnx-ofa_kernel, and comes from the doca repository.
41+
# It is already present when doca-ofed is installed, but will be upgraded.
42+
- name: Ensure mlnx-tools is installed
43+
ansible.builtin.dnf:
44+
name: mlnx-tools
45+
state: latest
46+
update_cache: true
47+
become: true
48+
49+
- name: Ensure appropriate doca-kernel is installed
50+
ansible.builtin.dnf:
51+
name: "doca-kernel-{{ doca_kernel_version }}"
52+
state: latest
53+
disablerepo: doca
54+
become: true
55+
56+
# doca-ofed 3.2 starts to depend on the dkms modules. It was not the case
57+
# in doca-ofed 2.9.3.
58+
- name: Ensure doca-ofed is not present (upgrading it brings dkms)
59+
ansible.builtin.dnf:
60+
name: doca-ofed
61+
state: absent
62+
autoremove: false
63+
become: true
64+
65+
- name: Ensure latest doca-ofed-userspace instead of doca-ofed
66+
ansible.builtin.dnf:
67+
name: doca-ofed-userspace
68+
state: latest
69+
become: true
70+
71+
- name: Upgrade all
72+
ansible.builtin.dnf:
73+
name: "*"
74+
state: latest
75+
become: true
76+
77+
- name: Fix potential grub config preventing new kernel from being used
78+
ansible.builtin.import_playbook: reset-bls-entries.yml
79+
vars:
80+
reset_bls_hosts: mlnx
81+
82+
- name: Reboot to apply updates
83+
ansible.builtin.import_playbook: reboot.yml
84+
vars:
85+
reboot_hosts: mlnx
86+
87+
- name: Confirm the host is upgraded to Rocky Linux 9.7
88+
hosts: mlnx
89+
tags: post
90+
tasks:
91+
- name: Update distribution facts
92+
ansible.builtin.setup:
93+
filter: "{{ kayobe_ansible_setup_filter }}"
94+
gather_subset: "{{ kayobe_ansible_setup_gather_subset }}"
95+
96+
# Can fail (eg in multinode) when there are bad entries in grub config
97+
# Fixed by `kayobe playbook run ansible/maintenance/reset-bls-entries.yml`
98+
# and manual reboot
99+
- name: Assert that hosts are now using Rocky Linux 9.7
100+
ansible.builtin.assert:
101+
that:
102+
- ansible_facts.distribution == 'Rocky'
103+
- ansible_facts.distribution_version == '9.7'
104+
- os_distribution == 'rocky'

etc/kayobe/ansible/stackhpc-cloud-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@
142142
# Inclusive min
143143
sct_docker_version_min: "24.0.0"
144144
# Exclusive max
145-
sct_docker_version_max: "28.1.0"
145+
sct_docker_version_max: "30.0.0"
146146
sct_selinux_state: "{{ selinux_state }}"
147147
failed_when: host_results.rc not in [0, 1]
148148
register: host_results

etc/kayobe/docker.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,15 @@ docker_registry_insecure: "{{ 'https' not in stackhpc_repo_mirror_url }}"
3838
# Enable live-restore on docker daemon
3939
docker_daemon_live_restore: true
4040

41+
# Avoid docker 29 for the moment in Caracal
42+
docker_packages_version: "{{ '-28.*' if os_distribution == 'rocky' else ('=5:28*' if os_release == 'noble' else '=5:27*') }}"
43+
# variable from https://github.com/stackhpc/ansible-role-docker/blob/master/defaults/main.yml
44+
docker_packages:
45+
- "docker-ce{{ docker_packages_version }}"
46+
- "docker-ce-cli{{ docker_packages_version }}"
47+
- "docker-ce-rootless-extras{{ docker_packages_version }}"
48+
- "containerd.io"
49+
4150
###############################################################################
4251
# Dummy variable to allow Ansible to accept this file.
4352
workaround_ansible_issue_8743: yes
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../../ansible/pulp-auth-proxy.yml
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../../ansible/pulp-auth-proxy.yml
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../../../ansible/pulp-auth-proxy.yml

etc/kayobe/environments/ci-builder/stackhpc-ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ kolla_build_neutron_ovs: true
3838
# Host and port of a package repository mirror.
3939
# Build against the development Pulp service repositories.
4040
# Use Ark's package repositories to install packages.
41+
stackhpc_repo_mirror_auth_proxy_enabled: true
4142
stackhpc_repo_mirror_url: "{{ stackhpc_repo_mirror_auth_proxy_url if stackhpc_repo_mirror_auth_proxy_enabled | bool else stackhpc_release_pulp_url }}"
4243
stackhpc_repo_mirror_username: "skc-ci-builder-reader"
4344
stackhpc_repo_mirror_password: !vault |

0 commit comments

Comments
 (0)