Skip to content

Commit 952d852

Browse files
authored
Merge branch 'stackhpc/2025.1' into os-capacity-binding
2 parents fc829f6 + 9e3ba0c commit 952d852

17 files changed

+226
-18
lines changed

.github/workflows/overcloud-host-image-build.yml

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ on:
77
description: Build Rocky Linux 9
88
type: boolean
99
default: true
10+
rocky10:
11+
description: Build Rocky Linux 10
12+
type: boolean
13+
default: true
1014
ubuntu-noble:
1115
description: Build Ubuntu 24.04 Noble
1216
type: boolean
@@ -53,7 +57,7 @@ jobs:
5357
steps:
5458
- name: Validate inputs
5559
run: |
56-
if [[ ${{ inputs.rocky9 }} == 'false' && ${{ inputs.ubuntu-noble }} == 'false' ]]; then
60+
if [[ ${{ inputs.rocky9 }} == 'false' && ${{ inputs.rocky10 }} == 'false' && ${{ inputs.ubuntu-noble }} == 'false' ]]; then
5761
echo "At least one distribution must be selected"
5862
exit 1
5963
fi
@@ -292,6 +296,74 @@ jobs:
292296
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD_CI_BUILDER }}
293297
if: inputs.rocky9 && steps.build_rocky_9.outcome == 'success'
294298

299+
- name: Build a Rocky Linux 10 overcloud host image
300+
id: build_rocky_10
301+
continue-on-error: true
302+
run: |
303+
source venvs/kayobe/bin/activate &&
304+
source src/kayobe-config/kayobe-env --environment ci-builder &&
305+
kayobe overcloud host image build --force-rebuild \
306+
-e os_distribution="rocky" \
307+
-e os_release="10" \
308+
-e stackhpc_overcloud_dib_name=overcloud-rocky-10
309+
env:
310+
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD_CI_BUILDER }}
311+
if: inputs.rocky10
312+
313+
- name: Show last error logs
314+
continue-on-error: true
315+
run: |
316+
source venvs/kayobe/bin/activate &&
317+
source src/kayobe-config/kayobe-env --environment ci-builder &&
318+
kayobe seed host command run --command "tail -200 /opt/kayobe/images/overcloud-rocky-10/overcloud-rocky-10.stdout" --show-output
319+
env:
320+
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD_CI_BUILDER }}
321+
if: steps.build_rocky_10.outcome == 'failure'
322+
323+
- name: Upload Rocky Linux 10 overcloud host image to current Dev Cloud (SMS/Leafcloud)
324+
run: |
325+
source venvs/kayobe/bin/activate &&
326+
source src/kayobe-config/kayobe-env --environment ci-builder &&
327+
kayobe playbook run \
328+
src/kayobe-config/etc/kayobe/ansible/tools/openstack-host-image-upload.yml \
329+
-e local_image_path="/opt/kayobe/images/overcloud-rocky-10/overcloud-rocky-10.qcow2" \
330+
-e image_name=overcloud-rocky-10-${{ steps.host_image_tag.outputs.host_image_tag }}
331+
env:
332+
CLOUDS_YAML: ${{ secrets.CLOUDS_YAML }}
333+
OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
334+
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
335+
if: inputs.rocky10 && steps.build_rocky_10.outcome == 'success'
336+
337+
- name: Upload Rocky Linux 10 overcloud host image to other Dev Cloud (Leafcloud/SMS)
338+
run: |
339+
source venvs/kayobe/bin/activate &&
340+
source src/kayobe-config/kayobe-env --environment ci-builder &&
341+
kayobe playbook run \
342+
src/kayobe-config/etc/kayobe/ansible/tools/openstack-host-image-upload.yml \
343+
-e local_image_path="/opt/kayobe/images/overcloud-rocky-10/overcloud-rocky-10.qcow2" \
344+
-e image_name=overcloud-rocky-10-${{ steps.host_image_tag.outputs.host_image_tag }}
345+
env:
346+
CLOUDS_YAML: ${{ secrets.CLOUDS_YAML_OTHER_CLOUD }}
347+
OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID_OTHER_CLOUD }}
348+
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET_OTHER_CLOUD }}
349+
if: inputs.rocky10 && steps.build_rocky_10.outcome == 'success'
350+
351+
- name: Upload Rocky Linux 10 overcloud host image to Ark
352+
run: |
353+
source venvs/kayobe/bin/activate &&
354+
source src/kayobe-config/kayobe-env --environment ci-builder &&
355+
kayobe playbook run \
356+
src/kayobe-config/etc/kayobe/ansible/pulp/pulp-artifact-upload.yml \
357+
-e artifact_path=/opt/kayobe/images/overcloud-rocky-10 \
358+
-e artifact_tag=${{ steps.host_image_tag.outputs.host_image_tag }} \
359+
-e artifact_type="kayobe-images" \
360+
-e file_regex="*.qcow2" \
361+
-e os_distribution="rocky" \
362+
-e os_release="10"
363+
env:
364+
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD_CI_BUILDER }}
365+
if: inputs.rocky10 && steps.build_rocky_10.outcome == 'success'
366+
295367
- name: Build an Ubuntu Noble 24.04 overcloud host image
296368
id: build_ubuntu_noble
297369
continue-on-error: true
@@ -373,6 +445,7 @@ jobs:
373445
echo "Builds failed. See workflow artifacts for details." &&
374446
exit 1
375447
if: steps.build_rocky_9.outcome == 'failure' ||
448+
steps.build_rocky_10.outcome == 'failure' ||
376449
steps.build_ubuntu_noble.outcome == 'failure'
377450

378451
- name: Upload logs artifact
@@ -398,6 +471,7 @@ jobs:
398471
--repo stackhpc/stackhpc-kayobe-config \
399472
--ref $BRANCH_NAME \
400473
$(if [[ "${{ inputs.rocky9 }}" == "true" ]]; then echo "-f rocky9_tag=${{ steps.host_image_tag.outputs.host_image_tag }}"; fi) \
474+
$(if [[ "${{ inputs.rocky10 }}" == "true" ]]; then echo "-f rocky10_tag=${{ steps.host_image_tag.outputs.host_image_tag }}"; fi) \
401475
$(if [[ "${{ inputs.ubuntu-noble }}" == "true" ]]; then echo "-f ubuntu_noble_tag=${{ steps.host_image_tag.outputs.host_image_tag }}"; fi)
402476
env:
403477
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/overcloud-host-image-promote.yml

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ on:
77
description: Promote Rocky Linux 9
88
type: boolean
99
default: true
10+
rocky10:
11+
description: Promote Rocky Linux 10
12+
type: boolean
13+
default: true
1014
ubuntu-noble:
1115
description: Promote Ubuntu 24.04 Noble
1216
type: boolean
@@ -21,7 +25,7 @@ jobs:
2125
steps:
2226
- name: Validate inputs
2327
run: |
24-
if [[ ${{ inputs.rocky9 }} == 'false' && ${{ inputs.ubuntu-noble }} == 'false' ]]; then
28+
if [[ ${{ inputs.rocky9 }} == 'false' && ${{ inputs.rocky10 }} == 'false' && ${{ inputs.ubuntu-noble }} == 'false' ]]; then
2529
echo "At least one distribution must be selected"
2630
exit 1
2731
fi
@@ -74,6 +78,13 @@ jobs:
7478
working-directory: src/kayobe-config
7579
if: inputs.rocky9
7680

81+
- name: Gather Rocky Linux 10 overcloud host image tag
82+
id: rocky10_image_tag
83+
run: |
84+
echo image_tag=$(grep stackhpc_rocky_10_overcloud_host_image_version: etc/kayobe/pulp-host-image-versions.yml | awk '{print $2}') >> $GITHUB_OUTPUT
85+
working-directory: src/kayobe-config
86+
if: inputs.rocky10
87+
7788
- name: Gather Ubuntu Noble overcloud host image tag
7889
id: ubuntu_noble_image_tag
7990
run: |
@@ -95,6 +106,20 @@ jobs:
95106
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD_CI_BUILDER }}
96107
if: inputs.rocky9
97108

109+
- name: Promote Rocky Linux 10 overcloud host image artifact
110+
run: |
111+
source venvs/kayobe/bin/activate &&
112+
source src/kayobe-config/kayobe-env --environment ci-builder &&
113+
kayobe playbook run \
114+
src/kayobe-config/etc/kayobe/ansible/pulp/pulp-artifact-promote.yml \
115+
-e artifact_type="kayobe-images" \
116+
-e os_distribution='rocky' \
117+
-e os_release='10' \
118+
-e promotion_tag=${{ steps.rocky10_image_tag.outputs.image_tag }}
119+
env:
120+
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD_CI_BUILDER }}
121+
if: inputs.rocky10
122+
98123
- name: Promote Ubuntu Noble 24.04 overcloud host image artifact
99124
run: |
100125
source venvs/kayobe/bin/activate &&

.github/workflows/overcloud-host-image-upload.yml

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ on:
77
description: Upload Rocky Linux 9
88
type: boolean
99
default: true
10+
rocky10:
11+
description: Upload Rocky Linux 10
12+
type: boolean
13+
default: true
1014
ubuntu-noble:
1115
description: Upload Ubuntu 24.04 Noble
1216
type: boolean
@@ -50,7 +54,7 @@ jobs:
5054
steps:
5155
- name: Validate inputs
5256
run: |
53-
if [[ ${{ inputs.rocky9 }} == 'false' && ${{ inputs.ubuntu-noble }} == 'false' ]]; then
57+
if [[ ${{ inputs.rocky9 }} == 'false' && ${{ inputs.rocky10 }} == 'false' && ${{ inputs.ubuntu-noble }} == 'false' ]]; then
5458
echo "At least one distribution must be selected"
5559
exit 1
5660
fi
@@ -141,6 +145,51 @@ jobs:
141145
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
142146
if: inputs.rocky9 && steps.rocky_9_image_exists.outcome == 'failure'
143147

148+
- name: Output Rocky Linux 10 image tag
149+
id: rocky_10_image_tag
150+
run: |
151+
echo image_tag=$(grep stackhpc_rocky_10_overcloud_host_image_version: src/kayobe-config/etc/kayobe/pulp-host-image-versions.yml | awk '{print $2}') >> $GITHUB_OUTPUT
152+
153+
- name: Check if image exists already
154+
id: rocky_10_image_exists
155+
run: |
156+
source venvs/kayobe/bin/activate &&
157+
openstack image show \
158+
overcloud-rocky-10-${{ steps.rocky_10_image_tag.outputs.image_tag }}
159+
env:
160+
OS_CLOUD: openstack
161+
OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
162+
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
163+
continue-on-error: true
164+
165+
- name: Download Rocky Linux 10 overcloud host image from Ark
166+
run: |
167+
source venvs/kayobe/bin/activate &&
168+
source src/kayobe-config/kayobe-env --environment ${{ inputs.kayobe-environment }} &&
169+
kayobe playbook run \
170+
src/kayobe-config/etc/kayobe/ansible/pulp/pulp-host-image-download.yml \
171+
-e os_distribution="rocky" \
172+
-e os_release="10"
173+
env:
174+
KAYOBE_VAULT_PASSWORD: ${{ secrets.KAYOBE_VAULT_PASSWORD_CI_BUILDER }}
175+
if: inputs.rocky10 && steps.rocky_10_image_exists.outcome == 'failure'
176+
177+
- name: Upload Rocky Linux 10 overcloud host image to Cloud
178+
run: |
179+
source venvs/kayobe/bin/activate &&
180+
openstack image create \
181+
overcloud-rocky-10-${{ steps.rocky_10_image_tag.outputs.image_tag }} \
182+
--container-format bare \
183+
--disk-format qcow2 \
184+
--file /tmp/rocky-10.qcow2 \
185+
--private \
186+
--progress
187+
env:
188+
OS_CLOUD: openstack
189+
OS_APPLICATION_CREDENTIAL_ID: ${{ secrets.OS_APPLICATION_CREDENTIAL_ID }}
190+
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
191+
if: inputs.rocky10 && steps.rocky_10_image_exists.outcome == 'failure'
192+
144193
- name: Output Ubuntu Noble image tag
145194
id: ubuntu_noble_image_tag
146195
run: |

.github/workflows/stackhpc-all-in-one.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,12 @@ jobs:
7777
runner-selection:
7878
uses: ./.github/workflows/runner-selector.yml
7979
with:
80-
runner_env: ${{ inputs.upgrade == true && 'Leafcloud' || inputs.runner_env }}
80+
runner_env: ${{ inputs.runner_env }}
8181
# NOTE: Runner needs unzip and nodejs packages.
8282
all-in-one:
8383
name: All in one
8484
if: ${{ inputs.if && !cancelled() }}
85-
environment: ${{ inputs.upgrade == true && 'Leafcloud' || inputs.runner_env }}
85+
environment: ${{ inputs.runner_env }}
8686
runs-on: ${{ needs.runner-selection.outputs.runner_name_aio }}
8787
needs:
8888
- runner-selection

.github/workflows/update-overcloud-host-image-tags.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ on:
77
rocky9_tag:
88
description: Overcloud host image tag for Rocky 9
99
type: string
10+
rocky10_tag:
11+
description: Overcloud host image tag for Rocky 10
12+
type: string
1013
ubuntu_noble_tag:
1114
description: Overcloud host image tag for Ubuntu
1215
type: string
@@ -31,6 +34,11 @@ jobs:
3134
sed -i "/stackhpc_rocky_9_overcloud_host_image_version/s/.*/stackhpc_rocky_9_overcloud_host_image_version: ${{ inputs.rocky9_tag }}/" ${{ github.workspace }}/src/kayobe-config/etc/kayobe/pulp-host-image-versions.yml
3235
if: "${{ inputs.rocky9_tag != '' }}"
3336

37+
- name: Update Rocky 10 overcloud host image tag
38+
run: |
39+
sed -i "/stackhpc_rocky_10_overcloud_host_image_version/s/.*/stackhpc_rocky_10_overcloud_host_image_version: ${{ inputs.rocky10_tag }}/" ${{ github.workspace }}/src/kayobe-config/etc/kayobe/pulp-host-image-versions.yml
40+
if: "${{ inputs.rocky10_tag != '' }}"
41+
3442
- name: Update Ubuntu Noble overcloud host image tag
3543
run: |
3644
sed -i "/stackhpc_ubuntu_noble_overcloud_host_image_version/s/.*/stackhpc_ubuntu_noble_overcloud_host_image_version: ${{ inputs.ubuntu_noble_tag }}/" ${{ github.workspace }}/src/kayobe-config/etc/kayobe/pulp-host-image-versions.yml
@@ -43,14 +51,15 @@ jobs:
4351
commit-message: >-
4452
Bump overcloud host image tags
4553
author: stackhpc-ci <22933334+stackhpc-ci@users.noreply.github.com>
46-
branch: bump-overcloud-host-images-${{ inputs.rocky9_tag }}-${{ inputs.ubuntu_noble_tag }}
54+
branch: bump-overcloud-host-images-${{ inputs.rocky9_tag }}-${{ inputs.rocky10_tag }}-${{ inputs.ubuntu_noble_tag }}
4755
delete-branch: true
4856
title: >-
4957
Bump overcloud host image tags
5058
body: |
5159
This PR was created automatically to update the overcloud host image
5260
tags.
5361
Rocky 9: ${{ inputs.rocky9_tag }}
62+
Rocky 10: ${{ inputs.rocky10_tag }}
5463
Ubuntu Noble: ${{ inputs.ubuntu_noble_tag }}
5564
labels: |
5665
automated

doc/source/configuration/monitoring.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ present, the workaround is to go into each node running Grafana and manually
6969
restart the process with ``systemctl restart kolla-grafana-container.service``
7070
and then try the reconfigure command again.)
7171

72+
.. note::
73+
If the environment defines additional Prometheus Node Exporter startup parameters
74+
via ``prometheus_node_exporter_cmdline_extras``, the parameters should be updated
75+
to include the textfile collector used by SMART monitoring:
76+
``--collector.textfile.directory=/var/lib/node_exporter/textfile_collector``
77+
7278
Once the reconfigure has completed you can now run the custom playbook which
7379
copies over the scripts and sets up the cron jobs to start SMART monitoring
7480
on the overcloud hosts:
@@ -81,6 +87,27 @@ on the overcloud hosts:
8187
SMART reporting should now be enabled along with a Prometheus alert for
8288
unhealthy disks and a Grafana dashboard called ``Hardware Overview``.
8389

90+
Monitoring Drive Writes Per Day
91+
-------------------------------
92+
93+
Drives can be monitored for the level of write intensity of the
94+
workload, and alerts can be defined for drives that are persistently
95+
exceeding their stated level of write endurance. To enable this
96+
feature, set the flag ``create_dwpd_ratings``:
97+
98+
.. code-block:: console
99+
100+
(kayobe) [stack@node ~]$ cd etc/kayobe
101+
(kayobe) [stack@node kayobe]$ kayobe playbook run ansible/deployment/smartmon-tools.yml -e create_dwpd_ratings=true
102+
103+
This flag scans for NVMe/SSD devices in the system and creates a new
104+
file, ``dwpd-ratings.yml``, in the directory of the current environment.
105+
106+
.. note::
107+
The playbook assigns placeholder values for write endurance for each
108+
drive model. These values should be updated with specifications from
109+
vendor datasheets.
110+
84111
Alertmanager, Slack and Microsoft Teams
85112
=======================================
86113

doc/source/configuration/openbao.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,8 @@ Certificates generation
236236
Create the external TLS certificates (testing only)
237237
---------------------------------------------------
238238

239-
This method should only be used for testing. For external TLS on production systems,
240-
See `Installing External TLS Certificates <installing-external-tls-certificates>`__.
239+
This method should only be used for testing. For external TLS on production
240+
systems, see :ref:`installing-external-tls-certificates`.
241241

242242
Typically external API TLS certificates should be generated by an organisation's trusted internal or third-party CA.
243243
For test and development purposes it is possible to use OpenBao as a CA for the external API.

doc/source/contributor/ofed.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ to be reset before rebooting.
8686
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/maintenance/reset-bls-entries.yml -e reset_bls_host=mlnx
8787
8888
The hosts can now be rebooted to use the latest kernel; a rolling reboot may be applicable
89-
here to reduce distruptions. See the `package updates documentation <package-updates>`.
89+
here to reduce disruptions. See the :doc:`package updates documentation <package-updates>`.
9090

9191
.. code-block:: console
9292

doc/source/operations/openstack-reconfiguration.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@ Installing External TLS Certificates
4141
====================================
4242

4343
This section explains the process of deploying external TLS.
44-
For internal and backend TLS, see `Hashicorp Vault for internal PKI
45-
<hashicorp-vault>`__.
44+
For internal and backend TLS, see :doc:`/configuration/openbao`.
4645

4746
To configure TLS for the first time, we write the contents of a PEM
4847
file to the ``secrets.yml`` file as ``secrets_kolla_external_tls_cert``.

etc/kayobe/environments/ci-builder/stackhpc-ci.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ stackhpc_repo_rocky_9_highavailability_version: "{{ stackhpc_pulp_repo_rocky_9_h
8080
stackhpc_repo_rocky_9_sig_security_common_version: "{{ stackhpc_pulp_repo_multiarch_rocky_9_sig_security_common_version }}"
8181
stackhpc_repo_rhel9_doca_version: "{{ stackhpc_pulp_repo_rhel9_doca_version }}"
8282

83+
stackhpc_repo_rocky_10_baseos_version: "{{ stackhpc_pulp_repo_rocky_10_baseos_version }}"
84+
stackhpc_repo_rocky_10_appstream_version: "{{ stackhpc_pulp_repo_rocky_10_appstream_version }}"
85+
8386
# Rocky-and-CI-specific Pulp urls
8487
stackhpc_include_os_minor_version_in_repo_url: true
8588

0 commit comments

Comments
 (0)