Skip to content

Commit 2c7a5f1

Browse files
committed
osd: load latest cephx key during osd pod init
Create a new OSD pod init container that loads the latest CephX key for the OSD. The init container must run after OSD activation because ceph-bluestore-tool, which is used during activation, resets the keyring file. The primary purpose of this work is to allow OSDs to start and run correctly after key rotation, whether done manually by users or automatically by future Rook work. This work makes the prior code changes from rook#14826 redundant. The CI test added in that prior work is kept to guard against regressions. Signed-off-by: Blaine Gardner <blaine.gardner@ibm.com>
1 parent 6349ec9 commit 2c7a5f1

2 files changed

Lines changed: 133 additions & 60 deletions

File tree

pkg/operator/ceph/cluster/osd/spec.go

Lines changed: 93 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -111,39 +111,6 @@ KEYRING_FILE="$OSD_DATA_DIR"/keyring
111111
CV_MODE=%s
112112
DEVICE="$%s"
113113
114-
# In rare cases keyring file created with prepare-osd but did not
115-
# being stored in ceph auth system therefore we need to import it
116-
# from keyring file instead of creating new one
117-
if ! ceph -n client.admin auth get osd."$OSD_ID" -k /etc/ceph/admin-keyring-store/keyring; then
118-
if [ -f "$KEYRING_FILE" ]; then
119-
# import keyring from existing file
120-
TMP_DIR=$(mktemp -d)
121-
122-
python3 -c "
123-
import configparser
124-
125-
config = configparser.ConfigParser()
126-
config.read('$KEYRING_FILE')
127-
128-
if not config.has_section('osd.$OSD_ID'):
129-
exit()
130-
131-
config['osd.$OSD_ID'] = {'key': config['osd.$OSD_ID']['key'], 'caps mon': '\"allow profile osd\"', 'caps mgr': '\"allow profile osd\"', 'caps osd': '\"allow *\"'}
132-
133-
with open('$TMP_DIR/keyring', 'w') as configfile:
134-
config.write(configfile)
135-
"
136-
137-
cat "$TMP_DIR"/keyring
138-
ceph -n client.admin auth import -i "$TMP_DIR"/keyring -k /etc/ceph/admin-keyring-store/keyring
139-
140-
rm --recursive --force "$TMP_DIR"
141-
else
142-
# create new keyring if no keyring file found
143-
ceph -n client.admin auth get-or-create osd."$OSD_ID" mon 'allow profile osd' mgr 'allow profile osd' osd 'allow *' -k /etc/ceph/admin-keyring-store/keyring
144-
fi
145-
fi
146-
147114
# active the osd with ceph-volume
148115
if [[ "$CV_MODE" == "lvm" ]]; then
149116
TMP_DIR=$(mktemp -d)
@@ -651,6 +618,13 @@ func (c *Cluster) makeDeployment(osdProps osdProperties, osd *OSDInfo, provision
651618
initContainers = append(initContainers, c.getExpandInitContainer(osdProps, c.spec.DataDirHostPath, c.clusterInfo.Namespace, osdID, *osd))
652619
}
653620

621+
// key update init container must go after activate
622+
adminKeyVol, cephxContainer := c.getCephxKeyUpdateInitContainer(osdID, osdProps)
623+
if !volumeExistsWithName(volumes, adminKeyVol.Name) {
624+
volumes = append(volumes, adminKeyVol)
625+
}
626+
initContainers = append(initContainers, cephxContainer)
627+
654628
// Doing a chown in a post start lifecycle hook does not reliably complete before the OSD
655629
// process starts, which can cause the pod to fail without the lifecycle hook's chown command
656630
// completing. It can take an arbitrarily long time for a pod restart to successfully chown the
@@ -1430,6 +1404,83 @@ func (c *Cluster) getEncryptedStatusPVCInitContainer(mountPath string, osdProps
14301404
}
14311405
}
14321406

1407+
// init container to get (or create, if needed) the most up-to-date keyring for the given OSD and
1408+
// update the OSD config directory keyring file. this allows OSD cephx keys to be rotated without
1409+
// causing OSD pod crash loops.
1410+
//
1411+
// this must come after OSD activate init container because 'ceph-bluestore-tool' (and consequently,
1412+
// 'ceph-volume activate') pulls the keyring from the OSD disk's bluestore data and overwrites the
1413+
// config dir's keyring file, even when the on-disk key is not the most up-to-date.
1414+
func (c *Cluster) getCephxKeyUpdateInitContainer(osdID string, osdProps osdProperties) (v1.Volume, v1.Container) {
1415+
volMounts := []v1.VolumeMount{
1416+
{Name: k8sutil.ConfigOverrideName, ReadOnly: true, MountPath: opconfig.EtcCephDir},
1417+
}
1418+
1419+
adminKeyringVol, adminKeyringVolMount := cephkey.Volume().Admin(), cephkey.VolumeMount().Admin()
1420+
volMounts = append(volMounts, adminKeyringVolMount)
1421+
1422+
// OSD config dir is at /var/lib/ceph/osd/ceph-<id>
1423+
osdConfigMountPath := activateOSDMountPath + osdID
1424+
1425+
if osdProps.onPVC() {
1426+
// assumption: bridge vol will already be added to pod spec for PVC OSDs
1427+
volMounts = append(volMounts, getPvcOSDBridgeMountActivate(osdConfigMountPath, osdProps.pvc.ClaimName))
1428+
} else {
1429+
// assumption: activate OSD volume will already be added to pod spec for non-PVC OSDs at the
1430+
// correct location for mounting to /var/lib/ceph/osd/ceph-<id>
1431+
volMounts = append(volMounts,
1432+
v1.VolumeMount{Name: activateOSDVolumeName, MountPath: osdConfigMountPath},
1433+
)
1434+
}
1435+
1436+
keyUpdateScript := `
1437+
set -o errexit
1438+
set -o nounset
1439+
set -o pipefail
1440+
set -o xtrace
1441+
1442+
OSD_ID="` + osdID + `"
1443+
KEYRING_FILE=/var/lib/ceph/osd/ceph-"${OSD_ID}"/keyring
1444+
1445+
ceph --name client.admin auth get-or-create osd."${OSD_ID}" \
1446+
mon 'allow profile osd' mgr 'allow profile osd' osd 'allow *' \
1447+
--keyring /etc/ceph/admin-keyring-store/keyring > "$KEYRING_FILE"
1448+
`
1449+
1450+
envVars := []v1.EnvVar{
1451+
{
1452+
Name: "ROOK_CEPH_MON_HOST",
1453+
ValueFrom: &v1.EnvVarSource{
1454+
SecretKeyRef: &v1.SecretKeySelector{
1455+
LocalObjectReference: v1.LocalObjectReference{
1456+
Name: "rook-ceph-config",
1457+
},
1458+
Key: "mon_host",
1459+
},
1460+
},
1461+
},
1462+
{Name: "CEPH_ARGS", Value: "--mon-host=$(ROOK_CEPH_MON_HOST)"},
1463+
}
1464+
1465+
container := v1.Container{
1466+
Command: []string{
1467+
"/bin/bash",
1468+
"-c",
1469+
keyUpdateScript,
1470+
},
1471+
Name: "cephx-keyring-update",
1472+
Image: c.spec.CephVersion.Image,
1473+
ImagePullPolicy: controller.GetContainerImagePullPolicy(c.spec.CephVersion.ImagePullPolicy),
1474+
VolumeMounts: volMounts,
1475+
SecurityContext: controller.PrivilegedContext(true),
1476+
Env: envVars,
1477+
EnvFrom: getEnvFromSources(),
1478+
Resources: osdProps.resources,
1479+
}
1480+
1481+
return adminKeyringVol, container
1482+
}
1483+
14331484
func (c *Cluster) getOSDContainerPorts() []v1.ContainerPort {
14341485
var ports []v1.ContainerPort
14351486
if c.spec.RequireMsgr2() {
@@ -1495,3 +1546,12 @@ func getOSDCmd(cmd []string, interval int) []string {
14951546
}
14961547
return cmd
14971548
}
1549+
1550+
func volumeExistsWithName(vols []v1.Volume, name string) bool {
1551+
for _, v := range vols {
1552+
if v.Name == name {
1553+
return true
1554+
}
1555+
}
1556+
return false
1557+
}

0 commit comments

Comments
 (0)