Skip to content

Commit c3ee142

Browse files
authored
Merge pull request rook#16003 from BlaineEXE/osd-allow-manual-rotation
osd: load latest cephx key during osd pod init
2 parents fcbcf69 + 2c7a5f1 commit c3ee142

2 files changed

Lines changed: 133 additions & 60 deletions

File tree

pkg/operator/ceph/cluster/osd/spec.go

Lines changed: 93 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -111,39 +111,6 @@ KEYRING_FILE="$OSD_DATA_DIR"/keyring
111111
CV_MODE=%s
112112
DEVICE="$%s"
113113
114-
# In rare cases keyring file created with prepare-osd but did not
115-
# being stored in ceph auth system therefore we need to import it
116-
# from keyring file instead of creating new one
117-
if ! ceph -n client.admin auth get osd."$OSD_ID" -k /etc/ceph/admin-keyring-store/keyring; then
118-
if [ -f "$KEYRING_FILE" ]; then
119-
# import keyring from existing file
120-
TMP_DIR=$(mktemp -d)
121-
122-
python3 -c "
123-
import configparser
124-
125-
config = configparser.ConfigParser()
126-
config.read('$KEYRING_FILE')
127-
128-
if not config.has_section('osd.$OSD_ID'):
129-
exit()
130-
131-
config['osd.$OSD_ID'] = {'key': config['osd.$OSD_ID']['key'], 'caps mon': '\"allow profile osd\"', 'caps mgr': '\"allow profile osd\"', 'caps osd': '\"allow *\"'}
132-
133-
with open('$TMP_DIR/keyring', 'w') as configfile:
134-
config.write(configfile)
135-
"
136-
137-
cat "$TMP_DIR"/keyring
138-
ceph -n client.admin auth import -i "$TMP_DIR"/keyring -k /etc/ceph/admin-keyring-store/keyring
139-
140-
rm --recursive --force "$TMP_DIR"
141-
else
142-
# create new keyring if no keyring file found
143-
ceph -n client.admin auth get-or-create osd."$OSD_ID" mon 'allow profile osd' mgr 'allow profile osd' osd 'allow *' -k /etc/ceph/admin-keyring-store/keyring
144-
fi
145-
fi
146-
147114
# active the osd with ceph-volume
148115
if [[ "$CV_MODE" == "lvm" ]]; then
149116
TMP_DIR=$(mktemp -d)
@@ -651,6 +618,13 @@ func (c *Cluster) makeDeployment(osdProps osdProperties, osd *OSDInfo, provision
651618
initContainers = append(initContainers, c.getExpandInitContainer(osdProps, c.spec.DataDirHostPath, c.clusterInfo.Namespace, osdID, *osd))
652619
}
653620

621+
// key update init container must go after activate
622+
adminKeyVol, cephxContainer := c.getCephxKeyUpdateInitContainer(osdID, osdProps)
623+
if !volumeExistsWithName(volumes, adminKeyVol.Name) {
624+
volumes = append(volumes, adminKeyVol)
625+
}
626+
initContainers = append(initContainers, cephxContainer)
627+
654628
// Doing a chown in a post start lifecycle hook does not reliably complete before the OSD
655629
// process starts, which can cause the pod to fail without the lifecycle hook's chown command
656630
// completing. It can take an arbitrarily long time for a pod restart to successfully chown the
@@ -1430,6 +1404,83 @@ func (c *Cluster) getEncryptedStatusPVCInitContainer(mountPath string, osdProps
14301404
}
14311405
}
14321406

1407+
// init container to get (or create, if needed) the most up-to-date keyring for the given OSD and
1408+
// update the OSD config directory keyring file. this allows OSD cephx keys to be rotated without
1409+
// causing OSD pod crash loops.
1410+
//
1411+
// this must come after OSD activate init container because 'ceph-bluestore-tool' (and consequently,
1412+
// 'ceph-volume activate') pulls the keyring from the OSD disk's bluestore data and overwrites the
1413+
// config dir's keyring file, even when the on-disk key is not the most up-to-date.
1414+
func (c *Cluster) getCephxKeyUpdateInitContainer(osdID string, osdProps osdProperties) (v1.Volume, v1.Container) {
1415+
volMounts := []v1.VolumeMount{
1416+
{Name: k8sutil.ConfigOverrideName, ReadOnly: true, MountPath: opconfig.EtcCephDir},
1417+
}
1418+
1419+
adminKeyringVol, adminKeyringVolMount := cephkey.Volume().Admin(), cephkey.VolumeMount().Admin()
1420+
volMounts = append(volMounts, adminKeyringVolMount)
1421+
1422+
// OSD config dir is at /var/lib/ceph/osd/ceph-<id>
1423+
osdConfigMountPath := activateOSDMountPath + osdID
1424+
1425+
if osdProps.onPVC() {
1426+
// assumption: bridge vol will already be added to pod spec for PVC OSDs
1427+
volMounts = append(volMounts, getPvcOSDBridgeMountActivate(osdConfigMountPath, osdProps.pvc.ClaimName))
1428+
} else {
1429+
// assumption: activate OSD volume will already be added to pod spec for non-PVC OSDs at the
1430+
// correct location for mounting to /var/lib/ceph/osd/ceph-<id>
1431+
volMounts = append(volMounts,
1432+
v1.VolumeMount{Name: activateOSDVolumeName, MountPath: osdConfigMountPath},
1433+
)
1434+
}
1435+
1436+
keyUpdateScript := `
1437+
set -o errexit
1438+
set -o nounset
1439+
set -o pipefail
1440+
set -o xtrace
1441+
1442+
OSD_ID="` + osdID + `"
1443+
KEYRING_FILE=/var/lib/ceph/osd/ceph-"${OSD_ID}"/keyring
1444+
1445+
ceph --name client.admin auth get-or-create osd."${OSD_ID}" \
1446+
mon 'allow profile osd' mgr 'allow profile osd' osd 'allow *' \
1447+
--keyring /etc/ceph/admin-keyring-store/keyring > "$KEYRING_FILE"
1448+
`
1449+
1450+
envVars := []v1.EnvVar{
1451+
{
1452+
Name: "ROOK_CEPH_MON_HOST",
1453+
ValueFrom: &v1.EnvVarSource{
1454+
SecretKeyRef: &v1.SecretKeySelector{
1455+
LocalObjectReference: v1.LocalObjectReference{
1456+
Name: "rook-ceph-config",
1457+
},
1458+
Key: "mon_host",
1459+
},
1460+
},
1461+
},
1462+
{Name: "CEPH_ARGS", Value: "--mon-host=$(ROOK_CEPH_MON_HOST)"},
1463+
}
1464+
1465+
container := v1.Container{
1466+
Command: []string{
1467+
"/bin/bash",
1468+
"-c",
1469+
keyUpdateScript,
1470+
},
1471+
Name: "cephx-keyring-update",
1472+
Image: c.spec.CephVersion.Image,
1473+
ImagePullPolicy: controller.GetContainerImagePullPolicy(c.spec.CephVersion.ImagePullPolicy),
1474+
VolumeMounts: volMounts,
1475+
SecurityContext: controller.PrivilegedContext(true),
1476+
Env: envVars,
1477+
EnvFrom: getEnvFromSources(),
1478+
Resources: osdProps.resources,
1479+
}
1480+
1481+
return adminKeyringVol, container
1482+
}
1483+
14331484
func (c *Cluster) getOSDContainerPorts() []v1.ContainerPort {
14341485
var ports []v1.ContainerPort
14351486
if c.spec.RequireMsgr2() {
@@ -1495,3 +1546,12 @@ func getOSDCmd(cmd []string, interval int) []string {
14951546
}
14961547
return cmd
14971548
}
1549+
1550+
func volumeExistsWithName(vols []v1.Volume, name string) bool {
1551+
for _, v := range vols {
1552+
if v.Name == name {
1553+
return true
1554+
}
1555+
}
1556+
return false
1557+
}

0 commit comments

Comments
 (0)