Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions test/extended/edge_topologies/tnf_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
exutil "github.com/openshift/origin/test/extended/util"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
k8srand "k8s.io/apimachinery/pkg/util/rand"
"k8s.io/kubernetes/test/e2e/framework"
)

Expand Down Expand Up @@ -414,6 +415,112 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual
memberPromotedVotingTimeout, utils.FiveSecondPollInterval)
})

// Rotates the BMC password of targetNode through the Redfish API, pushes the new
// credentials into the cluster with update-fencing-credentials.sh, checks pacemaker
// and etcd health, and finally triggers a network disruption to confirm that
// recovery still works with the updated credentials.
g.It("should update fencing credentials and validate fencing with updated credentials", func() {
	// bmcNode is the node whose BMC password is rotated; survivedNode is its peer.
	bmcNode := targetNode
	survivedNode := peerNode

	g.By(fmt.Sprintf("Reading current fencing credentials for node %s", bmcNode.Name))
	creds, err := apis.FindFencingCredentialsByNodeName(oc, bmcNode.Name)
	o.Expect(err).ToNot(o.HaveOccurred(), "expected to find fencing credentials secret")
	// The password is deliberately left out of this log line.
	framework.Logf("Found fencing credentials secret %s (address: %s, username: %s)",
		creds.SecretName, creds.Address, creds.Username)

	g.By("Parsing Redfish address from fencing credentials")
	redfishHost, redfishPort, redfishPath, err := apis.ParseRedfishAddress(creds.Address)
	o.Expect(err).ToNot(o.HaveOccurred(), "expected to parse Redfish address")
	framework.Logf("Redfish endpoint: host=%s port=%s path=%s", redfishHost, redfishPort, redfishPath)

	// TLS verification is skipped only when the secret explicitly says "Disabled".
	sslInsecure := creds.CertificateVerification == "Disabled"
	originalPassword := creds.Password
	// Random 32-character replacement password used for the duration of the test.
	newPassword := k8srand.String(32)

	// The script is run through `bash -c`, so "$1".."$4" are filled from the
	// arguments that follow the $0 placeholder ("update-fencing-credentials")
	// in the DebugNodeRetryWithOptionsAndChroot calls below.
	scriptPath := "/etc/kubernetes/static-pod-resources/etcd-certs/configmaps/etcd-scripts/update-fencing-credentials.sh"
	bashCmd := scriptPath + ` --node "$1" --username "$2" --password "$3" --address "$4"`
	if sslInsecure {
		bashCmd += " --ssl-insecure"
	}

	// Registered before the password is changed so that it runs no matter which
	// later step fails. It authenticates with newPassword to put originalPassword
	// back on the BMC, then re-runs the script with the original credentials.
	// Failures here are only written to GinkgoWriter as warnings.
	// NOTE(review): if the password change below never took effect, the first
	// restore call authenticates with a password the BMC does not know — confirm
	// this cannot trip a BMC account lockout.
	g.DeferCleanup(func() {
		framework.Logf("Restoring original BMC password via Redfish API")
		if restoreErr := apis.ChangeBMCPasswordViaRedfish(oc, bmcNode.Name, redfishHost, redfishPort,
			creds.Username, newPassword, originalPassword); restoreErr != nil {
			fmt.Fprintf(g.GinkgoWriter, "Warning: failed to restore BMC password via Redfish: %v\n", restoreErr)
			// Skip the script: the BMC password was not restored.
			return
		}

		framework.Logf("Re-running update-fencing-credentials.sh with original credentials")
		output, restoreErr := exutil.DebugNodeRetryWithOptionsAndChroot(oc, bmcNode.Name, "openshift-etcd",
			"bash", "-c", bashCmd, "update-fencing-credentials",
			bmcNode.Name, creds.Username, originalPassword, creds.Address)
		if restoreErr != nil {
			fmt.Fprintf(g.GinkgoWriter, "Warning: failed to restore fencing credentials via script: %v\noutput: %s\n",
				restoreErr, output)
		}
	})

	g.By(fmt.Sprintf("Changing BMC password via Redfish API on %s", bmcNode.Name))
	err = apis.ChangeBMCPasswordViaRedfish(oc, bmcNode.Name, redfishHost, redfishPort,
		creds.Username, originalPassword, newPassword)
	o.Expect(err).ToNot(o.HaveOccurred(), "expected to change BMC password via Redfish API")

	// Check that the BMC accepts the new password before handing it to the script.
	g.By(fmt.Sprintf("Validating new BMC credentials via fence_redfish on %s", bmcNode.Name))
	err = apis.ValidateBMCCredentials(oc, bmcNode.Name, redfishHost, redfishPort, redfishPath,
		creds.Username, newPassword, sslInsecure)
	o.Expect(err).ToNot(o.HaveOccurred(), "expected new BMC credentials to be valid")

	g.By(fmt.Sprintf("Running update-fencing-credentials.sh on %s with new credentials", bmcNode.Name))
	output, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, bmcNode.Name, "openshift-etcd",
		"bash", "-c", bashCmd, "update-fencing-credentials",
		bmcNode.Name, creds.Username, newPassword, creds.Address)
	o.Expect(err).ToNot(o.HaveOccurred(), "expected update-fencing-credentials.sh to succeed")
	framework.Logf("update-fencing-credentials.sh output:\n%s", output)

	g.By("Validating pacemaker health after credential update")
	ctx, cancel := context.WithTimeout(context.Background(), nodeIsHealthyTimeout)
	defer cancel()
	// This is a single check, not a poll: any failed action reported right now fails the test.
	pcsOutput, err := services.PcsStatusViaDebug(ctx, oc, bmcNode.Name)
	o.Expect(err).ToNot(o.HaveOccurred(), "expected pcs status to succeed")
	failedActions := services.ExtractPcsFailedActions(pcsOutput)
	o.Expect(failedActions).To(o.BeEmpty(), "expected no failed pacemaker resource actions after credential update")

	g.By("Ensuring etcd members remain healthy after fencing credentials update")
	// Poll until both members report healthy within the same attempt.
	o.Eventually(func() error {
		if err := helpers.EnsureHealthyMember(g.GinkgoT(), etcdClientFactory, survivedNode.Name); err != nil {
			return err
		}
		if err := helpers.EnsureHealthyMember(g.GinkgoT(), etcdClientFactory, bmcNode.Name); err != nil {
			return err
		}
		return nil
	}, nodeIsHealthyTimeout, utils.FiveSecondPollInterval).ShouldNot(o.HaveOccurred(),
		"etcd members should be healthy after fencing credentials update")

	g.By(fmt.Sprintf("Triggering fencing-style network disruption between %s and %s", bmcNode.Name, survivedNode.Name))
	command, err := exutil.TriggerNetworkDisruption(oc.KubeClient(), &bmcNode, &survivedNode, networkDisruptionDuration)
	o.Expect(err).To(o.BeNil(), "Expected to disrupt network without errors")
	framework.Logf("network disruption command: %q", command)

	// Which node ends up as leader is not assumed; the helper reports the leader,
	// the learner, and whether the learner has already started.
	g.By(fmt.Sprintf("Ensuring cluster recovery after network disruption (timeout: %v)", memberIsLeaderTimeout))
	leaderNode, learnerNode, learnerStarted := validateEtcdRecoveryStateWithoutAssumingLeader(oc, etcdClientFactory,
		&survivedNode, &bmcNode, memberIsLeaderTimeout, utils.FiveSecondPollInterval)

	if learnerStarted {
		framework.Logf("Learner node %q already started as learner after disruption", learnerNode.Name)
	} else {
		g.By(fmt.Sprintf("Ensuring '%s' rejoins as learner (timeout: %v)", learnerNode.Name, memberRejoinedLearnerTimeout))
		// NOTE(review): the two booleans presumably mean "started" and "is learner" — confirm against validateEtcdRecoveryState.
		validateEtcdRecoveryState(oc, etcdClientFactory,
			leaderNode,
			learnerNode, true, true,
			memberRejoinedLearnerTimeout, utils.FiveSecondPollInterval)
	}

	g.By(fmt.Sprintf("Ensuring learner node '%s' is promoted back as voting member (timeout: %v)", learnerNode.Name, memberPromotedVotingTimeout))
	validateEtcdRecoveryState(oc, etcdClientFactory,
		leaderNode,
		learnerNode, true, false,
		memberPromotedVotingTimeout, utils.FiveSecondPollInterval)
})

g.It("should compute etcd revision bump and preserve backup container after kernel panic recovery", func() {
// Note: This test triggers a kernel panic on one node via sysrq trigger, then verifies
// the surviving node computes the etcd revision bump as floor(maxRaftIndex * 0.2) per
Expand Down
66 changes: 64 additions & 2 deletions test/extended/edge_topologies/utils/apis/baremetalhost.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,72 @@ import (
)

const (
	// BMCSecretNamespace is the namespace used for BMC secrets
	// (its users are outside this view — confirm before relying on this description).
	BMCSecretNamespace = "openshift-machine-api"
	// FencingCredentialsNamespace is the namespace searched for fencing-credentials secrets.
	FencingCredentialsNamespace = "openshift-etcd"
	// fencingCredentialsPrefix is prepended to a node name to form the name of
	// that node's fencing-credentials secret.
	fencingCredentialsPrefix = "fencing-credentials-"
	// secretsDataPasswordKey is the secret data key that holds a password.
	secretsDataPasswordKey = "password"
)

// FencingCredentials holds the fields from a fencing-credentials secret in openshift-etcd.
type FencingCredentials struct {
	// SecretName is the name of the secret the values were read from.
	SecretName string
	// Address is the secret's "address" value, e.g. a "redfish+https://..." URL.
	Address string
	// Username is the secret's "username" value.
	Username string
	// Password is the secret's "password" value.
	Password string
	// CertificateVerification is the secret's "certificateVerification" value;
	// it is empty when the secret has no such key.
	CertificateVerification string
}

// FindFencingCredentialsByNodeName discovers the fencing-credentials secret for a node
// by listing secrets in openshift-etcd and matching against the node's short name.
func FindFencingCredentialsByNodeName(oc *exutil.CLI, nodeName string) (*FencingCredentials, error) {
shortName := strings.Split(nodeName, ".")[0]

ctx := context.Background()
list, err := oc.AdminKubeClient().CoreV1().Secrets(FencingCredentialsNamespace).List(ctx, metav1.ListOptions{})
if err != nil {
return nil, fmt.Errorf("list secrets in %s: %w", FencingCredentialsNamespace, err)
}

expected := map[string]struct{}{
fencingCredentialsPrefix + shortName: {},
fencingCredentialsPrefix + nodeName: {},
}

for _, secret := range list.Items {
if _, ok := expected[secret.Name]; ok {
getRequired := func(key string) (string, error) {
v, exists := secret.Data[key]
if !exists || len(v) == 0 {
return "", fmt.Errorf("secret %s missing required key %q", secret.Name, key)
}
return string(v), nil
}
address, err := getRequired("address")
if err != nil {
return nil, err
}
username, err := getRequired("username")
if err != nil {
return nil, err
}
password, err := getRequired("password")
if err != nil {
return nil, err
}
return &FencingCredentials{
SecretName: secret.Name,
Address: address,
Username: username,
Password: password,
CertificateVerification: string(secret.Data["certificateVerification"]),
}, nil
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
}

return nil, fmt.Errorf("no fencing-credentials secret found matching node %q (prefix: %s, contains: %s) in %s",
nodeName, fencingCredentialsPrefix, shortName, FencingCredentialsNamespace)
}

// BMHGVR is the GroupVersionResource for BareMetalHost (metal3.io/v1alpha1). Use for API-based get/delete/patch.
var BMHGVR = schema.GroupVersionResource{
Group: "metal3.io", Version: "v1alpha1", Resource: "baremetalhosts",
Expand Down
153 changes: 153 additions & 0 deletions test/extended/edge_topologies/utils/apis/redfish.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package apis

import (
"encoding/json"
"fmt"
"net"
"net/url"
"strings"

exutil "github.com/openshift/origin/test/extended/util"
"k8s.io/kubernetes/test/e2e/framework"
)

// ParseRedfishAddress splits a Redfish address into host, port and path.
//
// The expected input is "redfish+<scheme>://host[:port]/path", for example
// "redfish+https://host:port/redfish/v1/Systems/1". IPv6 hosts use bracket
// notation in the input and are returned without the brackets.
//
// When the address carries no port, "443" is returned for https and "80" for
// http. Any other scheme without an explicit port is rejected, because there is
// no sensible default to pick.
//
// An error is returned when the "redfish+" prefix is missing, the remainder is
// not a valid URL, the host or path is empty, or the port cannot be determined.
func ParseRedfishAddress(address string) (host, port, path string, err error) {
	if !strings.HasPrefix(address, "redfish+") {
		return "", "", "", fmt.Errorf("invalid Redfish address: %q: missing redfish+ prefix", address)
	}

	parsed, err := url.Parse(strings.TrimPrefix(address, "redfish+"))
	if err != nil {
		return "", "", "", fmt.Errorf("parse redfish address %q: %w", address, err)
	}

	// Hostname strips the port and, for IPv6 literals, the square brackets.
	host = parsed.Hostname()
	if host == "" {
		return "", "", "", fmt.Errorf("empty host in redfish address %q", address)
	}

	path = parsed.Path
	if path == "" {
		return "", "", "", fmt.Errorf("empty path in redfish address %q", address)
	}

	port = parsed.Port()
	if port == "" {
		// url.Parse lower-cases the scheme, so a plain comparison is sufficient.
		switch parsed.Scheme {
		case "https":
			port = "443"
		case "http":
			port = "80"
		default:
			return "", "", "", fmt.Errorf("redfish address %q has no port and unsupported scheme %q", address, parsed.Scheme)
		}
	}

	return host, port, path, nil
}

// redfishAccountCollection is the part of the AccountService/Accounts response
// body that findRedfishAccountByUsername decodes.
type redfishAccountCollection struct {
	// Members holds one entry per account in the collection.
	Members []struct {
		// OdataID is the member's "@odata.id"; it is appended to the base URL
		// to fetch the individual account.
		OdataID string `json:"@odata.id"`
	} `json:"Members"`
}

// redfishAccount is the part of a single account resource that is decoded when
// searching for an account by username.
type redfishAccount struct {
	// ID is decoded from the "Id" field of the account resource.
	ID string `json:"Id"`
	// UserName is decoded from the "UserName" field and compared against the
	// username being searched for.
	UserName string `json:"UserName"`
}

// ChangeBMCPasswordViaRedfish changes the password of a BMC account through the
// Redfish AccountService API.
//
// It first looks up the account whose UserName equals username (authenticating
// with currentPassword), then sends a PATCH with the new password to that
// account's URL. All HTTP requests are made with curl from a debug pod on
// nodeName; they always use https and skip certificate verification (curl -k).
//
// The request body is marshalled in Go and handed to curl as a single argument,
// so passwords containing quotes or backslashes still produce valid JSON.
//
// An error is returned when the account cannot be found, the command fails, or
// the BMC answers with an HTTP status other than 200 or 204.
func ChangeBMCPasswordViaRedfish(oc *exutil.CLI, nodeName, redfishHost, redfishPort, username, currentPassword, newPassword string) error {
	// JoinHostPort adds the brackets an IPv6 host needs inside a URL.
	authority := net.JoinHostPort(redfishHost, redfishPort)
	baseURL := fmt.Sprintf("https://%s", authority)

	accountURL, err := findRedfishAccountByUsername(oc, nodeName, baseURL, username, currentPassword)
	if err != nil {
		return fmt.Errorf("find redfish account for user %q: %w", username, err)
	}

	payload, err := json.Marshal(map[string]string{"Password": newPassword})
	if err != nil {
		return fmt.Errorf("encode password payload: %w", err)
	}

	framework.Logf("Changing BMC password for account %s on %s", accountURL, authority)

	// $1/$2 are the credentials, $3 the JSON body, $4 the account URL. Only the
	// HTTP status code is written to stdout; the response body is discarded.
	patchScript := `curl -k -s -o /dev/null -w "%{http_code}" -X PATCH -H 'Content-Type: application/json' -u "$1:$2" -d "$3" "$4"`

	patchURL := baseURL + accountURL
	output, err := exutil.DebugNodeRetryWithOptionsAndChroot(oc, nodeName, "openshift-etcd",
		"bash", "-c", patchScript, "redfish-patch", username, currentPassword, string(payload), patchURL)
	if err != nil {
		return fmt.Errorf("PATCH %s failed: %w", patchURL, err)
	}

	statusCode := strings.TrimSpace(output)
	if statusCode != "200" && statusCode != "204" {
		return fmt.Errorf("PATCH %s returned HTTP %s (expected 200 or 204)", patchURL, statusCode)
	}

	framework.Logf("Successfully changed BMC password via Redfish API (HTTP %s)", statusCode)
	return nil
}

// findRedfishAccountByUsername returns the "@odata.id" of the Redfish account
// whose UserName equals username.
//
// It fetches the account collection at baseURL + "/redfish/v1/AccountService/Accounts"
// and then each member in turn, using curl (with -k) from a debug pod on nodeName
// and authenticating with username/password. Members that cannot be fetched or
// parsed are logged and skipped. An error is returned when the collection cannot
// be fetched or parsed, or when no member matches.
func findRedfishAccountByUsername(oc *exutil.CLI, nodeName, baseURL, username, password string) (string, error) {
	getScript := `curl -k -s -u "$1:$2" "$3"`

	// get runs the curl GET against target; label becomes $0 of the bash command.
	get := func(label, target string) (string, error) {
		return exutil.DebugNodeRetryWithOptionsAndChroot(oc, nodeName, "openshift-etcd",
			"bash", "-c", getScript, label, username, password, target)
	}

	listURL := baseURL + "/redfish/v1/AccountService/Accounts"
	listBody, err := get("redfish-list", listURL)
	if err != nil {
		return "", fmt.Errorf("GET %s failed: %w", listURL, err)
	}

	var accounts redfishAccountCollection
	if err := json.Unmarshal([]byte(listBody), &accounts); err != nil {
		return "", fmt.Errorf("parse account collection: %w (body: %s)", err, listBody)
	}

	for i := range accounts.Members {
		odataID := accounts.Members[i].OdataID
		target := baseURL + odataID

		body, getErr := get("redfish-get", target)
		if getErr != nil {
			framework.Logf("Warning: failed to GET %s: %v", target, getErr)
			continue
		}

		var candidate redfishAccount
		if parseErr := json.Unmarshal([]byte(body), &candidate); parseErr != nil {
			framework.Logf("Warning: failed to parse account at %s: %v", target, parseErr)
			continue
		}

		if candidate.UserName != username {
			continue
		}
		return odataID, nil
	}

	return "", fmt.Errorf("no Redfish account found with username %q", username)
}

// ValidateBMCCredentials checks username/password against the BMC by running
// "/usr/sbin/fence_redfish ... --action status" from a debug pod on nodeName.
//
// redfishHost, redfishPort and redfishPath are passed as --ip, --ipport and
// --systems-uri; a host containing ":" is wrapped in square brackets first.
// When sslInsecure is true, --ssl-insecure is appended to the command.
// It returns an error, including the command output, when the command fails.
func ValidateBMCCredentials(oc *exutil.CLI, nodeName, redfishHost, redfishPort, redfishPath, username, password string, sslInsecure bool) error {
	// A colon in the host is taken to mean an IPv6 literal, which is passed in bracket form.
	fenceIP := redfishHost
	if strings.ContainsRune(redfishHost, ':') {
		fenceIP = "[" + redfishHost + "]"
	}

	script := `/usr/sbin/fence_redfish --username "$1" --password "$2" --ip "$3" --ipport "$4" --systems-uri "$5" --action status`
	if sslInsecure {
		script += " --ssl-insecure"
	}

	out, runErr := exutil.DebugNodeRetryWithOptionsAndChroot(oc, nodeName, "openshift-etcd",
		"bash", "-c", script, "fence-validate",
		username, password, fenceIP, redfishPort, redfishPath)
	if runErr != nil {
		return fmt.Errorf("fence_redfish validation failed: %w (output: %s)", runErr, out)
	}

	framework.Logf("BMC credential validation passed: %s", strings.TrimSpace(out))
	return nil
}