Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 196 additions & 0 deletions pkg/operator/status.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package operator

import (
"bytes"
"context"
"encoding/json"
"fmt"
"reflect"
"sort"
"strings"

"github.com/BurntSushi/toml"
mcfgv1 "github.com/openshift/api/machineconfiguration/v1"

configv1 "github.com/openshift/api/config/v1"
Expand All @@ -27,6 +29,7 @@ import (
"github.com/openshift/machine-config-operator/pkg/apihelpers"
ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
kcc "github.com/openshift/machine-config-operator/pkg/controller/kubelet-config"
"github.com/openshift/machine-config-operator/pkg/osimagestream"
)

// syncVersion handles reporting the version to the clusteroperator
Expand Down Expand Up @@ -283,6 +286,16 @@ func (optr *Operator) syncUpgradeableStatus(co *configv1.ClusterOperator) error
coStatusCondition.Message = skewErrorMessage
}

runcBlockExists, runcBlockMessage, err := optr.checkRuncUpgradeableGuard()
if err != nil {
return err
}
if runcBlockExists {
coStatusCondition.Status = configv1.ConditionFalse
coStatusCondition.Reason = "RuncDefaultRuntimeConfigured"
coStatusCondition.Message = runcBlockMessage
}

var degraded, interrupted bool
for _, pool := range pools {
interrupted = isPoolStatusConditionTrue(pool, mcfgv1.MachineConfigPoolBuildInterrupted)
Expand Down Expand Up @@ -407,9 +420,192 @@ func (optr *Operator) cfeEvalRunc() (bool, error) {
return true, nil
}
}

// Also check rendered MachineConfigs for runc, since the default container
// runtime drop-in (99-<pool>-generated-crio-default-container-runtime) is
// created as a MachineConfig, not a ContainerRuntimeConfig CR.
runcPools, err := optr.detectRuncInRenderedConfigs()
if err != nil {
return false, err
}
if len(runcPools) > 0 {
return true, nil
}

return false, nil
}

// crioRuntimeConfig mirrors the minimal subset of CRI-O's TOML configuration
// that this operator inspects: the [crio.runtime] table's default_runtime key.
// All other keys in a drop-in are ignored by the decoder.
type crioRuntimeConfig struct {
	Crio struct {
		Runtime struct {
			// DefaultRuntime is the default_runtime value from a CRI-O
			// drop-in file; empty when the drop-in does not set it.
			DefaultRuntime string `toml:"default_runtime,omitempty"`
		} `toml:"runtime"`
	} `toml:"crio"`
}

// crioDropInDir is the directory CRI-O reads drop-in configuration files from.
// Drop-ins there are applied in lexicographic order, later files overriding
// earlier ones (see detectRuncInRenderedConfigs).
const crioDropInDir = "/etc/crio/crio.conf.d/"

// detectRuncInRenderedConfigs inspects every MachineConfigPool's rendered MachineConfig
// to determine which pools have runc as their effective container runtime.
// Unlike checking ContainerRuntimeConfig CRs, this catches runc set via raw
// MachineConfigs that inject CRI-O drop-in files directly.
//
// Returns the names of all pools whose effective CRI-O default_runtime
// resolves to runc. Pools that have not been reconciled yet (no rendered
// config) or whose rendered MachineConfig has been deleted are skipped.
func (optr *Operator) detectRuncInRenderedConfigs() ([]string, error) {
	// Allow nil listers so callers in unit tests don't need to mock these.
	if optr.mcpLister == nil || optr.mcLister == nil {
		return nil, nil
	}

	pools, err := optr.mcpLister.List(labels.Everything())
	if err != nil {
		return nil, err
	}

	var runcPools []string
	for _, pool := range pools {
		// Pools without a rendered configuration haven't been reconciled yet.
		if pool.Spec.Configuration.Name == "" {
			continue
		}

		mc, err := optr.mcLister.Get(pool.Spec.Configuration.Name)
		if err != nil {
			if apierrors.IsNotFound(err) {
				klog.V(4).Infof("Rendered MachineConfig %q for pool %q not found, skipping runc check", pool.Spec.Configuration.Name, pool.Name)
				continue
			}
			return nil, fmt.Errorf("failed to get rendered MachineConfig %q for pool %q: %w", pool.Spec.Configuration.Name, pool.Name, err)
		}

		effectiveRuntime, err := effectiveCrioDefaultRuntime(mc, pool.Name)
		if err != nil {
			return nil, err
		}
		if effectiveRuntime == mcfgv1.ContainerRuntimeDefaultRuntimeRunc {
			runcPools = append(runcPools, pool.Name)
		}
	}

	return runcPools, nil
}

// effectiveCrioDefaultRuntime parses the Ignition payload of a rendered
// MachineConfig and resolves the final CRI-O default_runtime after layering
// all drop-ins under /etc/crio/crio.conf.d/ in lexicographic order — CRI-O's
// drop-in ordering, where later files override earlier ones.
//
// Returns "" when no drop-in sets default_runtime. poolName is used only for
// error context.
func effectiveCrioDefaultRuntime(mc *mcfgv1.MachineConfig, poolName string) (string, error) {
	ignCfg, err := ctrlcommon.ParseAndConvertConfig(mc.Spec.Config.Raw)
	if err != nil {
		return "", fmt.Errorf("failed to parse Ignition config for rendered MC %q (pool %q): %w", mc.Name, poolName, err)
	}

	// Collect all CRI-O drop-in config files from the Ignition config.
	var crioFiles []string
	for _, f := range ignCfg.Storage.Files {
		if strings.HasPrefix(f.Path, crioDropInDir) {
			crioFiles = append(crioFiles, f.Path)
		}
	}
	// Sort lexicographically to match CRI-O's drop-in layering order:
	// later files override earlier ones, so the last file wins.
	sort.Strings(crioFiles)

	// Walk drop-ins in order, letting each file's default_runtime override the
	// previous. After the loop, effectiveRuntime holds the final resolved value.
	effectiveRuntime := ""
	for _, path := range crioFiles {
		fileData, err := ctrlcommon.GetIgnitionFileDataByPath(&ignCfg, path)
		if err != nil {
			return "", fmt.Errorf("failed to decode CRI-O drop-in %q in MC %q: %w", path, mc.Name, err)
		}
		if fileData == nil {
			continue
		}

		var cfg crioRuntimeConfig
		if _, err := toml.NewDecoder(bytes.NewReader(fileData)).Decode(&cfg); err != nil {
			return "", fmt.Errorf("failed to parse TOML from CRI-O drop-in %q in MC %q: %w", path, mc.Name, err)
		}
		if cfg.Crio.Runtime.DefaultRuntime != "" {
			effectiveRuntime = cfg.Crio.Runtime.DefaultRuntime
		}
	}

	return effectiveRuntime, nil
}

// getDefaultOSImageStreamName returns the cluster-wide default OS image
// stream name from the "cluster" OSImageStream object, or "" when the lister
// is not wired up, the OSImageStream feature gate is disabled, the object
// cannot be fetched, or no default stream is set.
func (optr *Operator) getDefaultOSImageStreamName() string {
	if optr.osImageStreamLister == nil || !osimagestream.IsFeatureEnabled(optr.fgHandler) {
		return ""
	}
	osis, err := optr.osImageStreamLister.Get("cluster")
	if err != nil {
		// Fix: only log the error when there actually is one. Previously a
		// nil error could be logged when the object existed but had an empty
		// Status.DefaultStream.
		klog.V(4).Infof("Could not get OSImageStream default stream: %v", err)
		return ""
	}
	if osis.Status.DefaultStream == "" {
		klog.V(4).Info("OSImageStream cluster has no default stream set")
		return ""
	}
	return osis.Status.DefaultStream
}

// isRHEL10Stream reports whether the given OS image stream name is one of the
// RHEL 10 family streams (RHEL 10 or CentOS Stream 10).
func isRHEL10Stream(stream string) bool {
	switch stream {
	case osimagestream.StreamNameRHEL10, osimagestream.StreamNameCentOS10:
		return true
	default:
		return false
	}
}

// checkRuncUpgradeableGuard blocks cluster upgrades when any MachineConfigPool
// targeting a RHEL 10 / CentOS 10 OS image stream still uses runc as its
// default container runtime.
//
// Returns (true, message, nil) when upgrades should be blocked, with a
// user-facing message listing the affected pools and remediation steps.
// Returns (false, "", nil) when no blocking condition exists.
func (optr *Operator) checkRuncUpgradeableGuard() (bool, string, error) {
	runcPools, err := optr.detectRuncInRenderedConfigs()
	if err != nil {
		return false, "", err
	}
	// No pools use runc — nothing to block.
	if len(runcPools) == 0 {
		return false, "", nil
	}

	// Only runc pools targeting a RHEL 10 stream need to block upgrades;
	// runc on RHEL 9 is still supported.
	rhel10RuncPools := optr.filterRHEL10RuncPools(runcPools)
	// All runc pools are on RHEL 9 or unknown streams — no block needed.
	if len(rhel10RuncPools) == 0 {
		return false, "", nil
	}

	message := fmt.Sprintf(
		"Upgrades are blocked because the following MachineConfigPools have runc configured as the default container runtime: [%s]. "+
			"runc is deprecated and is not available in RHCOS 10. "+
			"To unblock upgrades, migrate to crun by removing any ContainerRuntimeConfig that sets defaultRuntime to runc, "+
			"and removing any MachineConfig that sets default_runtime = \"runc\" in CRI-O configuration under /etc/crio/crio.conf.d/. "+
			"See %s for migration instructions.",
		strings.Join(rhel10RuncPools, ", "), optr.runcMigrationDocsURL())

	return true, message, nil
}

// filterRHEL10RuncPools returns the subset of poolNames whose effective OS
// image stream is RHEL 10 / CentOS 10. Lookups are best-effort: pools whose
// pool object or effective stream cannot be resolved are skipped, and pools
// with no resolvable stream at all are assumed to be RHEL 9 (not blocked).
// https://redhat.atlassian.net/browse/MCO-1914
func (optr *Operator) filterRHEL10RuncPools(poolNames []string) []string {
	defaultStream := optr.getDefaultOSImageStreamName()
	var rhel10Pools []string
	for _, poolName := range poolNames {
		pool, err := optr.mcpLister.Get(poolName)
		if err != nil {
			klog.V(4).Infof("Could not get pool %q for stream check: %v", poolName, err)
			continue
		}
		streamName, err := ctrlcommon.GetEffectiveOSImageStreamName(pool, optr.mcpLister)
		if err != nil {
			klog.V(4).Infof("Could not get effective stream for pool %q: %v", poolName, err)
			continue
		}
		// Fall back to the cluster-wide default stream if the pool has no override.
		if streamName == "" {
			streamName = defaultStream
		}
		// If the stream is still unknown, assume RHEL 9 (not blocked).
		if streamName == "" {
			continue
		}
		if isRHEL10Stream(streamName) {
			rhel10Pools = append(rhel10Pools, poolName)
		}
	}
	return rhel10Pools
}

// runcMigrationDocsURL builds a version-specific docs URL so the Upgradeable
// message points at the matching OCP documentation. Falls back to "latest"
// when the current cluster version cannot be determined.
func (optr *Operator) runcMigrationDocsURL() string {
	docsVersion := "latest"
	if ocpVersion := optr.getCurrentOCPVersionFromClusterVersion(); ocpVersion != "" {
		// Keep only the major.minor portion of the version, e.g. "4.18".
		if parts := strings.SplitN(ocpVersion, ".", 3); len(parts) >= 2 {
			docsVersion = parts[0] + "." + parts[1]
		}
	}
	return fmt.Sprintf("https://docs.redhat.com/en/documentation/openshift_container_platform/%s/html/postinstallation_configuration/machine-configuration-tasks", docsVersion)
}

// GetAllManagedNodes returns the nodes managed by MCO
func (optr *Operator) GetAllManagedNodes(pools []*mcfgv1.MachineConfigPool) ([]*corev1.Node, error) {
nodes := []*corev1.Node{}
Expand Down
Loading