diff --git a/api/v1alpha1/conversion.go b/api/v1alpha1/conversion.go index 93556e66d..72270fd25 100644 --- a/api/v1alpha1/conversion.go +++ b/api/v1alpha1/conversion.go @@ -492,6 +492,14 @@ func Convert_v1alpha2_ProxmoxMachineTemplateResource_To_v1alpha1_ProxmoxMachineT return Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(&in.Spec, &out.Spec, s) } +// Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec handles +// the lossy conversion of ProxmoxMachineSpec from v1alpha2 to v1alpha1. +// The FailureDomain field is intentionally dropped (it does not exist in v1alpha1 +// and is restored from annotation on ConvertTo). +func Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(in *v1alpha2.ProxmoxMachineSpec, out *ProxmoxMachineSpec, s conversion.Scope) error { + return autoConvert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(in, out, s) +} + func Convert_v1beta1_ObjectMeta_To_v1beta2_ObjectMeta(in *clusterv1beta1.ObjectMeta, out *clusterv1beta2.ObjectMeta, s conversion.Scope) error { if err := clusterv1beta1.Convert_v1beta1_ObjectMeta_To_v1beta2_ObjectMeta(in, out, s); err != nil { return err diff --git a/api/v1alpha1/proxmoxcluster_conversion.go b/api/v1alpha1/proxmoxcluster_conversion.go index c232124ff..38dd41e3d 100644 --- a/api/v1alpha1/proxmoxcluster_conversion.go +++ b/api/v1alpha1/proxmoxcluster_conversion.go @@ -41,6 +41,7 @@ func (src *ProxmoxCluster) ConvertTo(dstRaw conversion.Hub) error { // Restore lossy fields dst.Spec.ZoneConfigs = restored.Spec.ZoneConfigs dst.Status.InClusterZoneRef = restored.Status.InClusterZoneRef + dst.Status.FailureDomains = restored.Status.FailureDomains clusterv1.Convert_bool_To_Pointer_bool(src.Spec.ExternalManagedControlPlane, ok, restored.Spec.ExternalManagedControlPlane, &dst.Spec.ExternalManagedControlPlane) diff --git a/api/v1alpha1/proxmoxmachine_conversion.go b/api/v1alpha1/proxmoxmachine_conversion.go index e92042415..3e235f77f 100644 --- a/api/v1alpha1/proxmoxmachine_conversion.go +++ b/api/v1alpha1/proxmoxmachine_conversion.go @@ -87,6 +87,11 @@ func restoreProxmoxMachineSpec(src *ProxmoxMachineSpec, dst *v1alpha2.ProxmoxMac clusterv1.Convert_int32_To_Pointer_int32(src.NumSockets, ok, restored.NumSockets, &dst.NumSockets) clusterv1.Convert_int32_To_Pointer_int32(src.MemoryMiB, ok, restored.MemoryMiB, &dst.MemoryMiB) + // Restore FailureDomain (v1alpha2-only field, set by CAPI machine controller). + if ok { + dst.FailureDomain = restored.FailureDomain + } + // Turn ProxmoxMachineSpec.Target into allowedNodes. in v1alpha1, target will // ignore AllowedNodes, so we can literally overwrite these. if src.Target != nil { diff --git a/api/v1alpha1/zz_generated.conversion.go b/api/v1alpha1/zz_generated.conversion.go index f33158012..83f9bd227 100644 --- a/api/v1alpha1/zz_generated.conversion.go +++ b/api/v1alpha1/zz_generated.conversion.go @@ -169,11 +169,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*v1alpha2.ProxmoxMachineSpec)(nil), (*ProxmoxMachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(a.(*v1alpha2.ProxmoxMachineSpec), b.(*ProxmoxMachineSpec), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*ProxmoxMachineTemplate)(nil), (*v1alpha2.ProxmoxMachineTemplate)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha1_ProxmoxMachineTemplate_To_v1alpha2_ProxmoxMachineTemplate(a.(*ProxmoxMachineTemplate), b.(*v1alpha2.ProxmoxMachineTemplate), scope) }); err != nil { @@ -409,6 +404,11 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddConversionFunc((*v1alpha2.ProxmoxMachineSpec)(nil), (*ProxmoxMachineSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(a.(*v1alpha2.ProxmoxMachineSpec), b.(*ProxmoxMachineSpec), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1alpha2.ProxmoxMachineStatus)(nil), (*ProxmoxMachineStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha2_ProxmoxMachineStatus_To_v1alpha1_ProxmoxMachineStatus(a.(*v1alpha2.ProxmoxMachineStatus), b.(*ProxmoxMachineStatus), scope) }); err != nil { @@ -952,6 +952,7 @@ func autoConvert_v1alpha2_ProxmoxClusterStatus_To_v1alpha1_ProxmoxClusterStatus( } else { out.NodeLocations = nil } + // WARNING: in.FailureDomains requires manual conversion: does not exist in peer-type return nil } @@ -1215,6 +1216,7 @@ func autoConvert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(in * if err := Convert_v1alpha2_VirtualMachineCloneSpec_To_v1alpha1_VirtualMachineCloneSpec(&in.VirtualMachineCloneSpec, &out.VirtualMachineCloneSpec, s); err != nil { return err } + // WARNING: in.FailureDomain requires manual conversion: does not exist in peer-type if err := v1.Convert_string_To_Pointer_string(&in.ProviderID, &out.ProviderID, s); err != nil { return err } @@ -1254,11 +1256,6 @@ func autoConvert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(in * return nil } -// Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec is an autogenerated conversion function. -func Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(in *v1alpha2.ProxmoxMachineSpec, out *ProxmoxMachineSpec, s conversion.Scope) error { - return autoConvert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(in, out, s) -} - func autoConvert_v1alpha1_ProxmoxMachineStatus_To_v1alpha2_ProxmoxMachineStatus(in *ProxmoxMachineStatus, out *v1alpha2.ProxmoxMachineStatus, s conversion.Scope) error { // WARNING: in.Ready requires manual conversion: does not exist in peer-type out.Addresses = *(*[]v1beta2.MachineAddress)(unsafe.Pointer(&in.Addresses)) diff --git a/api/v1alpha2/conditions_consts.go b/api/v1alpha2/conditions_consts.go index a909174e4..b851c5c7f 100644 --- a/api/v1alpha2/conditions_consts.go +++ b/api/v1alpha2/conditions_consts.go @@ -94,6 +94,12 @@ const ( // documents a ProxmoxMachine assigning host addresses for Cluster API. ProxmoxMachineVirtualMachineProvisionedWaitingForClusterAPIMachineAddressesReason = "WaitingForClusterAPIMachineAddresses" + // ProxmoxMachineVirtualMachineProvisionedFailureDomainNotReadyReason documents + // a ProxmoxMachine waiting for its failure domain (zone) to be configured + // in the ProxmoxCluster. This is a transient condition that resolves when + // the zone is added to spec.zoneConfig. + ProxmoxMachineVirtualMachineProvisionedFailureDomainNotReadyReason = "FailureDomainNotReady" + // ProxmoxMachineVirtualMachineProvisionedVMProvisionFailedReason documents a failure // during virtual machine provisioning. ProxmoxMachineVirtualMachineProvisionedVMProvisionFailedReason = "VMProvisionFailed" diff --git a/api/v1alpha2/proxmoxcluster_types.go b/api/v1alpha2/proxmoxcluster_types.go index 84a0ed9ea..fb4422c54 100644 --- a/api/v1alpha2/proxmoxcluster_types.go +++ b/api/v1alpha2/proxmoxcluster_types.go @@ -24,6 +24,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -148,6 +149,18 @@ type ZoneConfigSpec struct { // +listType=set // +kubebuilder:validation:MinItems=1 DNSServers []string `json:"dnsServers,omitempty"` + + // nodes specifies the Proxmox nodes that belong to this zone. + // When set, machines assigned to this failure domain will only + // be placed on these nodes. + // +optional + // +listType=set + Nodes []string `json:"nodes,omitempty"` + + // controlPlane indicates whether this zone is eligible for control plane machines. + // Defaults to true when not set. + // +optional + ControlPlane *bool `json:"controlPlane,omitempty"` } // IPConfigSpec contains information about available IP config. @@ -232,6 +245,14 @@ type ProxmoxClusterStatus struct { // for different machines. // +optional NodeLocations *NodeLocations `json:"nodeLocations,omitempty"` + + // failureDomains is a slice of failure domains synced from zone configurations. + // This field is part of the Cluster API contract and is used by KubeadmControlPlane + // to distribute control plane machines across zones. + // +optional + // +listType=map + // +listMapKey=name + FailureDomains []clusterv1.FailureDomain `json:"failureDomains,omitempty"` } // ProxmoxClusterInitializationStatus provides observations of the ProxmoxCluster initialization process. @@ -523,6 +544,18 @@ func (c *ProxmoxCluster) addNodeLocation(loc NodeLocation, isControlPlane bool) c.Status.NodeLocations.Workers = append(c.Status.NodeLocations.Workers, loc) } +// GetZoneNodes returns the Proxmox node names for a given zone name. +// Returns nil if the zone is not found or has no explicit nodes configured. +func (c *ProxmoxCluster) GetZoneNodes(zoneName string) []string { + for _, zc := range c.Spec.ZoneConfigs { + if ptr.Deref(zc.Zone, "") == zoneName { + return zc.Nodes + } + } + + return nil +} + func init() { objectTypes = append(objectTypes, &ProxmoxCluster{}, &ProxmoxClusterList{}) } diff --git a/api/v1alpha2/proxmoxcluster_types_test.go b/api/v1alpha2/proxmoxcluster_types_test.go index a87f43fad..bb3eef091 100644 --- a/api/v1alpha2/proxmoxcluster_types_test.go +++ b/api/v1alpha2/proxmoxcluster_types_test.go @@ -216,6 +216,40 @@ func TestRemoveNodeLocation(t *testing.T) { require.Len(t, cl.Status.NodeLocations.ControlPlane, 0) } +func TestGetZoneNodes(t *testing.T) { + cl := &ProxmoxCluster{ + Spec: ProxmoxClusterSpec{ + ZoneConfigs: []ZoneConfigSpec{ + { + Zone: ptr.To("zone-a"), + Nodes: []string{"pve1", "pve2"}, + }, + { + Zone: ptr.To("zone-b"), + // No nodes explicitly set. + }, + }, + }, + } + + // Zone found with nodes. + nodes := cl.GetZoneNodes("zone-a") + require.Equal(t, []string{"pve1", "pve2"}, nodes) + + // Zone found without nodes. + nodes = cl.GetZoneNodes("zone-b") + require.Nil(t, nodes) + + // Zone not found. + nodes = cl.GetZoneNodes("zone-c") + require.Nil(t, nodes) + + // Empty ZoneConfigs. + empty := &ProxmoxCluster{} + nodes = empty.GetZoneNodes("anything") + require.Nil(t, nodes) +} + func TestSetInClusterIPPoolRef(t *testing.T) { cl := defaultCluster() diff --git a/api/v1alpha2/proxmoxmachine_types.go b/api/v1alpha2/proxmoxmachine_types.go index 4a2745ea6..b0bbdef10 100644 --- a/api/v1alpha2/proxmoxmachine_types.go +++ b/api/v1alpha2/proxmoxmachine_types.go @@ -65,6 +65,12 @@ type ProxmoxMachineChecks struct { type ProxmoxMachineSpec struct { VirtualMachineCloneSpec `json:",inline"` + // failureDomain is the failure domain the machine is placed in. + // This field is part of the Cluster API InfrastructureMachine contract + // and is set by the CAPI machine controller. + // +optional + FailureDomain *string `json:"failureDomain,omitempty"` + // providerID is the virtual machine BIOS UUID formatted as // proxmox://6c3fa683-bef9-4425-b413-eaa45a9d6191 // +optional diff --git a/api/v1alpha2/zz_generated.deepcopy.go b/api/v1alpha2/zz_generated.deepcopy.go index 8606a5e41..4e5716df3 100644 --- a/api/v1alpha2/zz_generated.deepcopy.go +++ b/api/v1alpha2/zz_generated.deepcopy.go @@ -504,6 +504,13 @@ func (in *ProxmoxClusterStatus) DeepCopyInto(out *ProxmoxClusterStatus) { *out = new(NodeLocations) (*in).DeepCopyInto(*out) } + if in.FailureDomains != nil { + in, out := &in.FailureDomains, &out.FailureDomains + *out = make([]v1beta2.FailureDomain, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxmoxClusterStatus. @@ -715,6 +722,11 @@ func (in *ProxmoxMachineList) DeepCopyObject() runtime.Object { func (in *ProxmoxMachineSpec) DeepCopyInto(out *ProxmoxMachineSpec) { *out = *in in.VirtualMachineCloneSpec.DeepCopyInto(&out.VirtualMachineCloneSpec) + if in.FailureDomain != nil { + in, out := &in.FailureDomain, &out.FailureDomain + *out = new(string) + **out = **in + } if in.VirtualMachineID != nil { in, out := &in.VirtualMachineID, &out.VirtualMachineID *out = new(int64) @@ -1277,6 +1289,16 @@ func (in *ZoneConfigSpec) DeepCopyInto(out *ZoneConfigSpec) { *out = make([]string, len(*in)) copy(*out, *in) } + if in.Nodes != nil { + in, out := &in.Nodes, &out.Nodes + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.ControlPlane != nil { + in, out := &in.ControlPlane, &out.ControlPlane + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ZoneConfigSpec. diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml index 1fdc4c576..88fd21cec 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml @@ -1244,6 +1244,11 @@ spec: description: ZoneConfigSpec is the Network Configuration for further deployment zones. properties: + controlPlane: + description: |- + controlPlane indicates whether this zone is eligible for control plane machines. + Defaults to true when not set. + type: boolean dnsServers: description: dnsServers contains information about nameservers used by the machines in this zone. @@ -1328,6 +1333,15 @@ spec: x-kubernetes-validations: - message: IPv6Config addresses must be provided rule: self.addresses.size() > 0 + nodes: + description: |- + nodes specifies the Proxmox nodes that belong to this zone. + When set, machines assigned to this failure domain will only + be placed on these nodes. + items: + type: string + type: array + x-kubernetes-list-type: set zone: description: zone is the name of your deployment zone. pattern: ^[a-z0-9A-Z](?:[a-z0-9A-Z-_.]{0,61}[a-z0-9A-Z])?$ @@ -1412,6 +1426,38 @@ spec: x-kubernetes-list-map-keys: - type x-kubernetes-list-type: map + failureDomains: + description: |- + failureDomains is a slice of failure domains synced from zone configurations. + This field is part of the Cluster API contract and is used by KubeadmControlPlane + to distribute control plane machines across zones. + items: + description: |- + FailureDomain is the Schema for Cluster API failure domains. + It allows controllers to understand how many failure domains a cluster can optionally span across. + properties: + attributes: + additionalProperties: + type: string + description: attributes is a free form map of attributes an + infrastructure provider might use or require. + type: object + controlPlane: + description: controlPlane determines if this failure domain + is suitable for use by control plane machines. + type: boolean + name: + description: name is the name of the failure domain. + maxLength: 256 + minLength: 1 + type: string + required: + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map inClusterIPPoolRef: description: inClusterIPPoolRef is the reference to the created in-cluster IP pool. diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclustertemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclustertemplates.yaml index d80c57869..56af4c86f 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclustertemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclustertemplates.yaml @@ -1128,6 +1128,11 @@ spec: description: ZoneConfigSpec is the Network Configuration for further deployment zones. properties: + controlPlane: + description: |- + controlPlane indicates whether this zone is eligible for control plane machines. + Defaults to true when not set. + type: boolean dnsServers: description: dnsServers contains information about nameservers used by the machines in this zone. @@ -1214,6 +1219,15 @@ spec: x-kubernetes-validations: - message: IPv6Config addresses must be provided rule: self.addresses.size() > 0 + nodes: + description: |- + nodes specifies the Proxmox nodes that belong to this zone. + When set, machines assigned to this failure domain will only + be placed on these nodes. + items: + type: string + type: array + x-kubernetes-list-type: set zone: description: zone is the name of your deployment zone. pattern: ^[a-z0-9A-Z](?:[a-z0-9A-Z-_.]{0,61}[a-z0-9A-Z])?$ diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachines.yaml index 6b391edc9..110129ce5 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachines.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachines.yaml @@ -978,6 +978,12 @@ spec: - message: Value is immutable rule: self == oldSelf type: object + failureDomain: + description: |- + failureDomain is the failure domain the machine is placed in. + This field is part of the Cluster API InfrastructureMachine contract + and is set by the CAPI machine controller. + type: string format: description: format for file storage. Only valid for full clone. enum: diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachinetemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachinetemplates.yaml index 220fca029..ba6e12a69 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachinetemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxmachinetemplates.yaml @@ -840,6 +840,12 @@ spec: - message: Value is immutable rule: self == oldSelf type: object + failureDomain: + description: |- + failureDomain is the failure domain the machine is placed in. + This field is part of the Cluster API InfrastructureMachine contract + and is set by the CAPI machine controller. + type: string format: description: format for file storage. Only valid for full clone. diff --git a/docs/advanced-setups.md b/docs/advanced-setups.md index b7c15703d..e88199419 100644 --- a/docs/advanced-setups.md +++ b/docs/advanced-setups.md @@ -278,6 +278,95 @@ volumes: ``` +## Failure Domains (Zone-Based Placement) + +CAPMOX zones can be mapped to CAPI failure domains, enabling KubeadmControlPlane +to automatically distribute control plane nodes across zones and MachineDeployments +to target specific zones. + +### Configuration + +Add `nodes` and optionally `controlPlane` to each zone in `ProxmoxCluster.spec.zoneConfig`: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1alpha2 +kind: ProxmoxCluster +metadata: + name: my-cluster +spec: + allowedNodes: + - pve1 + - pve2 + - pve3 + - pve4 + ipv4Config: + addresses: ["10.0.0.10-10.0.0.50"] + prefix: 24 + gateway: "10.0.0.1" + dnsServers: ["8.8.8.8"] + zoneConfig: + - zone: "rack-a" + nodes: ["pve1", "pve2"] + ipv4Config: + addresses: ["10.0.1.10-10.0.1.30"] + prefix: 24 + gateway: "10.0.1.1" + dnsServers: ["8.8.8.8"] + - zone: "rack-b" + nodes: ["pve3", "pve4"] + controlPlane: false # workers only + ipv4Config: + addresses: ["10.0.2.10-10.0.2.30"] + prefix: 24 + gateway: "10.0.2.1" + dnsServers: ["8.8.8.8"] +``` + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `nodes` | `[]string` | — | Proxmox nodes belonging to this zone. VMs assigned to this failure domain are scheduled only on these nodes. | +| `controlPlane` | `*bool` | `true` | Whether this zone is eligible for control plane machines. Set to `false` for worker-only zones. | + +### How it works + +When `zoneConfig` entries include a `nodes` field, the cluster controller populates +`status.failureDomains` on the ProxmoxCluster. The CAPI Cluster controller copies +these to `Cluster.status.failureDomains`, and KubeadmControlPlane uses them to +distribute control plane replicas round-robin across eligible zones. + +For workers, set `failureDomain` on MachineDeployments to pin them to a specific zone: + +```yaml +apiVersion: cluster.x-k8s.io/v1beta2 +kind: MachineDeployment +metadata: + name: workers-rack-a +spec: + clusterName: my-cluster + replicas: 3 + template: + spec: + clusterName: my-cluster + failureDomain: "rack-a" + # ... +``` + +Each zone gets its own IPAM pool (e.g. `my-cluster-rack-a-v4-icip`). Machines +assigned to a zone receive IP addresses from that zone's pool automatically. + +### Behavior without zones + +When no `zoneConfig` entries are present, `status.failureDomains` is not set. +KubeadmControlPlane does not perform failure domain placement. Existing clusters +without zones continue to work exactly as before. + +### Limitations + +- Failure domains operate within a single Proxmox cluster (single API endpoint). + For multi-datacenter deployments spanning separate Proxmox clusters, see [#370](https://github.com/ionos-cloud/cluster-api-provider-proxmox/issues/370). +- Zones without a `nodes` list appear as failure domains but do not restrict VM placement + to specific Proxmox nodes. The IPAM pool selection still works per-zone. + ## Custom Allowed Nodes for ProxmoxMachine Previously, the Proxmox nodes that will host the Machines are defined in `ProxmoxCluster.spec.allowedNodes`, that config restrict us from placing some set of machines into some specific nodes. diff --git a/internal/controller/proxmoxcluster_controller.go b/internal/controller/proxmoxcluster_controller.go index 1862363be..12cae9c28 100644 --- a/internal/controller/proxmoxcluster_controller.go +++ b/internal/controller/proxmoxcluster_controller.go @@ -20,6 +20,7 @@ package controller import ( "context" "fmt" + "sort" "time" "github.com/pkg/errors" @@ -223,6 +224,8 @@ func (r *ProxmoxClusterReconciler) reconcileNormal(ctx context.Context, clusterS return res, nil } + r.reconcileFailureDomains(clusterScope) + if err := r.reconcileNormalCredentialsSecret(ctx, clusterScope); err != nil { conditions.Set(clusterScope.ProxmoxCluster, metav1.Condition{ Type: infrav1.ProxmoxClusterProxmoxAvailableCondition, @@ -310,6 +313,33 @@ func (r *ProxmoxClusterReconciler) reconcileIPAM(ctx context.Context, clusterSco return reconcile.Result{}, nil } +// reconcileFailureDomains populates Status.FailureDomains from ZoneConfigs. +// When no zones are configured, FailureDomains is set to nil, preserving +// backwards compatibility (KCP will not attempt failure domain placement). +func (r *ProxmoxClusterReconciler) reconcileFailureDomains(clusterScope *scope.ClusterScope) { + pc := clusterScope.ProxmoxCluster + if len(pc.Spec.ZoneConfigs) == 0 { + pc.Status.FailureDomains = nil + return + } + + faildoms := make([]clusterv1.FailureDomain, 0, len(pc.Spec.ZoneConfigs)) + for _, zc := range pc.Spec.ZoneConfigs { + zoneName := ptr.Deref(zc.Zone, "") + if zoneName == "" { + continue + } + + faildoms = append(faildoms, clusterv1.FailureDomain{ + Name: zoneName, + ControlPlane: ptr.To(ptr.Deref(zc.ControlPlane, true)), + }) + } + + sort.Slice(faildoms, func(i, j int) bool { return faildoms[i].Name < faildoms[j].Name }) + pc.Status.FailureDomains = faildoms +} + func (r *ProxmoxClusterReconciler) reconcileNormalCredentialsSecret(ctx context.Context, clusterScope *scope.ClusterScope) error { proxmoxCluster := clusterScope.ProxmoxCluster if !hasCredentialsRef(proxmoxCluster) { diff --git a/internal/controller/proxmoxmachine_controller.go b/internal/controller/proxmoxmachine_controller.go index 354acb2dc..d83ad1e2b 100644 --- a/internal/controller/proxmoxmachine_controller.go +++ b/internal/controller/proxmoxmachine_controller.go @@ -18,6 +18,8 @@ package controller import ( "context" + "fmt" + "time" "github.com/go-logr/logr" "github.com/pkg/errors" @@ -213,6 +215,21 @@ func (r *ProxmoxMachineReconciler) reconcileNormal(ctx context.Context, machineS } } + // If the CAPI Machine has a failure domain set, restrict allowed nodes + // to those belonging to that zone and set the effective zone for IPAM. + if faildom := machineScope.Machine.Spec.FailureDomain; faildom != "" { + if err := machineScope.ApplyFailureDomainNodes(faildom); err != nil { + machineScope.Logger.Error(err, "failure domain not found", "failureDomain", faildom) + conditions.Set(machineScope.ProxmoxMachine, metav1.Condition{ + Type: infrav1.ProxmoxMachineVirtualMachineProvisionedCondition, + Status: metav1.ConditionFalse, + Reason: infrav1.ProxmoxMachineVirtualMachineProvisionedFailureDomainNotReadyReason, + Message: fmt.Sprintf("failure domain %q not found in ProxmoxCluster zones", faildom), + }) + return ctrl.Result{RequeueAfter: 30 * time.Second}, nil + } + } + // find the vm // Get or create the VM. vm, err := vmservice.ReconcileVM(ctx, machineScope) @@ -238,7 +255,7 @@ func (r *ProxmoxMachineReconciler) reconcileNormal(ctx context.Context, machineS // Set proxmox deployment zone for label selectors. labels := machineScope.ProxmoxMachine.GetLabels() labels[infrav1.ProxmoxZoneLabel] = - ptr.Deref(machineScope.ProxmoxMachine.Spec.Network.Zone, "default") + ptr.Deref(machineScope.GetEffectiveZone(), "default") machineScope.ProxmoxMachine.SetLabels(labels) machineScope.SetReady() diff --git a/internal/service/scheduler/vmscheduler.go b/internal/service/scheduler/vmscheduler.go index d2292e4c6..20bc51558 100644 --- a/internal/service/scheduler/vmscheduler.go +++ b/internal/service/scheduler/vmscheduler.go @@ -48,17 +48,16 @@ func (err InsufficientMemoryError) Error() string { // It requires the machine's ProxmoxCluster to have at least 1 allowed node. func ScheduleVM(ctx context.Context, machineScope *scope.MachineScope) (string, error) { client := machineScope.InfraCluster.ProxmoxClient - // Use the default allowed nodes from the ProxmoxCluster. - allowedNodes := machineScope.InfraCluster.ProxmoxCluster.Spec.AllowedNodes schedulerHints := machineScope.InfraCluster.ProxmoxCluster.Spec.SchedulerHints locations := machineScope.InfraCluster.ProxmoxCluster.Status.NodeLocations.Workers if util.IsControlPlaneMachine(machineScope.Machine) { locations = machineScope.InfraCluster.ProxmoxCluster.Status.NodeLocations.ControlPlane } - // If ProxmoxMachine defines allowedNodes use them instead - if len(machineScope.ProxmoxMachine.Spec.AllowedNodes) > 0 { - allowedNodes = machineScope.ProxmoxMachine.Spec.AllowedNodes + // Use effective allowed nodes (failure-domain override > machine spec > cluster spec). + allowedNodes := machineScope.GetEffectiveAllowedNodes() + if len(allowedNodes) == 0 { + allowedNodes = machineScope.InfraCluster.ProxmoxCluster.Spec.AllowedNodes } return selectNode(ctx, client, machineScope.ProxmoxMachine, locations, allowedNodes, schedulerHints) diff --git a/internal/service/scheduler/vmscheduler_test.go b/internal/service/scheduler/vmscheduler_test.go index af806dc69..c3c0eacb0 100644 --- a/internal/service/scheduler/vmscheduler_test.go +++ b/internal/service/scheduler/vmscheduler_test.go @@ -250,6 +250,61 @@ func TestScheduleVM(t *testing.T) { require.Equal(t, "pve2", node) } +func TestScheduleVMWithEffectiveAllowedNodes(t *testing.T) { + ctrlClient := setupClient() + + proxmoxCluster := infrav1.ProxmoxCluster{ + ObjectMeta: metav1.ObjectMeta{Name: "bar"}, + Spec: infrav1.ProxmoxClusterSpec{ + AllowedNodes: []string{"pve1", "pve2", "pve3"}, + }, + Status: infrav1.ProxmoxClusterStatus{ + NodeLocations: &infrav1.NodeLocations{ + Workers: []infrav1.NodeLocation{}, + }, + }, + } + require.NoError(t, ctrlClient.Create(context.Background(), &proxmoxCluster)) + + proxmoxMachine := &infrav1.ProxmoxMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foo-machine", + Labels: map[string]string{"cluster.x-k8s.io/cluster-name": "bar"}, + }, + Spec: infrav1.ProxmoxMachineSpec{ + MemoryMiB: ptr.To(int32(10)), + }, + } + + fakeProxmoxClient := proxmoxtest.NewMockClient(t) + cluster := &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{Name: "bar", Namespace: "default"}, + } + machineScope, err := scope.NewMachineScope(scope.MachineScopeParams{ + Client: ctrlClient, + Machine: &clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "foo-machine", Namespace: "default"}}, + Cluster: cluster, + InfraCluster: &scope.ClusterScope{ + Cluster: cluster, + ProxmoxCluster: &proxmoxCluster, + ProxmoxClient: fakeProxmoxClient, + }, + ProxmoxMachine: proxmoxMachine, + IPAMHelper: &ipam.Helper{}, + }) + require.NoError(t, err) + + // Set effective allowed nodes to only pve3 (simulating failure domain filtering). + machineScope.SetEffectiveAllowedNodes([]string{"pve3"}) + + // Only pve3 should be queried. + fakeProxmoxClient.EXPECT().GetReservableMemoryBytes(context.Background(), "pve3", int64(100)).Return(miBytes(60), nil) + + node, err := ScheduleVM(context.Background(), machineScope) + require.NoError(t, err) + require.Equal(t, "pve3", node) +} + func TestInsufficientMemoryError_Error(t *testing.T) { err := InsufficientMemoryError{ node: "pve1", diff --git a/internal/service/vmservice/helpers_test.go b/internal/service/vmservice/helpers_test.go index 155acc6b6..839b63c78 100644 --- a/internal/service/vmservice/helpers_test.go +++ b/internal/service/vmservice/helpers_test.go @@ -327,7 +327,7 @@ func createNetworkSpecForMachine(t *testing.T, c client.Client, machineScope *sc // Can't hurt to create ippools here createIPPools(t, c, machineScope) - defaultPools, _ := machineScope.IPAMHelper.GetInClusterPools(context.Background(), machineScope.ProxmoxMachine) + defaultPools, _ := machineScope.IPAMHelper.GetInClusterPools(context.Background(), machineScope.ProxmoxMachine, machineScope.GetEffectiveZone()) i := 0 // counter for ipPrefix variadic argument // Create the pools sequentially by ref for _, device := range ptr.Deref(machineScope.ProxmoxMachine.Spec.Network, infrav1.NetworkSpec{}).NetworkDevices { @@ -419,7 +419,7 @@ func isDefaultPool(machineScope *scope.MachineScope, pool corev1.TypedLocalObjec func getDefaultPoolRefs(machineScope *scope.MachineScope) infrav1.InClusterZoneRef { cluster := machineScope.InfraCluster.ProxmoxCluster - zone := ptr.Deref(machineScope.ProxmoxMachine.Spec.Network.Zone, "default") + zone := ptr.Deref(machineScope.GetEffectiveZone(), "default") zoneIndex := slices.IndexFunc(cluster.Status.InClusterZoneRef, func(z infrav1.InClusterZoneRef) bool { return zone == *z.Zone }) diff --git a/internal/service/vmservice/utils.go b/internal/service/vmservice/utils.go index d2b09ec0c..dff14de1d 100644 --- a/internal/service/vmservice/utils.go +++ b/internal/service/vmservice/utils.go @@ -50,7 +50,7 @@ func GetInClusterIPPoolRefs(ctx context.Context, machineScope *scope.MachineScop IPv6 *corev1.TypedLocalObjectReference } - pools, err := machineScope.IPAMHelper.GetInClusterPools(ctx, machineScope.ProxmoxMachine) + pools, err := machineScope.IPAMHelper.GetInClusterPools(ctx, machineScope.ProxmoxMachine, machineScope.GetEffectiveZone()) if err != nil { return ret, err } diff --git a/internal/service/vmservice/vm.go b/internal/service/vmservice/vm.go index 2d70c2fb7..21553c4d0 100644 --- a/internal/service/vmservice/vm.go +++ b/internal/service/vmservice/vm.go @@ -464,7 +464,7 @@ func createVM(ctx context.Context, scope *scope.MachineScope) (proxmox.VMCloneRe scope.InfraCluster.ProxmoxCluster.Status.NodeLocations = new(infrav1.NodeLocations) } - if len(scope.InfraCluster.ProxmoxCluster.Spec.AllowedNodes) > 0 || len(scope.ProxmoxMachine.Spec.AllowedNodes) > 0 { + if len(scope.GetEffectiveAllowedNodes()) > 0 || len(scope.InfraCluster.ProxmoxCluster.Spec.AllowedNodes) > 0 { var err error options.Target, err = selectNextNode(ctx, scope) if err != nil { @@ -516,6 +516,7 @@ func createVM(ctx context.Context, scope *scope.MachineScope) (proxmox.VMCloneRe scope.InfraCluster.ProxmoxCluster.AddNodeLocation(infrav1.NodeLocation{ Machine: corev1.LocalObjectReference{Name: options.Name}, Node: node, + Zone: scope.GetEffectiveZone(), }, util.IsControlPlaneMachine(scope.Machine)) return res, scope.InfraCluster.PatchObject() diff --git a/pkg/kubernetes/ipam/ipam.go b/pkg/kubernetes/ipam/ipam.go index 35636c2cb..1e07b675e 100644 --- a/pkg/kubernetes/ipam/ipam.go +++ b/pkg/kubernetes/ipam/ipam.go @@ -151,8 +151,9 @@ func (h *Helper) poolFromObjectRef(ctx context.Context, o interface{}, namespace } // GetInClusterPools returns the IPPools belonging to the ProxmoxCluster relative to its Zone. +// The zoneOverride parameter, when non-nil, takes precedence over the machine's Spec.Network.Zone. // TODO: streamline codeflow (unify GetIPPools). -func (h *Helper) GetInClusterPools(ctx context.Context, moxm *infrav1.ProxmoxMachine) ( +func (h *Helper) GetInClusterPools(ctx context.Context, moxm *infrav1.ProxmoxMachine, zoneOverride infrav1.Zone) ( struct { Zone infrav1.Zone IPv4 *struct { @@ -178,7 +179,13 @@ func (h *Helper) GetInClusterPools(ctx context.Context, moxm *infrav1.ProxmoxMac namespace := moxm.ObjectMeta.Namespace - zone := ptr.To(ptr.Deref(moxm.Spec.Network.Zone, "default")) + // Use zoneOverride if provided (from failure domain), else fall back to spec. + specZone := "default" + if moxm.Spec.Network != nil && moxm.Spec.Network.Zone != nil { + specZone = *moxm.Spec.Network.Zone + } + + zone := ptr.To(ptr.Deref(zoneOverride, specZone)) zoneIndex := slices.IndexFunc(h.cluster.Status.InClusterZoneRef, func(z infrav1.InClusterZoneRef) bool { return ptr.Equal(zone, z.Zone) }) diff --git a/pkg/scope/machine.go b/pkg/scope/machine.go index d314ca16e..89170107f 100644 --- a/pkg/scope/machine.go +++ b/pkg/scope/machine.go @@ -61,6 +61,14 @@ type MachineScope struct { ProxmoxMachine *infrav1.ProxmoxMachine IPAMHelper *ipam.Helper VirtualMachine *proxmox.VirtualMachine + + // effectiveAllowedNodes overrides AllowedNodes when a failure domain + // is active. nil means "use the spec value". + effectiveAllowedNodes []string + + // effectiveZone overrides Network.Zone when a failure domain is active. + // nil means "use the spec value". + effectiveZone *string } // NewMachineScope creates a new MachineScope from the supplied parameters. @@ -252,3 +260,63 @@ func (m *MachineScope) SkipCloudInitCheck() bool { return false } + +// SetEffectiveAllowedNodes sets an in-memory override for AllowedNodes, +// used when a failure domain restricts placement to specific Proxmox nodes. +func (m *MachineScope) SetEffectiveAllowedNodes(nodes []string) { + m.effectiveAllowedNodes = nodes +} + +// GetEffectiveAllowedNodes returns the effective AllowedNodes, preferring +// the failure-domain override, then ProxmoxMachine spec, then nil. +func (m *MachineScope) GetEffectiveAllowedNodes() []string { + if m.effectiveAllowedNodes != nil { + return m.effectiveAllowedNodes + } + + return m.ProxmoxMachine.Spec.AllowedNodes +} + +// ApplyFailureDomainNodes scopes the machine's effective AllowedNodes to +// the nodes declared in the given failure domain (zone). If the zone has +// no explicit nodes configured, it is a no-op. +// Returns an error if no matching zone is found in the ProxmoxCluster spec. +func (m *MachineScope) ApplyFailureDomainNodes(failureDomain string) error { + // Set the effective zone for IPAM pool selection (does not mutate spec). + m.SetEffectiveZone(ptr.To(failureDomain)) + + nodes := m.InfraCluster.ProxmoxCluster.GetZoneNodes(failureDomain) + if nodes != nil { + m.SetEffectiveAllowedNodes(nodes) + return nil + } + + // Check if the zone exists at all (may have no explicit node list). + for _, zc := range m.InfraCluster.ProxmoxCluster.Spec.ZoneConfigs { + if ptr.Deref(zc.Zone, "") == failureDomain { + return nil + } + } + + return fmt.Errorf("zone %q not configured in ProxmoxCluster", failureDomain) +} + +// SetEffectiveZone sets an in-memory zone override for IPAM pool selection, +// used when a failure domain is active. Does not mutate ProxmoxMachine spec. +func (m *MachineScope) SetEffectiveZone(zone *string) { + m.effectiveZone = zone +} + +// GetEffectiveZone returns the effective zone, preferring the failure-domain +// override, then Spec.Network.Zone, then "default". +func (m *MachineScope) GetEffectiveZone() *string { + if m.effectiveZone != nil { + return m.effectiveZone + } + + if m.ProxmoxMachine.Spec.Network != nil { + return m.ProxmoxMachine.Spec.Network.Zone + } + + return nil +} diff --git a/pkg/scope/machine_test.go b/pkg/scope/machine_test.go index 6b7ce584e..3f00cc469 100644 --- a/pkg/scope/machine_test.go +++ b/pkg/scope/machine_test.go @@ -174,6 +174,69 @@ func TestMachineScope_SkipCloudInit(t *testing.T) { require.False(t, scope.SkipQemuGuestCheck()) } +func TestMachineScope_GetEffectiveAllowedNodes(t *testing.T) { + scope := MachineScope{ + ProxmoxMachine: &infrav1.ProxmoxMachine{ + Spec: infrav1.ProxmoxMachineSpec{ + AllowedNodes: []string{"spec-node1"}, + }, + }, + } + + // Without override, returns spec AllowedNodes. + require.Equal(t, []string{"spec-node1"}, scope.GetEffectiveAllowedNodes()) + + // With override, returns effective nodes. + scope.SetEffectiveAllowedNodes([]string{"fd-node1", "fd-node2"}) + require.Equal(t, []string{"fd-node1", "fd-node2"}, scope.GetEffectiveAllowedNodes()) +} + +func TestMachineScope_ApplyFailureDomainNodes(t *testing.T) { + cluster := &infrav1.ProxmoxCluster{ + Spec: infrav1.ProxmoxClusterSpec{ + ZoneConfigs: []infrav1.ZoneConfigSpec{ + { + Zone: ptr.To("zone-a"), + Nodes: []string{"pve1", "pve2"}, + }, + { + Zone: ptr.To("zone-b"), + // No explicit nodes. + }, + }, + }, + } + + t.Run("zone with nodes", func(t *testing.T) { + scope := MachineScope{ + InfraCluster: &ClusterScope{ProxmoxCluster: cluster}, + ProxmoxMachine: &infrav1.ProxmoxMachine{}, + } + require.NoError(t, scope.ApplyFailureDomainNodes("zone-a")) + require.Equal(t, []string{"pve1", "pve2"}, scope.GetEffectiveAllowedNodes()) + }) + + t.Run("zone without nodes", func(t *testing.T) { + scope := MachineScope{ + InfraCluster: &ClusterScope{ProxmoxCluster: cluster}, + ProxmoxMachine: &infrav1.ProxmoxMachine{}, + } + require.NoError(t, scope.ApplyFailureDomainNodes("zone-b")) + // No override set, falls back to spec AllowedNodes. + require.Nil(t, scope.GetEffectiveAllowedNodes()) + }) + + t.Run("zone not found", func(t *testing.T) { + scope := MachineScope{ + InfraCluster: &ClusterScope{ProxmoxCluster: cluster}, + ProxmoxMachine: &infrav1.ProxmoxMachine{}, + } + err := scope.ApplyFailureDomainNodes("zone-c") + require.Error(t, err) + require.Contains(t, err.Error(), "zone-c") + }) +} + func TestMachineScope_SkipQemuDisablesCloudInitCheck(t *testing.T) { p := infrav1.ProxmoxMachine{ Spec: infrav1.ProxmoxMachineSpec{