Skip to content

Commit e849dcf

Browse files
authored
feat: add topologyspreadconstraints for service (#100)
1 parent d22e4a1 commit e849dcf

8 files changed

Lines changed: 235 additions & 4 deletions

File tree

modules/service/common.k

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import container as c
22
import secret as sec
3+
import topologyspreadconstraint as tp
34
import kam.v1.workload as wl
45

56
schema WorkloadBase(wl.Workload):
@@ -30,6 +31,9 @@ schema WorkloadBase(wl.Workload):
3031
# The number of containers that should be ran.
3132
replicas?: int
3233

34+
# TopologySpreadConstraint describes how a group of pods ought to spread across topology domains.
35+
topologySpreadConstraints?: {str:tp.TopologySpreadConstraint}
36+
3337
###### Other metadata info
3438
# Labels and annotations can be used to attach arbitrary metadata as key-value pairs to resources.
3539
labels?: {str:str}

modules/service/kcl.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "service"
3-
version = "0.2.0"
3+
version = "0.2.1"
44

55
[dependencies]
66
kam = { git = "https://github.com/KusionStack/kam.git", tag = "0.2.0" }

modules/service/src/service.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ func (svc *Service) Generate(_ context.Context, request *module.GeneratorRequest
5959
return nil, err
6060
}
6161

62+
topologySpreadConstraints := handleTopologySpreadConstraints(svc.TopologySpreadConstraints)
63+
6264
res := make([]kusionv1.Resource, 0)
6365
// Create ConfigMap objects based on the App's configuration.
6466
for _, cm := range configMaps {
@@ -89,8 +91,9 @@ func (svc *Service) Generate(_ context.Context, request *module.GeneratorRequest
8991
Annotations: annotations,
9092
},
9193
Spec: corev1.PodSpec{
92-
Containers: containers,
93-
Volumes: volumes,
94+
TopologySpreadConstraints: topologySpreadConstraints,
95+
Containers: containers,
96+
Volumes: volumes,
9497
},
9598
}
9699

modules/service/src/type.go

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
package main
22

3-
import "gopkg.in/yaml.v2"
3+
import (
4+
"gopkg.in/yaml.v2"
5+
corev1 "k8s.io/api/core/v1"
6+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
7+
)
48

59
const (
610
BuiltinModulePrefix = ""
@@ -10,6 +14,49 @@ const (
1014
TypeTCP = BuiltinModulePrefix + ProbePrefix + "Tcp"
1115
)
1216

17+
// LabelSelector is a label query over a set of resources. The result of
// matchLabels and matchExpressions are ANDed. An empty label selector
// matches all objects; a null selector matches no objects.
type LabelSelector struct {
	// MatchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
	// map is equivalent to an element of matchExpressions, whose key field is "key", the
	// operator is "In", and the values array contains only "value".
	MatchLabels map[string]string `yaml:"matchLabels,omitempty" json:"matchLabels,omitempty"`
	// MatchExpressions is a list of label selector requirements.
	// The requirements are ANDed with matchLabels.
	MatchExpressions []LabelSelectorRequirement `yaml:"matchExpressions,omitempty" json:"matchExpressions,omitempty"`
}
26+
27+
// LabelSelectorRequirement is a selector that contains values, a key, and an
// operator that relates the key and values.
type LabelSelectorRequirement struct {
	// Key is the label key that the selector applies to.
	Key string `yaml:"key" json:"key"`
	// Operator represents a key's relationship to a set of values.
	// Valid operators are In, NotIn, Exists and DoesNotExist.
	Operator metav1.LabelSelectorOperator `yaml:"operator" json:"operator"`
	// Values is an array of string values. If the operator is In or NotIn,
	// the values array must be non-empty. If the operator is Exists or DoesNotExist,
	// the values array must be empty. This array is replaced during a strategic merge patch.
	Values []string `yaml:"values,omitempty" json:"values,omitempty"`
}
39+
40+
// TopologySpreadConstraint specifies how to spread matching pods among the given topology.
// It mirrors the KCL TopologySpreadConstraint schema and is converted to the
// corev1 type by handleTopologySpreadConstraints.
type TopologySpreadConstraint struct {
	// MaxSkew describes the degree to which pods may be unevenly distributed.
	// The KCL schema requires it to be greater than 0.
	MaxSkew int32 `yaml:"maxSkew" json:"maxSkew"`
	// TopologyKey is the key of node labels. Nodes that have a label with this
	// key and identical values are considered to be in the same topology.
	TopologyKey string `yaml:"topologyKey" json:"topologyKey"`
	// WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy the
	// spread constraint. The KCL schema restricts it to "DoNotSchedule" or "ScheduleAnyway".
	WhenUnsatisfiable corev1.UnsatisfiableConstraintAction `yaml:"whenUnsatisfiable" json:"whenUnsatisfiable"`
	// LabelSelector is used to find matching pods; matching pods are counted to
	// determine the number of pods in their corresponding topology domain.
	LabelSelector *LabelSelector `yaml:"labelSelector,omitempty" json:"labelSelector,omitempty"`
	// MinDomains indicates a minimum number of eligible domains.
	// Per the schema docs it must be greater than 0 when set.
	MinDomains *int32 `yaml:"minDomains,omitempty" json:"minDomains,omitempty"`
	// NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector when calculating pod topology spread skew.
	NodeAffinityPolicy *corev1.NodeInclusionPolicy `yaml:"nodeAffinityPolicy,omitempty" json:"nodeAffinityPolicy,omitempty"`
	// NodeTaintsPolicy indicates how we will treat node taints when calculating pod topology spread skew.
	NodeTaintsPolicy *corev1.NodeInclusionPolicy `yaml:"nodeTaintsPolicy,omitempty" json:"nodeTaintsPolicy,omitempty"`
	// MatchLabelKeys is a set of pod label keys to select the pods over which spreading will be calculated.
	MatchLabelKeys []string `yaml:"matchLabelKeys,omitempty" json:"matchLabelKeys,omitempty"`
}
59+
1360
// Container describes how the App's tasks are expected to be run.
1461
type Container struct {
1562
// Image to run for this container
@@ -179,6 +226,9 @@ type Base struct {
179226
// Labels and Annotations can be used to attach arbitrary metadata as key-value pairs to resources.
180227
Labels map[string]string `json:"labels,omitempty" yaml:"labels,omitempty"`
181228
Annotations map[string]string `json:"annotations,omitempty" yaml:"annotations,omitempty"`
229+
// TopologySpreadConstraints describes how a group of pods ought to spread across topology domains.
230+
// Scheduler will schedule pods in a way which abides by the constraints. All topologySpreadConstraints are ANDed.
231+
TopologySpreadConstraints map[string]TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty" yaml:"topologySpreadConstraints,omitempty"`
182232
}
183233

184234
type ServiceType string

modules/service/src/workload_base.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,3 +449,39 @@ func parseSecretReference(ref string) (result secretReference, _ bool, _ error)
449449

450450
return result, true, nil
451451
}
452+
453+
func handleTopologySpreadConstraints(tps map[string]TopologySpreadConstraint) []corev1.TopologySpreadConstraint {
454+
var topologySpreadConstraints []corev1.TopologySpreadConstraint
455+
if len(tps) == 0 {
456+
return nil
457+
}
458+
459+
for _, v := range tps {
460+
tp := corev1.TopologySpreadConstraint{
461+
MaxSkew: v.MaxSkew,
462+
TopologyKey: v.TopologyKey,
463+
WhenUnsatisfiable: v.WhenUnsatisfiable,
464+
MinDomains: v.MinDomains,
465+
NodeAffinityPolicy: v.NodeAffinityPolicy,
466+
NodeTaintsPolicy: v.NodeTaintsPolicy,
467+
MatchLabelKeys: v.MatchLabelKeys,
468+
}
469+
470+
if v.LabelSelector != nil {
471+
var matchExpressions []metav1.LabelSelectorRequirement
472+
for _, m := range v.LabelSelector.MatchExpressions {
473+
matchExpressions = append(matchExpressions, metav1.LabelSelectorRequirement{
474+
Key: m.Key,
475+
Operator: m.Operator,
476+
Values: m.Values,
477+
})
478+
}
479+
tp.LabelSelector = &metav1.LabelSelector{
480+
MatchLabels: v.LabelSelector.MatchLabels,
481+
MatchExpressions: matchExpressions,
482+
}
483+
}
484+
topologySpreadConstraints = append(topologySpreadConstraints, tp)
485+
}
486+
return topologySpreadConstraints
487+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
schema LabelSelector:
    """ A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed.
    An empty label selector matches all objects. A null label selector matches no objects.
    """

    # MatchExpressions is a list of label selector requirements. The requirements are ANDed with matchLabels.
    matchExpressions?: [LabelSelectorRequirement]

    # MatchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions,
    # whose key field is "key", the operator is "In", and the values array contains only "value". The requirements are ANDed.
    matchLabels?: {str:str}
12+
13+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
schema LabelSelectorRequirement:
    """ A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values.
    """

    # Key is the label key that the selector applies to.
    key: str

    # Operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist
    # (enforced by the check block below).
    operator: str

    # Values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator
    # is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch.
    values?: [str]

    check:
        operator in ["In", "NotIn", "Exists", "DoesNotExist"], "operator value is invalid"
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
schema TopologySpreadConstraint:
    """ TopologySpreadConstraint describes how a group of pods ought to spread across topology domains.
    Scheduler will schedule pods in a way which abides by the constraints. All topologySpreadConstraints are ANDed.

    Attributes
    ----------
    maxSkew: int, default is Undefined, required.
        MaxSkew describes the degree to which pods may be unevenly distributed. When whenUnsatisfiable=DoNotSchedule, it is the
        maximum permitted difference between the number of matching pods in the target topology and the global minimum. The global
        minimum is the minimum number of matching pods in an eligible domain or zero if the number of eligible domains is less than
        MinDomains. For example, in a 3-zone cluster, MaxSkew is set to 1, and pods with the same labelSelector spread as 2/2/1: In
        this case, the global minimum is 1. | zone1 | zone2 | zone3 | | P P | P P | P | - if MaxSkew is 1, incoming pod can only be
        scheduled to zone3 to become 2/2/2; scheduling it onto zone1(zone2) would make the ActualSkew(3-1) on zone1(zone2) violate MaxSkew(1).
        - if MaxSkew is 2, incoming pod can be scheduled onto any zone. When whenUnsatisfiable=ScheduleAnyway, it is used to give higher
        precedence to topologies that satisfy it. It's a required field. Default value is 1 and 0 is not allowed.
    topologyKey: str, default is Undefined, required.
        TopologyKey is the key of node labels. Nodes that have a label with this key and identical values are considered to be in the same
        topology. We consider each <key, value> as a "bucket", and try to put balanced number of pods into each bucket. We define a domain
        as a particular instance of a topology. Also, we define an eligible domain as a domain whose nodes meet the requirements of nodeAffinityPolicy
        and nodeTaintsPolicy. e.g. If TopologyKey is "kubernetes.io/hostname", each Node is a domain of that topology. And, if TopologyKey is
        "topology.kubernetes.io/zone", each zone is a domain of that topology. It's a required field.
    whenUnsatisfiable: str, default is Undefined, required.
        WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy the spread constraint. - DoNotSchedule (default) tells the
        scheduler not to schedule it. - ScheduleAnyway tells the scheduler to schedule the pod in any location, but giving higher precedence
        to topologies that would help reduce the skew. A constraint is considered "Unsatisfiable" for an incoming pod if and only if every
        possible node assignment for that pod would violate "MaxSkew" on some topology. For example, in a 3-zone cluster, MaxSkew is set to 1,
        and pods with the same labelSelector spread as 3/1/1: | zone1 | zone2 | zone3 | | P P P | P | P | If WhenUnsatisfiable is set to DoNotSchedule,
        incoming pod can only be scheduled to zone2(zone3) to become 3/2/1(3/1/2) as ActualSkew(2-1) on zone2(zone3) satisfies MaxSkew(1). In other words,
        the cluster can still be imbalanced, but scheduler won't make it more imbalanced. It's a required field.
        Possible enum values:
        - `"DoNotSchedule"` instructs the scheduler not to schedule the pod when constraints are not satisfied.
        - `"ScheduleAnyway"` instructs the scheduler to schedule the pod even if constraints are not satisfied.
    labelSelector: LabelSelector, default is Undefined, optional.
        LabelSelector is used to find matching pods. Pods that match this label selector are counted to determine the number of
        pods in their corresponding topology domain.
    matchLabelKeys: [str], default is Undefined, optional.
        MatchLabelKeys is a set of pod label keys to select the pods over which spreading will be calculated. The keys are used
        to lookup values from the incoming pod labels, those key-value labels are ANDed with labelSelector to select the group
        of existing pods over which spreading will be calculated for the incoming pod. Keys that don't exist in the incoming pod
        labels will be ignored. A null or empty list means only match against labelSelector.
    minDomains: int, default is Undefined, optional.
        MinDomains indicates a minimum number of eligible domains. When the number of eligible domains with matching topology keys is
        less than minDomains, Pod Topology Spread treats "global minimum" as 0, and then the calculation of Skew is performed. And when
        the number of eligible domains with matching topology keys equals or greater than minDomains, this value has no effect on scheduling.
        As a result, when the number of eligible domains is less than minDomains, scheduler won't schedule more than maxSkew Pods to those
        domains. If value is nil, the constraint behaves as if MinDomains is equal to 1. Valid values are integers greater than 0. When value
        is not nil, WhenUnsatisfiable must be DoNotSchedule.
        For example, in a 3-zone cluster, MaxSkew is set to 2, MinDomains is set to 5 and pods with the same labelSelector spread as
        2/2/2: | zone1 | zone2 | zone3 | | P P | P P | P P | The number of domains is less than 5(MinDomains), so "global minimum" is treated
        as 0. In this situation, new pod with the same labelSelector cannot be scheduled, because computed skew will be 3(3 - 0) if new Pod is
        scheduled to any of the three zones, it will violate MaxSkew.
        This is a beta field and requires the MinDomainsInPodTopologySpread feature gate to be enabled (enabled by default).
    nodeAffinityPolicy: str, default is Undefined, optional.
        NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector when calculating pod topology spread skew. Options are:
        - Honor: only nodes matching nodeAffinity/nodeSelector are included in the calculations. - Ignore: nodeAffinity/nodeSelector are ignored.
        All nodes are included in the calculations.
        If this value is nil, the behavior is equivalent to the Honor policy. This is a beta-level feature default enabled by the
        NodeInclusionPolicyInPodTopologySpread feature flag.
    nodeTaintsPolicy: str, default is Undefined, optional.
        NodeTaintsPolicy indicates how we will treat node taints when calculating pod topology spread skew. Options are: - Honor: nodes without
        taints, along with tainted nodes for which the incoming pod has a toleration, are included. - Ignore: node taints are ignored. All nodes
        are included.
        If this value is nil, the behavior is equivalent to the Ignore policy. This is a beta-level feature default enabled by the
        NodeInclusionPolicyInPodTopologySpread feature flag.

    Examples
    --------
    import catalog.workload.topologyspreadconstraint as tp

    topologyspreadconstraint = tp.TopologySpreadConstraint {
        maxSkew: 1
        topologyKey: "kubernetes.io/hostname"
        whenUnsatisfiable: "DoNotSchedule"
    }
    """

    # MaxSkew describes the degree to which pods may be unevenly distributed.
    # Must be greater than 0.
    maxSkew: int

    # TopologyKey is the key of node labels.
    topologyKey: str

    # WhenUnsatisfiable indicates how to deal with a pod if it doesn't satisfy the spread constraint.
    whenUnsatisfiable: str

    # LabelSelector is used to find matching pods.
    labelSelector?: LabelSelector

    # MatchLabelKeys is a set of pod label keys to select the pods over which spreading will be calculated.
    matchLabelKeys?: [str]

    # MinDomains indicates a minimum number of eligible domains.
    minDomains?: int

    # NodeAffinityPolicy indicates how we will treat Pod's nodeAffinity/nodeSelector when calculating pod topology spread skew.
    nodeAffinityPolicy?: str

    # NodeTaintsPolicy indicates how we will treat node taints when calculating pod topology spread skew.
    nodeTaintsPolicy?: str

    check:
        maxSkew > 0, "maxSkew must be greater than 0"
        whenUnsatisfiable in ["DoNotSchedule", "ScheduleAnyway"], "whenUnsatisfiable value is invalid"
        labelSelector if matchLabelKeys, "matchLabelKeys can't be set when labelSelector isn't set"
        minDomains > 0 if minDomains != Undefined, "minDomains must be greater than 0"
        # Kubernetes requires DoNotSchedule whenever minDomains is set (see docstring above).
        whenUnsatisfiable == "DoNotSchedule" if minDomains != Undefined, "whenUnsatisfiable must be DoNotSchedule when minDomains is set"
        nodeAffinityPolicy in ["Ignore", "Honor"] if nodeAffinityPolicy, "nodeAffinityPolicy value is invalid"
        nodeTaintsPolicy in ["Ignore", "Honor"] if nodeTaintsPolicy, "nodeTaintsPolicy value is invalid"
109+

0 commit comments

Comments
 (0)