Skip to content

Commit b9f5936

Browse files
L3n41cclaude
andauthored
[CASCL-1304] Fix Fargate profile name in dd-cluster-info ConfigMap (#3000)
* [CASCL-1304] Fix Fargate profile name in dd-cluster-info ConfigMap The dd-cluster-info ConfigMap was writing an empty string as the Fargate profile-name bucket key, because `classifyNodeByLabel` read `eks.amazonaws.com/fargate-profile` from the Node. EKS stamps that label on the Pod scheduled on the Fargate node, not on the Node itself, so the read always returned "". Resolve the profile name by listing Pods filtered server-side with `LabelSelector: "eks.amazonaws.com/fargate-profile"`, building a nodeName→profileName index, and threading it through the classifier. A Fargate node with no Pod yet scheduled keeps falling into the empty-key bucket — acceptable for a one-shot informational snapshot. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * [CASCL-1304] Cover error and empty-NodeName branches of fargateProfilesByNode The patch coverage gate failed at 72.72% (threshold 80%) because three new branches in classify.go were untested: - Classify error-return when fargateProfilesByNode fails. - The empty-NodeName skip path in fargateProfilesByNode. - The List error wrap path in fargateProfilesByNode. Adds two tests that close those branches via fake-clientset reactors and a pending pod fixture, mirroring the existing API-list-error pattern in guess/karpenter_test.go. fargateProfilesByNode now reaches 100% statement coverage and the package totals 93.9%. --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 2c675b3 commit b9f5936

2 files changed

Lines changed: 126 additions & 15 deletions

File tree

cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo/classify.go

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,12 @@ func Classify(ctx context.Context, in ClassifyInput) (*ClusterInfo, error) {
8888
}
8989
info.ClusterARN, info.Region = describeClusterIdentity(ctx, in.EKS, in.ClusterName)
9090

91-
asgCandidates, err := classifyByLabels(ctx, in.K8sClient, info)
91+
fargateProfileByNode, err := fargateProfilesByNode(ctx, in.K8sClient)
92+
if err != nil {
93+
return nil, err
94+
}
95+
96+
asgCandidates, err := classifyByLabels(ctx, in.K8sClient, fargateProfileByNode, info)
9297
if err != nil {
9398
return nil, err
9499
}
@@ -115,6 +120,35 @@ func Classify(ctx context.Context, in ClassifyInput) (*ClusterInfo, error) {
115120
return info, nil
116121
}
117122

123+
// fargateProfilesByNode lists Pods that carry the
124+
// `eks.amazonaws.com/fargate-profile` label (server-side filtered) and indexes
125+
// the profile name by the Node the Pod is scheduled on. EKS stamps that label
126+
// on the Pod, not on the Node, so the Node listing alone cannot recover the
127+
// profile name. A Fargate node hosts exactly one user pod, so the map is
128+
// uncontested in practice.
129+
func fargateProfilesByNode(ctx context.Context, k8sClient kubernetes.Interface) (map[string]string, error) {
130+
const fargateProfileLabel = "eks.amazonaws.com/fargate-profile"
131+
132+
out := map[string]string{}
133+
p := pager.New(func(ctx context.Context, opts metav1.ListOptions) (runtime.Object, error) {
134+
return k8sClient.CoreV1().Pods(corev1.NamespaceAll).List(ctx, opts)
135+
})
136+
opts := metav1.ListOptions{LabelSelector: fargateProfileLabel}
137+
if err := p.EachListItem(ctx, opts, func(obj runtime.Object) error {
138+
pod := obj.(*corev1.Pod)
139+
if pod.Spec.NodeName == "" {
140+
return nil
141+
}
142+
if profile := pod.Labels[fargateProfileLabel]; profile != "" {
143+
out[pod.Spec.NodeName] = profile
144+
}
145+
return nil
146+
}); err != nil {
147+
return nil, fmt.Errorf("failed to list Fargate pods: %w", err)
148+
}
149+
return out, nil
150+
}
151+
118152
// asgCandidate is a node that needs an AWS API call to determine whether
119153
// it's in an ASG (asg bucket) or not (standalone bucket).
120154
type asgCandidate struct {
@@ -126,15 +160,15 @@ type asgCandidate struct {
126160
// decision tree (Fargate, Karpenter, EKS managed node group, unknown). Nodes
127161
// with an AWS EC2 providerID that don't match any of the above are returned
128162
// as ASG candidates for resolveASGs to bucket as asg or standalone.
129-
func classifyByLabels(ctx context.Context, k8sClient kubernetes.Interface, info *ClusterInfo) ([]asgCandidate, error) {
163+
func classifyByLabels(ctx context.Context, k8sClient kubernetes.Interface, fargateProfileByNode map[string]string, info *ClusterInfo) ([]asgCandidate, error) {
130164
var candidates []asgCandidate
131165

132166
p := pager.New(func(ctx context.Context, opts metav1.ListOptions) (runtime.Object, error) {
133167
return k8sClient.CoreV1().Nodes().List(ctx, opts)
134168
})
135169
if err := p.EachListItem(ctx, metav1.ListOptions{}, func(obj runtime.Object) error {
136170
node := obj.(*corev1.Node)
137-
if mgr, entity, ok := classifyNodeByLabel(node); ok {
171+
if mgr, entity, ok := classifyNodeByLabel(node, fargateProfileByNode); ok {
138172
addToBucket(info, mgr, entity, node.Name)
139173
return nil
140174
}
@@ -155,12 +189,13 @@ func classifyByLabels(ctx context.Context, k8sClient kubernetes.Interface, info
155189
return candidates, nil
156190
}
157191

158-
// classifyNodeByLabel applies steps 1-3 of the decision tree using only the
159-
// Node labels and name. Returns false when the node needs an AWS API lookup
160-
// or the unknown-bucket fallback.
161-
func classifyNodeByLabel(node *corev1.Node) (NodeManager, string, bool) {
192+
// classifyNodeByLabel applies steps 1-3 of the decision tree using the Node
193+
// labels, the Node name, and a pre-built nodeName→Fargate-profile index.
194+
// Returns false when the node needs an AWS API lookup or the unknown-bucket
195+
// fallback.
196+
func classifyNodeByLabel(node *corev1.Node, fargateProfileByNode map[string]string) (NodeManager, string, bool) {
162197
if node.Labels["eks.amazonaws.com/compute-type"] == "fargate" || strings.HasPrefix(node.Name, "fargate-ip-") {
163-
return NodeManagerFargate, node.Labels["eks.amazonaws.com/fargate-profile"], true
198+
return NodeManagerFargate, fargateProfileByNode[node.Name], true
164199
}
165200

166201
if v := node.Labels["karpenter.sh/nodepool"]; v != "" {

cmd/kubectl-datadog/autoscaling/cluster/common/clusterinfo/classify_test.go

Lines changed: 83 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,13 @@ func node(name string, providerID string, labels map[string]string) *corev1.Node
9797
}
9898
}
9999

100+
func pod(name, namespace, nodeName string, labels map[string]string) *corev1.Pod {
101+
return &corev1.Pod{
102+
ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace, Labels: labels},
103+
Spec: corev1.PodSpec{NodeName: nodeName},
104+
}
105+
}
106+
100107
func deploymentWith(namespace, name string, labels map[string]string, image string) *appsv1.Deployment {
101108
return &appsv1.Deployment{
102109
ObjectMeta: metav1.ObjectMeta{Namespace: namespace, Name: name, Labels: labels},
@@ -195,12 +202,17 @@ func TestClassify_ClusterIdentity_DescribeError(t *testing.T) {
195202

196203
func TestClassify_AllBucketsByLabel(t *testing.T) {
197204
objs := []runtime.Object{
198-
// fargate via label
205+
// fargate via label; profile name comes from the hosted Pod's label
206+
// (EKS stamps `eks.amazonaws.com/fargate-profile` on the Pod, not the
207+
// Node).
199208
node("fargate-by-label", "aws:///us-east-1a/fargate-abc", map[string]string{
200-
"eks.amazonaws.com/compute-type": "fargate",
209+
"eks.amazonaws.com/compute-type": "fargate",
210+
}),
211+
pod("workload", "default", "fargate-by-label", map[string]string{
201212
"eks.amazonaws.com/fargate-profile": "fp-default",
202213
}),
203-
// fargate via name fallback (no compute-type label)
214+
// fargate via name fallback (no compute-type label, no Pod yet
215+
// scheduled) — exercises the empty-key fallback path.
204216
node("fargate-ip-10-0-0-1.eu-west-3.compute.internal", "", nil),
205217
// karpenter via primary label
206218
node("kp-primary", "aws:///us-east-1a/i-0aaa", map[string]string{
@@ -248,6 +260,64 @@ func TestClassify_AllBucketsByLabel(t *testing.T) {
248260
assert.Empty(t, asg.calls, "label-only nodes must not trigger AWS calls")
249261
}
250262

263+
func TestClassify_FargateMultipleProfiles(t *testing.T) {
264+
clientset := fake.NewSimpleClientset(
265+
node("fargate-a", "", map[string]string{"eks.amazonaws.com/compute-type": "fargate"}),
266+
node("fargate-b", "", map[string]string{"eks.amazonaws.com/compute-type": "fargate"}),
267+
pod("workload-a", "team-a", "fargate-a", map[string]string{
268+
"eks.amazonaws.com/fargate-profile": "fp-team-a",
269+
}),
270+
pod("workload-b", "team-b", "fargate-b", map[string]string{
271+
"eks.amazonaws.com/fargate-profile": "fp-team-b",
272+
}),
273+
)
274+
275+
info, err := Classify(t.Context(), classifyOpts(clientset, &fakeASG{}))
276+
require.NoError(t, err)
277+
278+
assert.Equal(t, []string{"fargate-a"},
279+
info.NodeManagement[NodeManagerFargate]["fp-team-a"].Nodes)
280+
assert.Equal(t, []string{"fargate-b"},
281+
info.NodeManagement[NodeManagerFargate]["fp-team-b"].Nodes)
282+
}
283+
284+
// TestClassify_FargatePendingPodSkipped guards the empty-NodeName branch of
285+
// fargateProfilesByNode: a Pod that carries the Fargate label but isn't yet
286+
// scheduled onto a Node must not populate the index (and must not panic).
287+
func TestClassify_FargatePendingPodSkipped(t *testing.T) {
288+
clientset := fake.NewSimpleClientset(
289+
node("fargate-a", "", map[string]string{"eks.amazonaws.com/compute-type": "fargate"}),
290+
// Scheduled pod -> profile resolves normally.
291+
pod("workload-a", "team-a", "fargate-a", map[string]string{
292+
"eks.amazonaws.com/fargate-profile": "fp-team-a",
293+
}),
294+
// Pending pod (no NodeName yet) -> must be ignored.
295+
pod("workload-pending", "team-a", "", map[string]string{
296+
"eks.amazonaws.com/fargate-profile": "fp-team-a",
297+
}),
298+
)
299+
300+
info, err := Classify(t.Context(), classifyOpts(clientset, &fakeASG{}))
301+
require.NoError(t, err)
302+
303+
assert.Equal(t, []string{"fargate-a"},
304+
info.NodeManagement[NodeManagerFargate]["fp-team-a"].Nodes)
305+
}
306+
307+
// TestClassify_FargatePodListErrorPropagates guards the error path of
308+
// fargateProfilesByNode: a failing Pod listing must surface to the caller
309+
// of Classify wrapped with a recognisable message.
310+
func TestClassify_FargatePodListErrorPropagates(t *testing.T) {
311+
clientset := fake.NewSimpleClientset()
312+
clientset.PrependReactor("list", "pods", func(_ k8stesting.Action) (bool, runtime.Object, error) {
313+
return true, nil, apierrors.NewServiceUnavailable("test failure")
314+
})
315+
316+
_, err := Classify(t.Context(), classifyOpts(clientset, &fakeASG{}))
317+
require.Error(t, err)
318+
assert.Contains(t, err.Error(), "failed to list Fargate pods")
319+
}
320+
251321
func TestClassify_ASGAndStandalone(t *testing.T) {
252322
clientset := fake.NewSimpleClientset(
253323
node("worker-1", "aws:///us-east-1a/i-1111", nil),
@@ -448,11 +518,15 @@ func TestClassify_KarpenterNodePoolOwnership_NoCRD(t *testing.T) {
448518
func TestClassify_FargateProfileOwnership(t *testing.T) {
449519
clientset := fake.NewSimpleClientset(
450520
node("fargate-1", "", map[string]string{
451-
"eks.amazonaws.com/compute-type": "fargate",
452-
"eks.amazonaws.com/fargate-profile": "dd-karpenter-test",
521+
"eks.amazonaws.com/compute-type": "fargate",
453522
}),
454523
node("fargate-2", "", map[string]string{
455-
"eks.amazonaws.com/compute-type": "fargate",
524+
"eks.amazonaws.com/compute-type": "fargate",
525+
}),
526+
pod("workload-1", "default", "fargate-1", map[string]string{
527+
"eks.amazonaws.com/fargate-profile": "dd-karpenter-test",
528+
}),
529+
pod("workload-2", "default", "fargate-2", map[string]string{
456530
"eks.amazonaws.com/fargate-profile": "third-party",
457531
}),
458532
)
@@ -476,7 +550,9 @@ func TestClassify_FargateProfileOwnership_DescribeError(t *testing.T) {
476550
// stays unflagged and a warning is logged.
477551
clientset := fake.NewSimpleClientset(
478552
node("fargate-1", "", map[string]string{
479-
"eks.amazonaws.com/compute-type": "fargate",
553+
"eks.amazonaws.com/compute-type": "fargate",
554+
}),
555+
pod("workload-1", "default", "fargate-1", map[string]string{
480556
"eks.amazonaws.com/fargate-profile": "dd-karpenter-test",
481557
}),
482558
)

0 commit comments

Comments
 (0)