Skip to content

Commit 1f01bfa

Browse files
committed
fix: fixed the provider bug for labels
1 parent a71b258 commit 1f01bfa

3 files changed

Lines changed: 242 additions & 8 deletions

File tree

provider/kubernetes/provider.go

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"io"
7+
"maps"
78

89
"helm.sh/helm/v3/pkg/action"
910
"helm.sh/helm/v3/pkg/chart"
@@ -143,7 +144,16 @@ func (p *Provider) Capabilities() []provider.Capability {
143144
// (ClusterIP=None) + volumeClaimTemplates per persistent volume + per-
144145
// service ConfigMaps. Each replica gets its own PVC by name.
145146
func (p *Provider) Provision(ctx context.Context, req provider.ProvisionRequest) (*provider.ProvisionResult, error) {
146-
labels := instanceLabels(req.InstanceID, req.TenantID, p.cfg.Labels)
147+
// Per-instance labels (req.Labels — e.g. a caller's workspace/component
148+
// tags) are layered onto the provider's static cfg labels so the pods
149+
// are identifiable and queryable by the caller's own scheme, not just
150+
// the opaque instance ID. cfg labels are the base; req labels win on
151+
// collision; the reserved ctrlplane.io/* keys are set last by
152+
// instanceLabels and always survive.
153+
extra := make(map[string]string, len(p.cfg.Labels)+len(req.Labels))
154+
maps.Copy(extra, p.cfg.Labels)
155+
maps.Copy(extra, req.Labels)
156+
labels := instanceLabels(req.InstanceID, req.TenantID, extra)
147157
ns := p.cfg.Namespace
148158

149159
// Create per-service ConfigMaps before the controller object so the
@@ -413,15 +423,69 @@ func (p *Provider) Rollback(_ context.Context, _ id.ID, _ id.ID) error {
413423
return nil
414424
}
415425

416-
// Scale adjusts the instance's resource allocation.
426+
// Scale adjusts the instance's resource allocation. A ScaleRequest may
427+
// carry CPU/memory changes, a replica change, or both: resource changes
428+
// patch the pod template's container resources (rolling the pods),
429+
// replica changes patch the scale subresource.
417430
func (p *Provider) Scale(ctx context.Context, instanceID id.ID, spec provider.ResourceSpec) error {
431+
if spec.CPUMillis > 0 || spec.MemoryMB > 0 {
432+
if err := p.applyResources(ctx, instanceID, spec); err != nil {
433+
return err
434+
}
435+
}
436+
418437
if spec.Replicas > 0 {
419438
return p.scaleReplicas(ctx, instanceID, int32(min(spec.Replicas, int(^int32(0))))) //nolint:gosec // clamped to int32 range via min
420439
}
421440

422441
return nil
423442
}
424443

444+
// applyResources patches the pod template's container resource
445+
// requests+limits to spec, rolling the pods. Mirrors instance.Service's
446+
// Scale contract: it targets the workload's app container(s); a
447+
// multi-service workload gets every app container set to the same spec
448+
// (per-service resourcing is a future API). Init containers are left
449+
// untouched. The Deployment path is tried first, then StatefulSet —
450+
// same dispatch as Deploy.
451+
func (p *Provider) applyResources(ctx context.Context, instanceID id.ID, spec provider.ResourceSpec) error {
452+
ns := p.cfg.Namespace
453+
name := deploymentName(instanceID)
454+
455+
dep, depErr := p.client.AppsV1().Deployments(ns).Get(ctx, name, metav1.GetOptions{})
456+
if depErr == nil {
457+
setContainerResources(dep.Spec.Template.Spec.Containers, spec)
458+
459+
if _, err := p.client.AppsV1().Deployments(ns).Update(ctx, dep, metav1.UpdateOptions{}); err != nil {
460+
return fmt.Errorf("kubernetes: patch deployment resources: %w", err)
461+
}
462+
463+
return nil
464+
}
465+
466+
ss, ssErr := p.client.AppsV1().StatefulSets(ns).Get(ctx, name, metav1.GetOptions{})
467+
if ssErr != nil {
468+
return fmt.Errorf("kubernetes: get workload for resize: deployment: %w; statefulset: %w", depErr, ssErr)
469+
}
470+
471+
setContainerResources(ss.Spec.Template.Spec.Containers, spec)
472+
473+
if _, err := p.client.AppsV1().StatefulSets(ns).Update(ctx, ss, metav1.UpdateOptions{}); err != nil {
474+
return fmt.Errorf("kubernetes: patch statefulset resources: %w", err)
475+
}
476+
477+
return nil
478+
}
479+
480+
// setContainerResources sets every container's requests+limits to spec.
481+
// Separate ResourceLists per field avoid map aliasing between the two.
482+
func setContainerResources(containers []corev1.Container, spec provider.ResourceSpec) {
483+
for i := range containers {
484+
containers[i].Resources.Requests = buildResourceList(spec)
485+
containers[i].Resources.Limits = buildResourceList(spec)
486+
}
487+
}
488+
425489
// Resources returns a one-shot point-in-time sample of the
426490
// instance's pod resource usage via the metrics.k8s.io API.
427491
//

provider/kubernetes/resources.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,20 @@ func providerRef(namespace string, instanceID id.ID) string {
5757
return fmt.Sprintf("k8s:%s/%s", namespace, deploymentName(instanceID))
5858
}
5959

60-
// instanceLabels builds the standard label set for a ctrlplane-managed resource.
60+
// instanceLabels builds the standard label set for a ctrlplane-managed
61+
// resource. Caller-supplied extra labels are layered in first; the
62+
// reserved ctrlplane.io/* keys are set last so they're authoritative and
63+
// a caller's labels can never clobber instance identity/management (the
64+
// selector depends on labelInstanceID).
6165
func instanceLabels(instanceID id.ID, tenantID string, extra map[string]string) map[string]string {
62-
labels := map[string]string{
63-
labelInstanceID: instanceID.String(),
64-
labelTenantID: tenantID,
65-
labelManagedBy: labelManagedByValue,
66-
}
66+
labels := make(map[string]string, len(extra)+3)
6767

6868
maps.Copy(labels, extra)
6969

70+
labels[labelInstanceID] = instanceID.String()
71+
labels[labelTenantID] = tenantID
72+
labels[labelManagedBy] = labelManagedByValue
73+
7074
return labels
7175
}
7276

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
package kubernetes
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
appsv1 "k8s.io/api/apps/v1"
8+
corev1 "k8s.io/api/core/v1"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
k8sfake "k8s.io/client-go/kubernetes/fake"
11+
12+
"github.com/xraph/ctrlplane/id"
13+
"github.com/xraph/ctrlplane/provider"
14+
)
15+
16+
// TestScale_PatchesContainerResources is the regression for the bug where
17+
// Provider.Scale ignored CPU/memory and only acted on replicas — leaving a
18+
// provisioned workload's resources immutable. A resize must now patch the
19+
// pod template's container requests AND limits.
20+
func TestScale_PatchesContainerResources(t *testing.T) {
21+
instID := id.New(id.PrefixInstance)
22+
name := deploymentName(instID)
23+
24+
const ns = "default"
25+
26+
big := provider.ResourceSpec{CPUMillis: 500, MemoryMB: 512}
27+
dep := &appsv1.Deployment{
28+
ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns},
29+
Spec: appsv1.DeploymentSpec{
30+
Template: corev1.PodTemplateSpec{
31+
Spec: corev1.PodSpec{
32+
Containers: []corev1.Container{{
33+
Name: "twinos",
34+
Image: "img:1",
35+
Resources: corev1.ResourceRequirements{
36+
Requests: buildResourceList(big),
37+
Limits: buildResourceList(big),
38+
},
39+
}},
40+
},
41+
},
42+
},
43+
}
44+
45+
p := &Provider{
46+
cfg: Config{Namespace: ns},
47+
client: k8sfake.NewSimpleClientset(dep),
48+
}
49+
50+
// Shrink to values that fit a small node.
51+
err := p.Scale(context.Background(), instID, provider.ResourceSpec{CPUMillis: 150, MemoryMB: 256})
52+
if err != nil {
53+
t.Fatalf("Scale: %v", err)
54+
}
55+
56+
got, err := p.client.AppsV1().Deployments(ns).Get(context.Background(), name, metav1.GetOptions{})
57+
if err != nil {
58+
t.Fatalf("get deployment: %v", err)
59+
}
60+
61+
c := got.Spec.Template.Spec.Containers[0]
62+
63+
if cpu := c.Resources.Requests.Cpu().MilliValue(); cpu != 150 {
64+
t.Errorf("cpu request: want 150m, got %dm", cpu)
65+
}
66+
67+
if cpu := c.Resources.Limits.Cpu().MilliValue(); cpu != 150 {
68+
t.Errorf("cpu limit: want 150m, got %dm", cpu)
69+
}
70+
71+
if memMB := c.Resources.Requests.Memory().Value() / (1024 * 1024); memMB != 256 {
72+
t.Errorf("mem request: want 256Mi, got %dMi", memMB)
73+
}
74+
}
75+
76+
// TestScale_noResourceChange_isNoop verifies that a ScaleRequest carrying
77+
// neither CPU nor memory nor replicas leaves the workload untouched (and
78+
// crucially does not attempt the resource patch).
79+
func TestScale_noResourceChange_isNoop(t *testing.T) {
80+
instID := id.New(id.PrefixInstance)
81+
name := deploymentName(instID)
82+
83+
const ns = "default"
84+
85+
orig := provider.ResourceSpec{CPUMillis: 500, MemoryMB: 512}
86+
dep := &appsv1.Deployment{
87+
ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns},
88+
Spec: appsv1.DeploymentSpec{
89+
Template: corev1.PodTemplateSpec{
90+
Spec: corev1.PodSpec{
91+
Containers: []corev1.Container{{
92+
Name: "twinos",
93+
Image: "img:1",
94+
Resources: corev1.ResourceRequirements{Requests: buildResourceList(orig), Limits: buildResourceList(orig)},
95+
}},
96+
},
97+
},
98+
},
99+
}
100+
101+
p := &Provider{cfg: Config{Namespace: ns}, client: k8sfake.NewSimpleClientset(dep)}
102+
103+
if err := p.Scale(context.Background(), instID, provider.ResourceSpec{}); err != nil {
104+
t.Fatalf("Scale (empty spec): %v", err)
105+
}
106+
107+
got, err := p.client.AppsV1().Deployments(ns).Get(context.Background(), name, metav1.GetOptions{})
108+
if err != nil {
109+
t.Fatalf("get deployment: %v", err)
110+
}
111+
112+
if cpu := got.Spec.Template.Spec.Containers[0].Resources.Requests.Cpu().MilliValue(); cpu != 500 {
113+
t.Errorf("empty scale must not change cpu: got %dm, want 500m", cpu)
114+
}
115+
}
116+
117+
// TestProvision_propagatesRequestLabelsToPods is the regression for pods
118+
// carrying only ctrlplane labels: a caller's per-instance labels (e.g.
119+
// twinos.workspace / twinos.component) must reach the Deployment AND its
120+
// pod template so pods are identifiable/queryable, while the reserved
121+
// ctrlplane.io/* keys stay authoritative.
122+
func TestProvision_propagatesRequestLabelsToPods(t *testing.T) {
123+
instID := id.New(id.PrefixInstance)
124+
125+
const ns = "default"
126+
127+
p := &Provider{
128+
cfg: Config{Namespace: ns},
129+
client: k8sfake.NewSimpleClientset(),
130+
}
131+
132+
req := provider.ProvisionRequest{
133+
InstanceID: instID,
134+
TenantID: "ten_abc",
135+
Kind: provider.KindDeployment,
136+
Labels: map[string]string{
137+
"twinos.workspace": "ws_acme",
138+
"twinos.component": "twinos",
139+
// A caller trying to clobber a reserved key must NOT win.
140+
labelInstanceID: "spoofed",
141+
},
142+
Services: []provider.ServiceSpec{{
143+
Name: "twinos",
144+
Image: "img:1",
145+
Role: provider.RoleMain,
146+
}},
147+
}
148+
149+
if _, err := p.Provision(context.Background(), req); err != nil {
150+
t.Fatalf("Provision: %v", err)
151+
}
152+
153+
dep, err := p.client.AppsV1().Deployments(ns).Get(context.Background(), deploymentName(instID), metav1.GetOptions{})
154+
if err != nil {
155+
t.Fatalf("get deployment: %v", err)
156+
}
157+
158+
pod := dep.Spec.Template.Labels
159+
if pod["twinos.workspace"] != "ws_acme" || pod["twinos.component"] != "twinos" {
160+
t.Fatalf("caller labels missing from pod template: %v", pod)
161+
}
162+
163+
if pod[labelInstanceID] != instID.String() {
164+
t.Fatalf("reserved instance-id label was clobbered: got %q, want %q", pod[labelInstanceID], instID.String())
165+
}
166+
}

0 commit comments

Comments
 (0)