Skip to content

Commit 4e87c30

Browse files
scotwells and claude committed
feat(controller): surface rollout progress via UpdatedReplicas + ObservedGeneration
A restart/rolling update was invisible from the project plane: there was no status field representing how many instances are on the new template revision. Add UpdatedReplicas (instances whose observed template hash matches the desired template, regardless of readiness) and ObservedGeneration to both WorkloadDeployment and Workload (plus placement) status. UpdatedReplicas is computed on the cell WD reconcile alongside CurrentReplicas (which is now its Programmed subset), aggregated up into the Workload, and rides the existing status sync to the project plane. Repoint the "Up-to-date" printcolumn to .status.updatedReplicas to match `kubectl get deployment` semantics, so a roll is visible as the count dips below Replicas and recovers. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 53e182a commit 4e87c30

6 files changed

Lines changed: 112 additions & 21 deletions

File tree

api/v1alpha/workload_types.go

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,27 @@ type WorkloadStatus struct {
5858
// The number of instances that currently exist
5959
Replicas int32 `json:"replicas"`
6060

61-
// The number of instances which have the latest workload settings applied.
61+
// The number of instances which have the latest workload settings applied
62+
// and are programmed (a subset of UpdatedReplicas that are ready to serve).
6263
CurrentReplicas int32 `json:"currentReplicas"`
6364

65+
// The number of instances updated to the latest template revision (their
66+
// observed template hash matches the desired template), regardless of
67+
// readiness. Lags Replicas during a rolling update or restart, then catches
68+
// back up — making an in-progress roll observable.
69+
UpdatedReplicas int32 `json:"updatedReplicas"`
70+
6471
// The desired number of instances
6572
DesiredReplicas int32 `json:"desiredReplicas"`
6673

6774
// The number of instances which are ready.
6875
ReadyReplicas int32 `json:"readyReplicas"`
6976

77+
// The most recent generation observed by the workload controller.
78+
//
79+
// +kubebuilder:validation:Optional
80+
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
81+
7082
// The current status of placements in a workload.
7183
Placements []WorkloadPlacementStatus `json:"placements,omitempty"`
7284

@@ -99,7 +111,7 @@ type WorkloadGatewayStatus struct {
99111
// +kubebuilder:printcolumn:name="Replicas",type=string,JSONPath=`.status.replicas`
100112
// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.readyReplicas`
101113
// +kubebuilder:printcolumn:name="Desired",type=string,JSONPath=`.status.desiredReplicas`
102-
// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.currentReplicas`
114+
// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.updatedReplicas`
103115
type Workload struct {
104116
metav1.TypeMeta `json:",inline"`
105117
metav1.ObjectMeta `json:"metadata,omitempty"`
@@ -146,9 +158,14 @@ type WorkloadPlacementStatus struct {
146158
// The number of instances that currently exist
147159
Replicas int32 `json:"replicas"`
148160

149-
// The number of instances which have the latest workload settings applied.
161+
// The number of instances which have the latest workload settings applied
162+
// and are programmed (a subset of UpdatedReplicas that are ready to serve).
150163
CurrentReplicas int32 `json:"currentReplicas"`
151164

165+
// The number of instances updated to the latest template revision, regardless
166+
// of readiness. Lags Replicas during a rolling update or restart.
167+
UpdatedReplicas int32 `json:"updatedReplicas"`
168+
152169
// The desired number of instances
153170
DesiredReplicas int32 `json:"desiredReplicas"`
154171

api/v1alpha/workloaddeployment_types.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,28 @@ type WorkloadDeploymentStatus struct {
4949
// The number of instances created
5050
Replicas int32 `json:"replicas"`
5151

52-
// The number of instances which have the latest workload settings applied.
52+
// The number of instances which have the latest workload settings applied
53+
// and are programmed (a subset of UpdatedReplicas that are ready to serve).
5354
CurrentReplicas int32 `json:"currentReplicas"`
5455

56+
// The number of instances updated to the latest template revision, i.e.
57+
// whose observed template hash matches the desired template, regardless of
58+
// readiness. Lags Replicas during a rolling update or restart, then catches
59+
// back up — making an in-progress roll observable.
60+
UpdatedReplicas int32 `json:"updatedReplicas"`
61+
5562
// The desired number of instances
5663
DesiredReplicas int32 `json:"desiredReplicas"`
5764

5865
// The number of instances which are ready.
5966
ReadyReplicas int32 `json:"readyReplicas"`
67+
68+
// The most recent generation observed by the deployment controller. When
69+
// this matches metadata.generation, the controller has reconciled the
70+
// latest spec (e.g. a restart request).
71+
//
72+
// +kubebuilder:validation:Optional
73+
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
6074
}
6175

6276
const (
@@ -79,7 +93,7 @@ const (
7993
// +kubebuilder:printcolumn:name="Replicas",type=string,JSONPath=`.status.replicas`
8094
// +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.readyReplicas`
8195
// +kubebuilder:printcolumn:name="Desired",type=string,JSONPath=`.status.desiredReplicas`
82-
// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.currentReplicas`
96+
// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.updatedReplicas`
8397
// +kubebuilder:printcolumn:name="Location Namespace",type=string,JSONPath=`.status.location.namespace`,priority=1
8498
// +kubebuilder:printcolumn:name="Location Name",type=string,JSONPath=`.status.location.name`,priority=1
8599
type WorkloadDeployment struct {

config/base/crd/bases/compute.datumapis.com_workloaddeployments.yaml

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ spec:
3434
- jsonPath: .status.desiredReplicas
3535
name: Desired
3636
type: string
37-
- jsonPath: .status.currentReplicas
37+
- jsonPath: .status.updatedReplicas
3838
name: Up-to-date
3939
type: string
4040
- jsonPath: .status.location.namespace
@@ -1087,8 +1087,9 @@ spec:
10871087
type: object
10881088
type: array
10891089
currentReplicas:
1090-
description: The number of instances which have the latest workload
1091-
settings applied.
1090+
description: |-
1091+
The number of instances which have the latest workload settings applied
1092+
and are programmed (a subset of UpdatedReplicas that are ready to serve).
10921093
format: int32
10931094
type: integer
10941095
desiredReplicas:
@@ -1109,6 +1110,13 @@ spec:
11091110
- name
11101111
- namespace
11111112
type: object
1113+
observedGeneration:
1114+
description: |-
1115+
The most recent generation observed by the deployment controller. When
1116+
this matches metadata.generation, the controller has reconciled the
1117+
latest spec (e.g. a restart request).
1118+
format: int64
1119+
type: integer
11121120
readyReplicas:
11131121
description: The number of instances which are ready.
11141122
format: int32
@@ -1117,11 +1125,20 @@ spec:
11171125
description: The number of instances created
11181126
format: int32
11191127
type: integer
1128+
updatedReplicas:
1129+
description: |-
1130+
The number of instances updated to the latest template revision, i.e.
1131+
whose observed template hash matches the desired template, regardless of
1132+
readiness. Lags Replicas during a rolling update or restart, then catches
1133+
back up — making an in-progress roll observable.
1134+
format: int32
1135+
type: integer
11201136
required:
11211137
- currentReplicas
11221138
- desiredReplicas
11231139
- readyReplicas
11241140
- replicas
1141+
- updatedReplicas
11251142
type: object
11261143
type: object
11271144
served: true

config/base/crd/bases/compute.datumapis.com_workloads.yaml

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ spec:
3737
- jsonPath: .status.desiredReplicas
3838
name: Desired
3939
type: string
40-
- jsonPath: .status.currentReplicas
40+
- jsonPath: .status.updatedReplicas
4141
name: Up-to-date
4242
type: string
4343
name: v1alpha
@@ -1081,8 +1081,9 @@ spec:
10811081
type: object
10821082
type: array
10831083
currentReplicas:
1084-
description: The number of instances which have the latest workload
1085-
settings applied.
1084+
description: |-
1085+
The number of instances which have the latest workload settings applied
1086+
and are programmed (a subset of UpdatedReplicas that are ready to serve).
10861087
format: int32
10871088
type: integer
10881089
deployments:
@@ -1367,6 +1368,10 @@ spec:
13671368
- name
13681369
x-kubernetes-list-type: map
13691370
type: object
1371+
observedGeneration:
1372+
description: The most recent generation observed by the workload controller.
1373+
format: int64
1374+
type: integer
13701375
placements:
13711376
description: The current status of placements in a workload.
13721377
items:
@@ -1432,8 +1437,9 @@ spec:
14321437
type: object
14331438
type: array
14341439
currentReplicas:
1435-
description: The number of instances which have the latest workload
1436-
settings applied.
1440+
description: |-
1441+
The number of instances which have the latest workload settings applied
1442+
and are programmed (a subset of UpdatedReplicas that are ready to serve).
14371443
format: int32
14381444
type: integer
14391445
desiredReplicas:
@@ -1451,12 +1457,19 @@ spec:
14511457
description: The number of instances that currently exist
14521458
format: int32
14531459
type: integer
1460+
updatedReplicas:
1461+
description: |-
1462+
The number of instances updated to the latest template revision, regardless
1463+
of readiness. Lags Replicas during a rolling update or restart.
1464+
format: int32
1465+
type: integer
14541466
required:
14551467
- currentReplicas
14561468
- desiredReplicas
14571469
- name
14581470
- readyReplicas
14591471
- replicas
1472+
- updatedReplicas
14601473
type: object
14611474
type: array
14621475
readyReplicas:
@@ -1467,12 +1480,21 @@ spec:
14671480
description: The number of instances that currently exist
14681481
format: int32
14691482
type: integer
1483+
updatedReplicas:
1484+
description: |-
1485+
The number of instances updated to the latest template revision (their
1486+
observed template hash matches the desired template), regardless of
1487+
readiness. Lags Replicas during a rolling update or restart, then catches
1488+
back up — making an in-progress roll observable.
1489+
format: int32
1490+
type: integer
14701491
required:
14711492
- currentReplicas
14721493
- deployments
14731494
- desiredReplicas
14741495
- readyReplicas
14751496
- replicas
1497+
- updatedReplicas
14761498
type: object
14771499
required:
14781500
- spec

internal/controller/workload_controller.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus(
220220
newWorkloadStatus := workload.Status.DeepCopy()
221221
totalReplicas := int32(0)
222222
totalCurrentReplicas := int32(0)
223+
totalUpdatedReplicas := int32(0)
223224
totalDesiredReplicas := int32(0)
224225
totalReadyReplicas := int32(0)
225226
totalDeployments := int32(0)
@@ -251,12 +252,14 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus(
251252
foundAvailableDeployment := false
252253
replicas := int32(0)
253254
currentReplicas := int32(0)
255+
updatedReplicas := int32(0)
254256
desiredReplicas := int32(0)
255257
readyReplicas := int32(0)
256258
totalDeployments += int32(len(placementDeployments))
257259
for _, deployment := range placementDeployments {
258260
replicas += deployment.Status.Replicas
259261
currentReplicas += deployment.Status.CurrentReplicas
262+
updatedReplicas += deployment.Status.UpdatedReplicas
260263
desiredReplicas += deployment.Status.DesiredReplicas
261264
readyReplicas += deployment.Status.ReadyReplicas
262265

@@ -266,11 +269,13 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus(
266269
}
267270
totalReplicas += replicas
268271
totalCurrentReplicas += currentReplicas
272+
totalUpdatedReplicas += updatedReplicas
269273
totalDesiredReplicas += desiredReplicas
270274
totalReadyReplicas += readyReplicas
271275

272276
placementStatus.Replicas = replicas
273277
placementStatus.CurrentReplicas = currentReplicas
278+
placementStatus.UpdatedReplicas = updatedReplicas
274279
placementStatus.DesiredReplicas = desiredReplicas
275280
placementStatus.ReadyReplicas = readyReplicas
276281

@@ -304,8 +309,10 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus(
304309
newWorkloadStatus.Deployments = totalDeployments
305310
newWorkloadStatus.Replicas = totalReplicas
306311
newWorkloadStatus.CurrentReplicas = totalCurrentReplicas
312+
newWorkloadStatus.UpdatedReplicas = totalUpdatedReplicas
307313
newWorkloadStatus.DesiredReplicas = totalDesiredReplicas
308314
newWorkloadStatus.ReadyReplicas = totalReadyReplicas
315+
newWorkloadStatus.ObservedGeneration = workload.Generation
309316

310317
if equality.Semantic.DeepEqual(workload.Status, newWorkloadStatus) {
311318
return nil

internal/controller/workloaddeployment_controller.go

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -171,15 +171,17 @@ func (r *WorkloadDeploymentReconciler) Reconcile(ctx context.Context, req mcreco
171171
desiredReplicas = 0
172172
}
173173

174-
currentReplicas, readyReplicas, quotaBlockedReplicas, err := r.reconcileInstanceGates(ctx, cl.GetClient(), &deployment, instances.Items, networkReady)
174+
currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas, err := r.reconcileInstanceGates(ctx, cl.GetClient(), &deployment, instances.Items, networkReady)
175175
if err != nil {
176176
return ctrl.Result{}, err
177177
}
178178

179179
deployment.Status.Replicas = int32(replicas)
180180
deployment.Status.CurrentReplicas = int32(currentReplicas)
181+
deployment.Status.UpdatedReplicas = int32(updatedReplicas)
181182
deployment.Status.DesiredReplicas = desiredReplicas
182183
deployment.Status.ReadyReplicas = int32(readyReplicas)
184+
deployment.Status.ObservedGeneration = deployment.Generation
183185

184186
if quotaBlockedReplicas > 0 {
185187
apimeta.SetStatusCondition(&deployment.Status.Conditions, metav1.Condition{
@@ -239,7 +241,7 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates(
239241
deployment *computev1alpha.WorkloadDeployment,
240242
instances []computev1alpha.Instance,
241243
networkReady bool,
242-
) (currentReplicas, readyReplicas, quotaBlockedReplicas int, err error) {
244+
) (currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas int, err error) {
243245
templateHash := instancecontrol.ComputeHash(deployment.Spec.Template)
244246
for _, instance := range instances {
245247
if apimeta.IsStatusConditionPresentAndEqual(instance.Status.Conditions, computev1alpha.InstanceQuotaGranted, metav1.ConditionFalse) {
@@ -255,22 +257,34 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates(
255257
instance.Spec.Controller.SchedulingGates = newGates
256258
return nil
257259
}); patchErr != nil {
258-
return 0, 0, 0, fmt.Errorf("failed updating instance: %w", patchErr)
260+
return 0, 0, 0, 0, fmt.Errorf("failed updating instance: %w", patchErr)
259261
}
260262
}
261263
}
262264

263-
if apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceProgrammed) {
264-
if instance.Status.Controller.ObservedTemplateHash == templateHash {
265-
currentReplicas++
266-
}
265+
// An instance is "updated" once it has observed the desired template
266+
// revision, regardless of readiness. Counting these (even before they are
267+
// Programmed) makes a rolling update / restart observable: UpdatedReplicas
268+
// dips below Replicas while the recreated instance comes up, then recovers.
269+
// Status.Controller is a pointer the infra provider may not have populated
270+
// yet; guard the deref to avoid a panic that would abort the reconcile.
271+
onLatestRevision := instance.Status.Controller != nil &&
272+
instance.Status.Controller.ObservedTemplateHash == templateHash
273+
if onLatestRevision {
274+
updatedReplicas++
275+
}
276+
277+
// CurrentReplicas is the Programmed subset of UpdatedReplicas — updated
278+
// instances that are ready to serve.
279+
if onLatestRevision && apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceProgrammed) {
280+
currentReplicas++
267281
}
268282

269283
if apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceReady) {
270284
readyReplicas++
271285
}
272286
}
273-
return currentReplicas, readyReplicas, quotaBlockedReplicas, nil
287+
return currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas, nil
274288
}
275289

276290
// writeStatusToKarmada copies the WorkloadDeployment status to the matching

0 commit comments

Comments
 (0)