Skip to content

Commit 1e5d08c

Browse files
committed
SynaXG plugin dev scheme review
1 parent 82335b0 commit 1e5d08c

32 files changed

Lines changed: 5327 additions & 146 deletions

Dockerfile.SynaXGVSP.rhel

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
FROM registry.ci.openshift.org/ocp/builder:rhel-9-golang-1.24-openshift-4.21 AS builder
2+
ARG TARGETOS
3+
ARG TARGETARCH
4+
5+
WORKDIR /workspace
6+
COPY . .
7+
8+
# Due to https://github.com/golang/go/issues/70329 cross-compilation hangs at times.
9+
# As a temporary workaround, we can try specifying GOMAXPROCS=2 to relieve this issue
10+
RUN mkdir -p /bin && \
11+
GOMAXPROCS=2 CGO_ENABLED=1 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} make build-synaxg-vsp
12+
13+
FROM registry.ci.openshift.org/ocp/4.21:base-rhel9
14+
ARG TARGETARCH
15+
COPY --from=builder /workspace/bin/vsp-synaxg.${TARGETARCH} /vsp-synaxg
16+
17+
RUN yum update -y \
18+
&& yum install -y \
19+
ethtool \
20+
net-tools \
21+
kmod \
22+
pciutils \
23+
iputils \
24+
iproute \
25+
&& yum clean all \
26+
&& rm -rf /var/cache/dnf
27+
28+
USER 0
29+
30+
ENTRYPOINT ["/vsp-synaxg"]

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ fast-test:
152152

153153

154154
.PHONY: build
155-
build: manifests generate fmt vet build-manager build-daemon build-intel-vsp build-marvell-vsp build-intel-netsec-vsp build-network-resources-injector
155+
build: manifests generate fmt vet build-manager build-daemon build-intel-vsp build-marvell-vsp build-intel-netsec-vsp build-synaxg-vsp build-network-resources-injector
156156
@echo "Built all components"
157157

158158
.PHONY: build-manager
@@ -175,6 +175,10 @@ build-marvell-vsp:
175175
build-intel-netsec-vsp:
176176
go run tools/task/task.go build-bin-intel-netsec-vsp
177177

178+
.PHONY: build-synaxg-vsp
179+
build-synaxg-vsp:
180+
go run tools/task/task.go build-bin-synaxg-vsp
181+
178182
.PHONY: build-network-resources-injector
179183
build-network-resources-injector:
180184
go run tools/task/task.go build-bin-network-resources-injector

api/v1/dataprocessingunitconfig_types.go

Lines changed: 170 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,24 +23,185 @@ import (
2323
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
2424
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
2525

26+
// ========== DPU operation types ==========
27+
28+
type DpuOperationType string
29+
30+
const (
31+
// DpuOpNone No operation (default)
32+
DpuOpNone DpuOperationType = "None"
33+
// DpuOpFirmwareUpgrade Firmware upgrade operation
34+
DpuOpFirmwareUpgrade DpuOperationType = "FirmwareUpgrade"
35+
// DpuOpReboot DPU reboot operation (mandatory after firmware upgrade)
36+
DpuOpReboot DpuOperationType = "Reboot"
37+
)
38+
39+
// ========== Firmware types ==========
40+
41+
type DpuFirmwareType string
42+
43+
const (
44+
// DpuFirmwareTypeOAM OAM type firmware
45+
DpuFirmwareTypeOAM DpuFirmwareType = "OAM"
46+
// DpuFirmwareTypeSDK SDK type firmware
47+
DpuFirmwareTypeSDK DpuFirmwareType = "SDK"
48+
)
49+
50+
// ========== Operation status phases ==========
51+
52+
type DpuOperationStatusPhase string
53+
54+
const (
55+
// DpuPhasePending Operation pending execution (default)
56+
DpuPhasePending DpuOperationStatusPhase = "Pending"
57+
// DpuPhaseRunning Operation is in progress
58+
DpuPhaseRunning DpuOperationStatusPhase = "Running"
59+
// DpuPhaseRebooting DPU is rebooting (waiting for it to come back online)
60+
DpuPhaseRebooting DpuOperationStatusPhase = "Rebooting"
61+
// DpuPhaseSucceeded Operation completed successfully
62+
DpuPhaseSucceeded DpuOperationStatusPhase = "Succeeded"
63+
// DpuPhaseFailed Operation execution failed
64+
DpuPhaseFailed DpuOperationStatusPhase = "Failed"
65+
// DpuPhaseCancelled Operation was cancelled
66+
DpuPhaseCancelled DpuOperationStatusPhase = "Cancelled"
67+
)
68+
69+
// ========== Health status ==========
70+
71+
type DpuHealthStatus string
72+
73+
const (
74+
// HealthStatusHealthy DPU is healthy and responding
75+
HealthStatusHealthy DpuHealthStatus = "Healthy"
76+
// HealthStatusUnhealthy DPU is not responding
77+
HealthStatusUnhealthy DpuHealthStatus = "Unhealthy"
78+
// HealthStatusUnknown DPU health is unknown
79+
HealthStatusUnknown DpuHealthStatus = "Unknown"
80+
)
81+
82+
// ========== Firmware specification ==========
83+
84+
// DpuFirmwareSpec defines the firmware upgrade parameters.
85+
type DpuFirmwareSpec struct {
86+
// Firmware type (OAM/SDK)
87+
// +kubebuilder:validation:Required
88+
// +kubebuilder:validation:Enum=OAM;SDK
89+
Type DpuFirmwareType `json:"type"`
90+
91+
// Target firmware version number
92+
// +kubebuilder:validation:Required
93+
TargetVersion string `json:"targetVersion"`
94+
95+
// Firmware image path or URI (e.g. quay.io/openshift/firmware/dpu:v1.0.8)
96+
// +optional
97+
FirmwarePath string `json:"firmwarePath,omitempty"`
98+
}
99+
100+
// ========== DPU management (spec) ==========
101+
102+
// DataProcessingUnitManagement defines the desired management operation on a DPU.
103+
type DataProcessingUnitManagement struct {
104+
// DPU operation type to execute: None, FirmwareUpgrade, or Reboot.
105+
// +kubebuilder:validation:Enum=None;FirmwareUpgrade;Reboot
106+
// +kubebuilder:default=None
107+
Operation DpuOperationType `json:"operation,omitempty"`
108+
109+
// Detailed configuration for firmware upgrade.
110+
// Required when Operation is FirmwareUpgrade.
111+
// +optional
112+
Firmware *DpuFirmwareSpec `json:"firmware,omitempty"`
113+
}
114+
115+
// ========== Spec ==========
116+
26117
// DataProcessingUnitConfigSpec defines the desired state of DataProcessingUnitConfig.
27118
type DataProcessingUnitConfigSpec struct {
28-
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
29-
// Important: Run "make" to regenerate code after modifying this file
30-
31-
// DpuSelector specifies which DPUs this DpuConfig CR should target.
32-
// If empty, the DpuConfig will target all DPUs.
119+
// DpuSelector specifies which DPUs this config should target.
120+
// Must include a pci-address label to uniquely identify the DPU on
121+
// nodes that have more than one DPU of the same vendor.
33122
// +optional
34123
DpuSelector *metav1.LabelSelector `json:"dpuSelector,omitempty"`
35124

36-
// Foo is an example field of DataProcessingUnitConfig. Edit dataprocessingunitconfig_types.go to remove/update
37-
Foo string `json:"foo,omitempty"`
125+
// DpuManagement specifies the management operation to perform.
126+
DpuManagement DataProcessingUnitManagement `json:"dpuManagement,omitempty"`
38127
}
39128

129+
// ========== Status ==========
130+
131+
// DpuNodeOperationStatus tracks the status of the current management operation.
132+
type DpuNodeOperationStatus struct {
133+
// SubOperation is the type of operation being tracked; it distinguishes FirmwareUpgrade from Reboot.
134+
SubOperation DpuOperationType `json:"subOperation,omitempty"`
135+
136+
// FirmwareType is the firmware kind (OAM or SDK); valid only when SubOperation is FirmwareUpgrade.
137+
FirmwareType DpuFirmwareType `json:"firmwareType,omitempty"`
138+
139+
// Phase is the current status of the operation: Pending/Running/Rebooting/Succeeded/Failed/Cancelled.
140+
Phase DpuOperationStatusPhase `json:"phase,omitempty"`
141+
142+
// StartTime is when the operation started.
143+
// +optional
144+
StartTime *metav1.Time `json:"startTime,omitempty"`
145+
146+
// CompletionTime is when the operation completed (success or failure).
147+
// +optional
148+
CompletionTime *metav1.Time `json:"completionTime,omitempty"`
149+
150+
// PreviousVersion is the firmware version before upgrade.
151+
// +optional
152+
PreviousVersion string `json:"previousVersion,omitempty"`
153+
154+
// TargetVersion is the desired firmware version for upgrade.
155+
// +optional
156+
TargetVersion string `json:"targetVersion,omitempty"`
157+
158+
// Message is a human-readable summary of the operation result.
159+
// +optional
160+
Message string `json:"message,omitempty"`
161+
162+
// ErrorMessage contains error details when the operation fails.
163+
// +optional
164+
ErrorMessage string `json:"errorMessage,omitempty"`
165+
}
166+
167+
// DpuHealthInfo tracks the health / liveness of the DPU.
168+
type DpuHealthInfo struct {
169+
// Status is the current health of the DPU: Healthy/Unhealthy/Unknown.
170+
Status DpuHealthStatus `json:"status,omitempty"`
171+
172+
// Message is a human-readable health description.
173+
// +optional
174+
Message string `json:"message,omitempty"`
175+
176+
// LastProbeTime is the last time the health was checked.
177+
// +optional
178+
LastProbeTime *metav1.Time `json:"lastProbeTime,omitempty"`
179+
}
180+
181+
// ConditionTypeReady is the standard K8s condition used for kubectl-wait support.
182+
//
183+
// kubectl wait dpuconfig/<name> --for=condition=Ready --timeout=600s
184+
//
185+
// Condition values:
186+
// - True: phase=Succeeded and DPU is healthy
187+
// - False: phase=Failed or DPU is unreachable
188+
// - Unknown: operation in progress (Rebooting/Running)
189+
const ConditionTypeReady = "Ready"
190+
40191
// DataProcessingUnitConfigStatus defines the observed state of DataProcessingUnitConfig.
41192
type DataProcessingUnitConfigStatus struct {
42-
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
43-
// Important: Run "make" to regenerate code after modifying this file
193+
// NodeStatus tracks the current management-operation status.
194+
NodeStatus DpuNodeOperationStatus `json:"nodeStatus,omitempty"`
195+
196+
// Health tracks the DPU liveness information.
197+
Health DpuHealthInfo `json:"health,omitempty"`
198+
199+
// Conditions holds standard Kubernetes status conditions.
200+
// The "Ready" condition supports `kubectl wait --for=condition=Ready`.
201+
// +optional
202+
// +listType=map
203+
// +listMapKey=type
204+
Conditions []metav1.Condition `json:"conditions,omitempty"`
44205
}
45206

46207
// +kubebuilder:object:root=true
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
Copyright 2024.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package v1
18+
19+
import (
20+
"context"
21+
"fmt"
22+
23+
"k8s.io/apimachinery/pkg/runtime"
24+
ctrl "sigs.k8s.io/controller-runtime"
25+
logf "sigs.k8s.io/controller-runtime/pkg/log"
26+
"sigs.k8s.io/controller-runtime/pkg/webhook"
27+
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
28+
)
29+
30+
var dpuconfiglog = logf.Log.WithName("dataprocessingunitconfig-resource")
31+
32+
// SetupWebhookWithManager registers the validating webhook for DataProcessingUnitConfig.
33+
// Vendor-specific defaults (e.g. default firmware image) and allowlist enforcement are
34+
// handled inside each VSP process, which is the only component that knows the vendor.
35+
func (r *DataProcessingUnitConfig) SetupWebhookWithManager(mgr ctrl.Manager) error {
36+
return ctrl.NewWebhookManagedBy(mgr).
37+
For(r).
38+
WithValidator(r).
39+
Complete()
40+
}
41+
42+
// +kubebuilder:webhook:path=/validate-config-openshift-io-v1-dataprocessingunitconfig,mutating=false,failurePolicy=fail,sideEffects=None,groups=config.openshift.io,resources=dataprocessingunitconfigs,verbs=create;update,versions=v1,name=vdataprocessingunitconfig.kb.io,admissionReviewVersions=v1
43+
44+
var _ webhook.CustomValidator = &DataProcessingUnitConfig{}
45+
46+
// validateDataProcessingUnitConfig performs vendor-neutral structural validation.
47+
func validateDataProcessingUnitConfig(cfg *DataProcessingUnitConfig) (admission.Warnings, error) {
48+
if cfg.Spec.DpuManagement.Operation == DpuOpFirmwareUpgrade &&
49+
cfg.Spec.DpuManagement.Firmware == nil {
50+
return nil, fmt.Errorf("spec.dpuManagement.firmware is required when operation is FirmwareUpgrade")
51+
}
52+
return nil, nil
53+
}
54+
55+
// ValidateCreate validates a new DataProcessingUnitConfig on creation.
56+
func (r *DataProcessingUnitConfig) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
57+
cfg := obj.(*DataProcessingUnitConfig)
58+
dpuconfiglog.Info("validate create", "name", cfg.Name)
59+
return validateDataProcessingUnitConfig(cfg)
60+
}
61+
62+
// ValidateUpdate validates changes to an existing DataProcessingUnitConfig.
63+
func (r *DataProcessingUnitConfig) ValidateUpdate(ctx context.Context, oldObj runtime.Object, newObj runtime.Object) (admission.Warnings, error) {
64+
cfg := newObj.(*DataProcessingUnitConfig)
65+
dpuconfiglog.Info("validate update", "name", cfg.Name)
66+
return validateDataProcessingUnitConfig(cfg)
67+
}
68+
69+
// ValidateDelete allows deletion without restriction.
70+
func (r *DataProcessingUnitConfig) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
71+
return nil, nil
72+
}

api/v1/zz_generated.deepcopy.go

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cmd/main.go

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -138,14 +138,20 @@ func main() {
138138
setupLog.Error(err, "unable to create webhook", "webhook", "DpuOperatorConfig")
139139
os.Exit(1)
140140
}
141+
if err = (&configv1.DataProcessingUnitConfig{}).SetupWebhookWithManager(mgr); err != nil {
142+
setupLog.Error(err, "unable to create webhook", "webhook", "DataProcessingUnitConfig")
143+
os.Exit(1)
144+
}
141145
}
142-
if err := (&controller.DataProcessingUnitConfigReconciler{
143-
Client: mgr.GetClient(),
144-
Scheme: mgr.GetScheme(),
145-
}).SetupWithManager(mgr); err != nil {
146-
setupLog.Error(err, "unable to create controller", "controller", "DataProcessingUnitConfig")
147-
os.Exit(1)
148-
}
146+
// Note: DataProcessingUnitConfig webhook validates generic structure only.
147+
// Vendor-specific firmware defaults and registry allowlists are enforced in each VSP.
148+
// if err := (&controller.DataProcessingUnitConfigReconciler{
149+
// Client: mgr.GetClient(),
150+
// Scheme: mgr.GetScheme(),
151+
// }).SetupWithManager(mgr); err != nil {
152+
// setupLog.Error(err, "unable to create controller", "controller", "DataProcessingUnitConfig")
153+
// os.Exit(1)
154+
// }
149155
//+kubebuilder:scaffold:builder
150156

151157
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {

0 commit comments

Comments
 (0)