From e49657577d5747b726ea77c4d7cf9845d8552c9a Mon Sep 17 00:00:00 2001 From: Byonggon Chun Date: Thu, 3 Apr 2025 21:50:07 +0100 Subject: [PATCH 01/12] encapsulate Checkpoint internal state Co-authored-by: Jon Huhn --- cmd/dra-example-kubeletplugin/checkpoint.go | 33 +++++++++++++++++++ cmd/dra-example-kubeletplugin/state.go | 19 +++++------ .../profiles/profiles_test.go | 8 ++--- 3 files changed, 45 insertions(+), 15 deletions(-) rename cmd/dra-example-kubeletplugin/state_test.go => internal/profiles/profiles_test.go (90%) diff --git a/cmd/dra-example-kubeletplugin/checkpoint.go b/cmd/dra-example-kubeletplugin/checkpoint.go index 7311e760..160e163a 100644 --- a/cmd/dra-example-kubeletplugin/checkpoint.go +++ b/cmd/dra-example-kubeletplugin/checkpoint.go @@ -3,14 +3,21 @@ package main import ( "encoding/json" + "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" + + "sigs.k8s.io/dra-example-driver/internal/profiles" ) +type PreparedClaims map[string]profiles.PreparedDevices + type Checkpoint struct { Checksum checksum.Checksum `json:"checksum"` V1 *CheckpointV1 `json:"v1,omitempty"` } +var _ checkpointmanager.Checkpoint = &Checkpoint{} + type CheckpointV1 struct { PreparedClaims PreparedClaims `json:"preparedClaims,omitempty"` } @@ -25,6 +32,32 @@ func newCheckpoint() *Checkpoint { return pc } +func (cp *Checkpoint) GetPreparedDevices(claimUID string) profiles.PreparedDevices { + if cp.V1 == nil { + return nil + } + if devices, ok := cp.V1.PreparedClaims[claimUID]; ok { + return devices + } + return nil +} + +func (cp *Checkpoint) AddPreparedDevices(claimUID string, pds profiles.PreparedDevices) { + if cp.V1 == nil { + return + } + + cp.V1.PreparedClaims[claimUID] = pds +} + +func (cp *Checkpoint) RemovePreparedDevices(claimUID string) { + if cp.V1 == nil { + return + } + + delete(cp.V1.PreparedClaims, claimUID) +} + func (cp *Checkpoint) MarshalCheckpoint() ([]byte, error) { cp.Checksum = 0 out, err := 
json.Marshal(*cp) diff --git a/cmd/dra-example-kubeletplugin/state.go b/cmd/dra-example-kubeletplugin/state.go index 824c880e..4a06a636 100644 --- a/cmd/dra-example-kubeletplugin/state.go +++ b/cmd/dra-example-kubeletplugin/state.go @@ -32,7 +32,6 @@ import ( ) type AllocatableDevices map[string]resourceapi.Device -type PreparedClaims map[string]profiles.PreparedDevices type OpaqueDeviceConfig struct { Requests []string @@ -134,10 +133,10 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi if err := s.checkpointManager.GetCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { return nil, fmt.Errorf("unable to sync from checkpoint: %v", err) } - preparedClaims := checkpoint.V1.PreparedClaims - if preparedClaims[claimUID] != nil { - return preparedClaims[claimUID].GetDevices(), nil + preparedDevices := checkpoint.GetPreparedDevices(claimUID) + if preparedDevices != nil { + return preparedDevices.GetDevices(), nil } preparedDevices, err := s.prepareDevices(claim) if err != nil { @@ -148,12 +147,12 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi return nil, fmt.Errorf("unable to create CDI spec file for claim: %v", err) } - preparedClaims[claimUID] = preparedDevices + checkpoint.AddPreparedDevices(claimUID, preparedDevices) if err := s.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { return nil, fmt.Errorf("unable to sync to checkpoint: %v", err) } - return preparedClaims[claimUID].GetDevices(), nil + return preparedDevices.GetDevices(), nil } func (s *DeviceState) Unprepare(claimUID string) error { @@ -167,13 +166,13 @@ func (s *DeviceState) Unprepare(claimUID string) error { return fmt.Errorf("unable to create new checkpoint: %v", err) } } - preparedClaims := checkpoint.V1.PreparedClaims - if preparedClaims[claimUID] == nil { + preparedDevices := checkpoint.GetPreparedDevices(claimUID) + if preparedDevices == nil { return nil } - if err := 
s.unprepareDevices(claimUID, preparedClaims[claimUID]); err != nil { + if err := s.unprepareDevices(claimUID, preparedDevices); err != nil { return fmt.Errorf("unprepare failed: %v", err) } @@ -182,7 +181,7 @@ func (s *DeviceState) Unprepare(claimUID string) error { return fmt.Errorf("unable to delete CDI spec file for claim: %v", err) } - delete(preparedClaims, claimUID) + checkpoint.RemovePreparedDevices(claimUID) if err := s.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { return fmt.Errorf("unable to sync to checkpoint: %v", err) } diff --git a/cmd/dra-example-kubeletplugin/state_test.go b/internal/profiles/profiles_test.go similarity index 90% rename from cmd/dra-example-kubeletplugin/state_test.go rename to internal/profiles/profiles_test.go index d23b3859..e73d9348 100644 --- a/cmd/dra-example-kubeletplugin/state_test.go +++ b/internal/profiles/profiles_test.go @@ -14,21 +14,19 @@ * limitations under the License. */ -package main +package profiles import ( "testing" "github.com/stretchr/testify/assert" - "sigs.k8s.io/dra-example-driver/internal/profiles" - drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1" ) func TestPreparedDevicesGetDevices(t *testing.T) { tests := map[string]struct { - preparedDevices profiles.PreparedDevices + preparedDevices PreparedDevices expected []*drapbv1.Device }{ "nil PreparedDevices": { @@ -36,7 +34,7 @@ func TestPreparedDevicesGetDevices(t *testing.T) { expected: nil, }, "several PreparedDevices": { - preparedDevices: profiles.PreparedDevices{ + preparedDevices: PreparedDevices{ {Device: drapbv1.Device{DeviceName: "dev1"}}, {Device: drapbv1.Device{DeviceName: "dev2"}}, {Device: drapbv1.Device{DeviceName: "dev3"}}, From bbb148867973a506b36adec492bf10eafab1b807 Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Wed, 4 Feb 2026 20:53:48 -0600 Subject: [PATCH 02/12] Replace k/k checkpoint manager with file reads/writes --- cmd/dra-example-kubeletplugin/checkpoint.go | 103 +++++++++++------ 
.../checkpoint_test.go | 108 ++++++++++++++++++ cmd/dra-example-kubeletplugin/state.go | 71 ++++++------ go.mod | 3 +- go.sum | 6 +- 5 files changed, 209 insertions(+), 82 deletions(-) create mode 100644 cmd/dra-example-kubeletplugin/checkpoint_test.go diff --git a/cmd/dra-example-kubeletplugin/checkpoint.go b/cmd/dra-example-kubeletplugin/checkpoint.go index 160e163a..b2ff0a60 100644 --- a/cmd/dra-example-kubeletplugin/checkpoint.go +++ b/cmd/dra-example-kubeletplugin/checkpoint.go @@ -1,10 +1,26 @@ +/* + * Copyright The Kubernetes Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package main import ( "encoding/json" - - "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" - "k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum" + "fmt" + "os" + "path/filepath" "sigs.k8s.io/dra-example-driver/internal/profiles" ) @@ -12,26 +28,60 @@ import ( type PreparedClaims map[string]profiles.PreparedDevices type Checkpoint struct { - Checksum checksum.Checksum `json:"checksum"` - V1 *CheckpointV1 `json:"v1,omitempty"` + V1 *CheckpointV1 `json:"v1,omitempty"` } -var _ checkpointmanager.Checkpoint = &Checkpoint{} - type CheckpointV1 struct { PreparedClaims PreparedClaims `json:"preparedClaims,omitempty"` } func newCheckpoint() *Checkpoint { pc := &Checkpoint{ - Checksum: 0, - V1: &CheckpointV1{ - PreparedClaims: make(PreparedClaims), - }, + V1: &CheckpointV1{}, } return pc } +func readCheckpoint(path string) (*Checkpoint, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + checkpoint := new(Checkpoint) + err = json.Unmarshal(data, checkpoint) + if err != nil { + return nil, fmt.Errorf("unmarshal json from %s: %w", path, err) + } + return checkpoint, nil +} + +func writeCheckpoint(path string, checkpoint *Checkpoint) (err error) { + data, err := json.Marshal(checkpoint) + if err != nil { + return fmt.Errorf("marshal json: %w", err) + } + dir := filepath.Dir(path) + tmp, err := os.CreateTemp(dir, "tmp-checkpoint-*") + if err != nil { + return fmt.Errorf("create temp file in %s: %w", dir, err) + } + defer func() { + if err1 := tmp.Close(); err1 != nil && err == nil { + err = fmt.Errorf("close temp file: %w", err1) + } + }() + if _, err := tmp.Write(data); err != nil { + return fmt.Errorf("write to temp file %s: %w", tmp.Name(), err) + } + if err := tmp.Sync(); err != nil { + return fmt.Errorf("sync temp file: %w", err) + } + if err := os.Rename(tmp.Name(), path); err != nil { + return fmt.Errorf("rename %s to %s: %w", tmp.Name(), path, err) + } + return nil +} + func (cp *Checkpoint) GetPreparedDevices(claimUID 
string) profiles.PreparedDevices { if cp.V1 == nil { return nil @@ -47,6 +97,10 @@ func (cp *Checkpoint) AddPreparedDevices(claimUID string, pds profiles.PreparedD return } + if cp.V1.PreparedClaims == nil { + cp.V1.PreparedClaims = make(PreparedClaims) + } + cp.V1.PreparedClaims[claimUID] = pds } @@ -57,30 +111,3 @@ func (cp *Checkpoint) RemovePreparedDevices(claimUID string) { delete(cp.V1.PreparedClaims, claimUID) } - -func (cp *Checkpoint) MarshalCheckpoint() ([]byte, error) { - cp.Checksum = 0 - out, err := json.Marshal(*cp) - if err != nil { - return nil, err - } - cp.Checksum = checksum.New(out) - return json.Marshal(*cp) -} - -func (cp *Checkpoint) UnmarshalCheckpoint(data []byte) error { - return json.Unmarshal(data, cp) -} - -func (cp *Checkpoint) VerifyChecksum() error { - ck := cp.Checksum - cp.Checksum = 0 - defer func() { - cp.Checksum = ck - }() - out, err := json.Marshal(*cp) - if err != nil { - return err - } - return ck.Verify(out) -} diff --git a/cmd/dra-example-kubeletplugin/checkpoint_test.go b/cmd/dra-example-kubeletplugin/checkpoint_test.go new file mode 100644 index 00000000..9124ad7a --- /dev/null +++ b/cmd/dra-example-kubeletplugin/checkpoint_test.go @@ -0,0 +1,108 @@ +/* + * Copyright The Kubernetes Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package main + +import ( + "io/fs" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1" + "tags.cncf.io/container-device-interface/pkg/cdi" + "tags.cncf.io/container-device-interface/specs-go" + + "sigs.k8s.io/dra-example-driver/internal/profiles" +) + +func TestReadWriteCheckpointRoundtrip(t *testing.T) { + tests := map[string]struct { + checkpoint *Checkpoint + }{ + "new checkpoint": { + checkpoint: newCheckpoint(), + }, + "populated checkpoint": { + checkpoint: &Checkpoint{ + V1: &CheckpointV1{ + PreparedClaims{ + "uid": profiles.PreparedDevices{ + { + Device: drapbv1.Device{ + RequestNames: []string{"req"}, + PoolName: "pool", + DeviceName: "dev", + CdiDeviceIds: []string{"id"}, + }, + ContainerEdits: &cdi.ContainerEdits{ + ContainerEdits: &specs.ContainerEdits{ + Env: []string{"KEY=value"}, + }, + }, + AdminAccess: true, + }, + }, + }, + }, + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, DriverPluginCheckpointFile) + + checkpoint, err := readCheckpoint(path) + assert.Nil(t, checkpoint) + assert.ErrorIs(t, err, fs.ErrNotExist) + + checkpoint = test.checkpoint + err = writeCheckpoint(path, checkpoint) + require.NoError(t, err) + + read, err := readCheckpoint(path) + require.NoError(t, err) + assert.Equal(t, test.checkpoint, read) + }) + } + + // The checkpoint format used to contain a checksum. This test ensures that + // checkpoints written in the old format can still be read. 
+ t.Run("old checkpoint format", func(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, DriverPluginCheckpointFile) + + old := `{ + "checksum": 1, + "v1": { + "preparedClaims": { + "uid": [] + } + } + }` + + err := os.WriteFile(path, []byte(old), 0o600) + require.NoError(t, err) + + checkpoint, err := readCheckpoint(path) + assert.NoError(t, err) + assert.NotNil(t, checkpoint.V1.PreparedClaims) + }) +} diff --git a/cmd/dra-example-kubeletplugin/state.go b/cmd/dra-example-kubeletplugin/state.go index 4a06a636..7670d39b 100644 --- a/cmd/dra-example-kubeletplugin/state.go +++ b/cmd/dra-example-kubeletplugin/state.go @@ -17,7 +17,10 @@ package main import ( + "errors" "fmt" + "io/fs" + "path/filepath" "slices" "sync" @@ -26,7 +29,6 @@ import ( "k8s.io/apimachinery/pkg/runtime/serializer/json" "k8s.io/dynamic-resource-allocation/resourceslice" drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1" - "k8s.io/kubernetes/pkg/kubelet/checkpointmanager" "sigs.k8s.io/dra-example-driver/internal/profiles" ) @@ -40,13 +42,13 @@ type OpaqueDeviceConfig struct { type DeviceState struct { sync.Mutex - driverName string - cdi *CDIHandler - driverResources resourceslice.DriverResources - allocatable AllocatableDevices - checkpointManager checkpointmanager.CheckpointManager - configDecoder runtime.Decoder - configHandler profiles.ConfigHandler + driverName string + cdi *CDIHandler + driverResources resourceslice.DriverResources + allocatable AllocatableDevices + configDecoder runtime.Decoder + configHandler profiles.ConfigHandler + checkpointPath string } func NewDeviceState(config *Config) (*DeviceState, error) { @@ -65,11 +67,6 @@ func NewDeviceState(config *Config) (*DeviceState, error) { return nil, fmt.Errorf("unable to create CDI spec file for common edits: %v", err) } - checkpointManager, err := checkpointmanager.NewCheckpointManager(config.DriverPluginPath()) - if err != nil { - return nil, fmt.Errorf("unable to create checkpoint manager: %v", err) - } - configScheme 
:= runtime.NewScheme() configHandler := config.profile sb := configHandler.SchemeBuilder() @@ -95,28 +92,24 @@ func NewDeviceState(config *Config) (*DeviceState, error) { } state := &DeviceState{ - driverName: config.flags.driverName, - cdi: cdi, - driverResources: driverResources, - allocatable: allocatable, - checkpointManager: checkpointManager, - configDecoder: decoder, - configHandler: configHandler, + driverName: config.flags.driverName, + cdi: cdi, + driverResources: driverResources, + allocatable: allocatable, + configDecoder: decoder, + configHandler: configHandler, + checkpointPath: filepath.Join(config.DriverPluginPath(), DriverPluginCheckpointFile), } - checkpoints, err := state.checkpointManager.ListCheckpoints() - if err != nil { - return nil, fmt.Errorf("unable to list checkpoints: %v", err) + _, err = readCheckpoint(state.checkpointPath) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + return nil, fmt.Errorf("failed to read checkpoint: %w", err) } - - for _, c := range checkpoints { - if c == DriverPluginCheckpointFile { - return state, nil - } + if err == nil { + return state, nil } - checkpoint := newCheckpoint() - if err := state.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + if err := writeCheckpoint(state.checkpointPath, newCheckpoint()); err != nil { return nil, fmt.Errorf("unable to sync to checkpoint: %v", err) } @@ -129,8 +122,8 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi claimUID := string(claim.UID) - checkpoint := newCheckpoint() - if err := s.checkpointManager.GetCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + checkpoint, err := readCheckpoint(s.checkpointPath) + if err != nil { return nil, fmt.Errorf("unable to sync from checkpoint: %v", err) } @@ -138,7 +131,7 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi if preparedDevices != nil { return preparedDevices.GetDevices(), nil } - 
preparedDevices, err := s.prepareDevices(claim) + preparedDevices, err = s.prepareDevices(claim) if err != nil { return nil, fmt.Errorf("prepare failed: %v", err) } @@ -148,7 +141,7 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi } checkpoint.AddPreparedDevices(claimUID, preparedDevices) - if err := s.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + if err := writeCheckpoint(s.checkpointPath, checkpoint); err != nil { return nil, fmt.Errorf("unable to sync to checkpoint: %v", err) } @@ -159,10 +152,10 @@ func (s *DeviceState) Unprepare(claimUID string) error { s.Lock() defer s.Unlock() - checkpoint := newCheckpoint() - if err := s.checkpointManager.GetCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + checkpoint, err := readCheckpoint(s.checkpointPath) + if err != nil { checkpoint = newCheckpoint() - if err := s.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + if err := writeCheckpoint(s.checkpointPath, checkpoint); err != nil { return fmt.Errorf("unable to create new checkpoint: %v", err) } } @@ -176,13 +169,13 @@ func (s *DeviceState) Unprepare(claimUID string) error { return fmt.Errorf("unprepare failed: %v", err) } - err := s.cdi.DeleteClaimSpecFile(claimUID) + err = s.cdi.DeleteClaimSpecFile(claimUID) if err != nil { return fmt.Errorf("unable to delete CDI spec file for claim: %v", err) } checkpoint.RemovePreparedDevices(claimUID) - if err := s.checkpointManager.CreateCheckpoint(DriverPluginCheckpointFile, checkpoint); err != nil { + if err := writeCheckpoint(s.checkpointPath, checkpoint); err != nil { return fmt.Errorf("unable to sync to checkpoint: %v", err) } diff --git a/go.mod b/go.mod index 6b505244..e68cbc4e 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,6 @@ require ( k8s.io/dynamic-resource-allocation v0.35.3 k8s.io/klog/v2 v2.140.0 k8s.io/kubelet v0.35.3 - k8s.io/kubernetes v1.35.3 k8s.io/utils 
v0.0.0-20251002143259-bc988d571ff4 tags.cncf.io/container-device-interface v1.1.0 tags.cncf.io/container-device-interface/specs-go v1.1.0 @@ -47,12 +46,14 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/opencontainers/runtime-spec v1.3.0 // indirect github.com/opencontainers/runtime-tools v0.9.1-0.20251114084447-edf4cb3d2116 // indirect + github.com/opencontainers/selinux v1.13.0 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_golang v1.23.2 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/procfs v0.16.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/cobra v1.10.0 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect diff --git a/go.sum b/go.sum index 33caf394..7a1b11f2 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,6 @@ cyphar.com/go-pathrs v0.2.1 h1:9nx1vOgwVvX1mNBWDu93+vaceedpbsDqo+XuBGL40b8= cyphar.com/go-pathrs v0.2.1/go.mod h1:y8f1EMG7r+hCuFf/rXsKqMJrJAUoADZGNh5/vZPKcGc= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= -github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= -github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= @@ -124,6 +122,7 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/objx v0.5.2 
h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= @@ -173,6 +172,7 @@ golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.39.0 h1:RclSuaJf32jOqZz74CkPA9qFuVTX7vhLlpfj/IGWlqY= @@ -217,8 +217,6 @@ k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZ k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= k8s.io/kubelet v0.35.3 h1:Y6b9+U/aTBmou9JZ6qv18O4dpFbJOfl7cBe+ZksT7RY= k8s.io/kubelet v0.35.3/go.mod h1:aWoMogtyUEf/mTl8VjqHbSkW5ZZkB8vTkrg9Fi6TKwE= -k8s.io/kubernetes v1.35.3 h1:J3dk2wybKFHwoH4eydDUGHJo4HAD+9CZbSlvk/YQuao= -k8s.io/kubernetes v1.35.3/go.mod h1:AaPpCpiS8oAqRbEwpY5r3RitLpwpVp5lVXKFkJril58= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/json 
v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= From 840568511065fccaf844e64e05c8dea793d8d130 Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Thu, 16 Apr 2026 15:24:11 -0500 Subject: [PATCH 03/12] Rework Checkpoint API --- Makefile | 18 ++- cmd/dra-example-kubeletplugin/cdi.go | 4 +- cmd/dra-example-kubeletplugin/checkpoint.go | 69 ++-------- .../checkpoint_test.go | 69 +++------- cmd/dra-example-kubeletplugin/state.go | 125 +++++++++++++----- .../dra-example-kubeletplugin/state_test.go | 2 +- common.mk | 2 +- docker/Dockerfile.devel | 1 + internal/api/checkpoint/doc.go | 23 ++++ internal/api/checkpoint/install/install.go | 43 ++++++ .../api/checkpoint/install/roundtrip_test.go | 27 ++++ internal/api/checkpoint/register.go | 43 ++++++ internal/api/checkpoint/types.go | 41 ++++++ internal/api/checkpoint/v1alpha1/doc.go | 22 +++ internal/api/checkpoint/v1alpha1/register.go | 46 +++++++ internal/api/checkpoint/v1alpha1/types.go | 41 ++++++ .../v1alpha1/zz_generated.conversion.go | 101 ++++++++++++++ .../v1alpha1/zz_generated.deepcopy.go | 69 ++++++++++ .../api/checkpoint/zz_generated.deepcopy.go | 69 ++++++++++ internal/profiles/profiles.go | 17 --- 20 files changed, 661 insertions(+), 171 deletions(-) rename internal/profiles/profiles_test.go => cmd/dra-example-kubeletplugin/state_test.go (98%) create mode 100644 internal/api/checkpoint/doc.go create mode 100644 internal/api/checkpoint/install/install.go create mode 100644 internal/api/checkpoint/install/roundtrip_test.go create mode 100644 internal/api/checkpoint/register.go create mode 100644 internal/api/checkpoint/types.go create mode 100644 internal/api/checkpoint/v1alpha1/doc.go create mode 100644 internal/api/checkpoint/v1alpha1/register.go create mode 100644 internal/api/checkpoint/v1alpha1/types.go create mode 100644 internal/api/checkpoint/v1alpha1/zz_generated.conversion.go create mode 100644 internal/api/checkpoint/v1alpha1/zz_generated.deepcopy.go create mode 100644 
internal/api/checkpoint/zz_generated.deepcopy.go diff --git a/Makefile b/Makefile index 586ed42e..3a70e993 100644 --- a/Makefile +++ b/Makefile @@ -102,15 +102,24 @@ coverage: test cat $(COVERAGE_FILE) | grep -v "_mock.go" > $(COVERAGE_FILE).no-mocks go tool cover -func=$(COVERAGE_FILE).no-mocks -generate: generate-deepcopy +generate: generate-deepcopy generate-conversion generate-deepcopy: for api in $(APIS); do \ - rm -f $(CURDIR)/api/$(VENDOR)/resource/$${api}/zz_generated.deepcopy.go; \ + rm -f $${api}/zz_generated.deepcopy.go; \ controller-gen \ object:headerFile=$(CURDIR)/hack/boilerplate.generatego.txt \ - paths=$(CURDIR)/api/$(VENDOR)/resource/$${api}/ \ - output:object:dir=$(CURDIR)/api/$(VENDOR)/resource/$${api}; \ + paths=$${api}/ \ + output:object:dir=$${api}; \ + done + +generate-conversion: + for api in $(APIS); do \ + rm -f $${api}/zz_generated.conversion.go; \ + conversion-gen \ + --go-header-file=$(CURDIR)/hack/boilerplate.generatego.txt \ + --output-file=zz_generated.conversion.go \ + $${api}/; \ done setup-e2e: @@ -128,7 +137,6 @@ teardown-e2e: .build-image: docker/Dockerfile.devel if [ x"$(SKIP_IMAGE_BUILD)" = x"" ]; then \ $(CONTAINER_TOOL) build \ - --progress=plain \ --build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \ --tag $(BUILDIMAGE) \ -f $(^) \ diff --git a/cmd/dra-example-kubeletplugin/cdi.go b/cmd/dra-example-kubeletplugin/cdi.go index 3fe2a84b..f761b318 100644 --- a/cmd/dra-example-kubeletplugin/cdi.go +++ b/cmd/dra-example-kubeletplugin/cdi.go @@ -25,8 +25,6 @@ import ( cdiapi "tags.cncf.io/container-device-interface/pkg/cdi" cdiparser "tags.cncf.io/container-device-interface/pkg/parser" cdispec "tags.cncf.io/container-device-interface/specs-go" - - "sigs.k8s.io/dra-example-driver/internal/profiles" ) const cdiCommonDeviceName = "common" @@ -85,7 +83,7 @@ func (cdi *CDIHandler) CreateCommonSpecFile() error { return cdi.cache.WriteSpec(spec, specName) } -func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, devices 
profiles.PreparedDevices) error { +func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, devices PreparedDevices) error { specName := cdiapi.GenerateTransientSpecName(cdi.vendor(), cdi.class, claimUID) spec := &cdispec.Spec{ diff --git a/cmd/dra-example-kubeletplugin/checkpoint.go b/cmd/dra-example-kubeletplugin/checkpoint.go index b2ff0a60..ed3087a8 100644 --- a/cmd/dra-example-kubeletplugin/checkpoint.go +++ b/cmd/dra-example-kubeletplugin/checkpoint.go @@ -17,49 +17,30 @@ package main import ( - "encoding/json" "fmt" "os" "path/filepath" - "sigs.k8s.io/dra-example-driver/internal/profiles" -) - -type PreparedClaims map[string]profiles.PreparedDevices - -type Checkpoint struct { - V1 *CheckpointV1 `json:"v1,omitempty"` -} + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/utils/ptr" -type CheckpointV1 struct { - PreparedClaims PreparedClaims `json:"preparedClaims,omitempty"` -} - -func newCheckpoint() *Checkpoint { - pc := &Checkpoint{ - V1: &CheckpointV1{}, - } - return pc -} + checkpointapi "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" +) -func readCheckpoint(path string) (*Checkpoint, error) { +func readCheckpoint(path string, decoder runtime.Decoder) (*checkpointapi.Checkpoint, error) { data, err := os.ReadFile(path) if err != nil { return nil, err } - checkpoint := new(Checkpoint) - err = json.Unmarshal(data, checkpoint) + checkpoint := new(checkpointapi.Checkpoint) + _, _, err = decoder.Decode(data, ptr.To(checkpointapi.SchemeGroupVersion.WithKind("Checkpoint")), checkpoint) if err != nil { return nil, fmt.Errorf("unmarshal json from %s: %w", path, err) } return checkpoint, nil } -func writeCheckpoint(path string, checkpoint *Checkpoint) (err error) { - data, err := json.Marshal(checkpoint) - if err != nil { - return fmt.Errorf("marshal json: %w", err) - } +func writeCheckpoint(path string, encoder runtime.Encoder, checkpoint *checkpointapi.Checkpoint) (err error) { dir := filepath.Dir(path) tmp, err := os.CreateTemp(dir, "tmp-checkpoint-*") if 
err != nil { @@ -70,8 +51,8 @@ func writeCheckpoint(path string, checkpoint *Checkpoint) (err error) { err = fmt.Errorf("close temp file: %w", err1) } }() - if _, err := tmp.Write(data); err != nil { - return fmt.Errorf("write to temp file %s: %w", tmp.Name(), err) + if err := encoder.Encode(checkpoint, tmp); err != nil { + return fmt.Errorf("encode to temp file %s: %w", tmp.Name(), err) } if err := tmp.Sync(); err != nil { return fmt.Errorf("sync temp file: %w", err) @@ -81,33 +62,3 @@ func writeCheckpoint(path string, checkpoint *Checkpoint) (err error) { } return nil } - -func (cp *Checkpoint) GetPreparedDevices(claimUID string) profiles.PreparedDevices { - if cp.V1 == nil { - return nil - } - if devices, ok := cp.V1.PreparedClaims[claimUID]; ok { - return devices - } - return nil -} - -func (cp *Checkpoint) AddPreparedDevices(claimUID string, pds profiles.PreparedDevices) { - if cp.V1 == nil { - return - } - - if cp.V1.PreparedClaims == nil { - cp.V1.PreparedClaims = make(PreparedClaims) - } - - cp.V1.PreparedClaims[claimUID] = pds -} - -func (cp *Checkpoint) RemovePreparedDevices(claimUID string) { - if cp.V1 == nil { - return - } - - delete(cp.V1.PreparedClaims, claimUID) -} diff --git a/cmd/dra-example-kubeletplugin/checkpoint_test.go b/cmd/dra-example-kubeletplugin/checkpoint_test.go index 9124ad7a..c33c6ec3 100644 --- a/cmd/dra-example-kubeletplugin/checkpoint_test.go +++ b/cmd/dra-example-kubeletplugin/checkpoint_test.go @@ -18,47 +18,28 @@ package main import ( "io/fs" - "os" "path/filepath" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1" - "tags.cncf.io/container-device-interface/pkg/cdi" - "tags.cncf.io/container-device-interface/specs-go" + "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/dra-example-driver/internal/profiles" + checkpointapi "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" ) func TestReadWriteCheckpointRoundtrip(t *testing.T) { tests := 
map[string]struct { - checkpoint *Checkpoint + checkpoint *checkpointapi.Checkpoint }{ "new checkpoint": { - checkpoint: newCheckpoint(), + checkpoint: new(checkpointapi.Checkpoint), }, "populated checkpoint": { - checkpoint: &Checkpoint{ - V1: &CheckpointV1{ - PreparedClaims{ - "uid": profiles.PreparedDevices{ - { - Device: drapbv1.Device{ - RequestNames: []string{"req"}, - PoolName: "pool", - DeviceName: "dev", - CdiDeviceIds: []string{"id"}, - }, - ContainerEdits: &cdi.ContainerEdits{ - ContainerEdits: &specs.ContainerEdits{ - Env: []string{"KEY=value"}, - }, - }, - AdminAccess: true, - }, - }, - }, + checkpoint: &checkpointapi.Checkpoint{ + PreparedClaims: []checkpointapi.PreparedClaim{ + {UID: types.UID("123")}, + {UID: types.UID("456")}, }, }, }, @@ -69,40 +50,22 @@ func TestReadWriteCheckpointRoundtrip(t *testing.T) { dir := t.TempDir() path := filepath.Join(dir, DriverPluginCheckpointFile) - checkpoint, err := readCheckpoint(path) + decoder, encoder, err := checkpointSerializer() + if err != nil { + t.Fatal("failed to initialize checkpoint serializer:", err) + } + + checkpoint, err := readCheckpoint(path, decoder) assert.Nil(t, checkpoint) assert.ErrorIs(t, err, fs.ErrNotExist) checkpoint = test.checkpoint - err = writeCheckpoint(path, checkpoint) + err = writeCheckpoint(path, encoder, checkpoint) require.NoError(t, err) - read, err := readCheckpoint(path) + read, err := readCheckpoint(path, decoder) require.NoError(t, err) assert.Equal(t, test.checkpoint, read) }) } - - // The checkpoint format used to contain a checksum. This test ensures that - // checkpoints written in the old format can still be read. 
- t.Run("old checkpoint format", func(t *testing.T) { - dir := t.TempDir() - path := filepath.Join(dir, DriverPluginCheckpointFile) - - old := `{ - "checksum": 1, - "v1": { - "preparedClaims": { - "uid": [] - } - } - }` - - err := os.WriteFile(path, []byte(old), 0o600) - require.NoError(t, err) - - checkpoint, err := readCheckpoint(path) - assert.NoError(t, err) - assert.NotNil(t, checkpoint.V1.PreparedClaims) - }) } diff --git a/cmd/dra-example-kubeletplugin/state.go b/cmd/dra-example-kubeletplugin/state.go index 7670d39b..760f5de7 100644 --- a/cmd/dra-example-kubeletplugin/state.go +++ b/cmd/dra-example-kubeletplugin/state.go @@ -26,20 +26,40 @@ import ( resourceapi "k8s.io/api/resource/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/apimachinery/pkg/runtime/serializer/json" + "k8s.io/apimachinery/pkg/types" "k8s.io/dynamic-resource-allocation/resourceslice" drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1" + cdiapi "tags.cncf.io/container-device-interface/pkg/cdi" + checkpointapi "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" + checkpointv1alpha1 "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/v1alpha1" "sigs.k8s.io/dra-example-driver/internal/profiles" ) type AllocatableDevices map[string]resourceapi.Device +type PreparedDevices []*PreparedDevice type OpaqueDeviceConfig struct { Requests []string Config runtime.Object } +type PreparedDevice struct { + drapbv1.Device + ContainerEdits *cdiapi.ContainerEdits + AdminAccess bool +} + +func (pds PreparedDevices) GetDevices() []*drapbv1.Device { + var devices []*drapbv1.Device + for _, pd := range pds { + devices = append(devices, &pd.Device) + } + return devices +} + type DeviceState struct { sync.Mutex driverName string @@ -48,7 +68,10 @@ type DeviceState struct { allocatable AllocatableDevices configDecoder runtime.Decoder configHandler profiles.ConfigHandler - checkpointPath string + + checkpointPath string + checkpointDecoder runtime.Decoder + 
checkpointEncoder runtime.Encoder } func NewDeviceState(config *Config) (*DeviceState, error) { @@ -91,17 +114,24 @@ func NewDeviceState(config *Config) (*DeviceState, error) { } } + checkpointDecoder, checkpointEncoder, err := checkpointSerializer() + if err != nil { + return nil, err + } + state := &DeviceState{ - driverName: config.flags.driverName, - cdi: cdi, - driverResources: driverResources, - allocatable: allocatable, - configDecoder: decoder, - configHandler: configHandler, - checkpointPath: filepath.Join(config.DriverPluginPath(), DriverPluginCheckpointFile), + driverName: config.flags.driverName, + cdi: cdi, + driverResources: driverResources, + allocatable: allocatable, + configDecoder: decoder, + configHandler: configHandler, + checkpointPath: filepath.Join(config.DriverPluginPath(), DriverPluginCheckpointFile), + checkpointDecoder: checkpointDecoder, + checkpointEncoder: checkpointEncoder, } - _, err = readCheckpoint(state.checkpointPath) + _, err = readCheckpoint(state.checkpointPath, state.checkpointDecoder) if err != nil && !errors.Is(err, fs.ErrNotExist) { return nil, fmt.Errorf("failed to read checkpoint: %w", err) } @@ -109,7 +139,7 @@ func NewDeviceState(config *Config) (*DeviceState, error) { return state, nil } - if err := writeCheckpoint(state.checkpointPath, newCheckpoint()); err != nil { + if err := writeCheckpoint(state.checkpointPath, state.checkpointEncoder, new(checkpointapi.Checkpoint)); err != nil { return nil, fmt.Errorf("unable to sync to checkpoint: %v", err) } @@ -120,14 +150,15 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi s.Lock() defer s.Unlock() - claimUID := string(claim.UID) - - checkpoint, err := readCheckpoint(s.checkpointPath) + checkpoint, err := readCheckpoint(s.checkpointPath, s.checkpointDecoder) if err != nil { return nil, fmt.Errorf("unable to sync from checkpoint: %v", err) } - preparedDevices := checkpoint.GetPreparedDevices(claimUID) + preparedDevices, err := 
s.restoreCheckpoint(checkpoint, claim) + if err != nil { + return nil, fmt.Errorf("unable to restore from checkpoint: %v", err) + } if preparedDevices != nil { return preparedDevices.GetDevices(), nil } @@ -136,12 +167,14 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi return nil, fmt.Errorf("prepare failed: %v", err) } - if err = s.cdi.CreateClaimSpecFile(claimUID, preparedDevices); err != nil { + if err = s.cdi.CreateClaimSpecFile(string(claim.UID), preparedDevices); err != nil { return nil, fmt.Errorf("unable to create CDI spec file for claim: %v", err) } - checkpoint.AddPreparedDevices(claimUID, preparedDevices) - if err := writeCheckpoint(s.checkpointPath, checkpoint); err != nil { + if !slices.ContainsFunc(checkpoint.PreparedClaims, func(c checkpointapi.PreparedClaim) bool { return c.UID == claim.UID }) { + checkpoint.PreparedClaims = append(checkpoint.PreparedClaims, checkpointapi.PreparedClaim{UID: claim.UID}) + } + if err := writeCheckpoint(s.checkpointPath, s.checkpointEncoder, checkpoint); err != nil { return nil, fmt.Errorf("unable to sync to checkpoint: %v", err) } @@ -152,20 +185,14 @@ func (s *DeviceState) Unprepare(claimUID string) error { s.Lock() defer s.Unlock() - checkpoint, err := readCheckpoint(s.checkpointPath) + checkpoint, err := readCheckpoint(s.checkpointPath, s.checkpointDecoder) if err != nil { - checkpoint = newCheckpoint() - if err := writeCheckpoint(s.checkpointPath, checkpoint); err != nil { + if err := writeCheckpoint(s.checkpointPath, s.checkpointEncoder, new(checkpointapi.Checkpoint)); err != nil { return fmt.Errorf("unable to create new checkpoint: %v", err) } } - preparedDevices := checkpoint.GetPreparedDevices(claimUID) - if preparedDevices == nil { - return nil - } - - if err := s.unprepareDevices(claimUID, preparedDevices); err != nil { + if err := s.unprepareDevices(claimUID); err != nil { return fmt.Errorf("unprepare failed: %v", err) } @@ -174,15 +201,15 @@ func (s *DeviceState) 
Unprepare(claimUID string) error { return fmt.Errorf("unable to delete CDI spec file for claim: %v", err) } - checkpoint.RemovePreparedDevices(claimUID) - if err := writeCheckpoint(s.checkpointPath, checkpoint); err != nil { + checkpoint.PreparedClaims = slices.DeleteFunc(checkpoint.PreparedClaims, func(c checkpointapi.PreparedClaim) bool { return c.UID == types.UID(claimUID) }) + if err := writeCheckpoint(s.checkpointPath, s.checkpointEncoder, checkpoint); err != nil { return fmt.Errorf("unable to sync to checkpoint: %v", err) } return nil } -func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (profiles.PreparedDevices, error) { +func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (PreparedDevices, error) { if claim.Status.Allocation == nil { return nil, fmt.Errorf("claim not yet allocated") } @@ -242,10 +269,10 @@ func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (profiles // Walk through each config and its associated device allocation results // and construct the list of prepared devices to return. - var preparedDevices profiles.PreparedDevices + var preparedDevices PreparedDevices for _, results := range configResultsMap { for _, result := range results { - device := &profiles.PreparedDevice{ + device := &PreparedDevice{ Device: drapbv1.Device{ RequestNames: []string{result.Request}, PoolName: result.Pool, @@ -262,10 +289,19 @@ func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (profiles return preparedDevices, nil } -func (s *DeviceState) unprepareDevices(claimUID string, devices profiles.PreparedDevices) error { +func (s *DeviceState) unprepareDevices(claimUID string) error { return nil } +// restoreCheckpoint returns the device definitions for devices already prepared +// for the given claim. If the claim has not yet been prepared, it returns nil. 
+func (s *DeviceState) restoreCheckpoint(checkpoint *checkpointapi.Checkpoint, claim *resourceapi.ResourceClaim) (PreparedDevices, error) { + if slices.ContainsFunc(checkpoint.PreparedClaims, func(c checkpointapi.PreparedClaim) bool { return c.UID == claim.UID }) { + return s.prepareDevices(claim) + } + return nil, nil +} + // checkAdminAccess determines if a resource claim requires admin access. func (s *DeviceState) checkAdminAccess(claim *resourceapi.ResourceClaim) bool { if claim != nil && claim.Status.Allocation != nil { @@ -278,6 +314,31 @@ func (s *DeviceState) checkAdminAccess(claim *resourceapi.ResourceClaim) bool { return false } +func checkpointSerializer() (runtime.Decoder, runtime.Encoder, error) { + checkpointScheme := runtime.NewScheme() + sb := runtime.NewSchemeBuilder( + checkpointapi.AddToScheme, + checkpointv1alpha1.AddToScheme, + ) + if err := sb.AddToScheme(checkpointScheme); err != nil { + return nil, nil, fmt.Errorf("create checkpoint scheme: %w", err) + } + checkpointJSON := json.NewSerializerWithOptions( + json.DefaultMetaFactory, + checkpointScheme, + checkpointScheme, + json.SerializerOptions{ + Pretty: true, + Strict: true, + }, + ) + checkpointCodecFactory := serializer.NewCodecFactory(checkpointScheme) + checkpointEncoder := checkpointCodecFactory.EncoderForVersion(checkpointJSON, checkpointv1alpha1.SchemeGroupVersion) + checkpointDecoder := checkpointCodecFactory.UniversalDecoder(checkpointapi.SchemeGroupVersion) + + return checkpointDecoder, checkpointEncoder, nil +} + // GetOpaqueDeviceConfigs returns an ordered list of the configs contained in possibleConfigs for this driver. 
// // Configs can either come from the resource claim itself or from the device diff --git a/internal/profiles/profiles_test.go b/cmd/dra-example-kubeletplugin/state_test.go similarity index 98% rename from internal/profiles/profiles_test.go rename to cmd/dra-example-kubeletplugin/state_test.go index e73d9348..8490ebb5 100644 --- a/internal/profiles/profiles_test.go +++ b/cmd/dra-example-kubeletplugin/state_test.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package profiles +package main import ( "testing" diff --git a/common.mk b/common.mk index e7410017..c04d644f 100644 --- a/common.mk +++ b/common.mk @@ -21,7 +21,7 @@ VERSION ?= vVERSION := v$(VERSION:v%=%) VENDOR := example.com -APIS := gpu/v1alpha1 +APIS := $(CURDIR)/api/$(VENDOR)/resource/gpu/v1alpha1 $(CURDIR)/internal/api/checkpoint $(CURDIR)/internal/api/checkpoint/v1alpha1 PLURAL_EXCEPTIONS = DeviceClassParameters:DeviceClassParameters PLURAL_EXCEPTIONS += GpuClaimParameters:GpuClaimParameters diff --git a/docker/Dockerfile.devel b/docker/Dockerfile.devel index 230db194..fe563c85 100644 --- a/docker/Dockerfile.devel +++ b/docker/Dockerfile.devel @@ -20,3 +20,4 @@ RUN go install github.com/gordonklaus/ineffassign@latest && \ RUN go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.52.0 RUN go install sigs.k8s.io/controller-tools/cmd/controller-gen@v0.20.0 RUN go install k8s.io/code-generator/cmd/client-gen@v0.35.0 +RUN go install k8s.io/code-generator/cmd/conversion-gen@v0.35.0 diff --git a/internal/api/checkpoint/doc.go b/internal/api/checkpoint/doc.go new file mode 100644 index 00000000..474da329 --- /dev/null +++ b/internal/api/checkpoint/doc.go @@ -0,0 +1,23 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +k8s:deepcopy-gen=package + +// Package checkpoint contains internal (unversioned) types for checkpoints. +// These types are the canonical in-memory representation that the driver +// programs against. Versioned types (e.g. v1alpha1) are converted to/from these +// internal types via the scheme. +package checkpoint diff --git a/internal/api/checkpoint/install/install.go b/internal/api/checkpoint/install/install.go new file mode 100644 index 00000000..2ea1dc62 --- /dev/null +++ b/internal/api/checkpoint/install/install.go @@ -0,0 +1,43 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package install registers the checkpoint internal and versioned types with a +// runtime.Scheme. Consumers should call NewScheme() to get a scheme that can +// decode any supported checkpoint version into the internal types. 
+package install + +import ( + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + + "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" + "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/v1alpha1" +) + +// Install registers the internal and v1alpha1 checkpoint types with the given scheme. +func Install(scheme *runtime.Scheme) { + utilruntime.Must(checkpoint.AddToScheme(scheme)) + utilruntime.Must(v1alpha1.AddToScheme(scheme)) +} + +// NewScheme returns a new runtime.Scheme with all checkpoint versions registered. +// The returned scheme can decode any supported checkpoint version into the +// internal checkpoint.Checkpoint type. +func NewScheme() *runtime.Scheme { + scheme := runtime.NewScheme() + Install(scheme) + return scheme +} diff --git a/internal/api/checkpoint/install/roundtrip_test.go b/internal/api/checkpoint/install/roundtrip_test.go new file mode 100644 index 00000000..d0ba0c0a --- /dev/null +++ b/internal/api/checkpoint/install/roundtrip_test.go @@ -0,0 +1,27 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package install + +import ( + "testing" + + "k8s.io/apimachinery/pkg/api/apitesting/roundtrip" +) + +func TestRoundTrip(t *testing.T) { + roundtrip.RoundTripTestForAPIGroup(t, Install, nil) +} diff --git a/internal/api/checkpoint/register.go b/internal/api/checkpoint/register.go new file mode 100644 index 00000000..49673e03 --- /dev/null +++ b/internal/api/checkpoint/register.go @@ -0,0 +1,43 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package checkpoint + +import ( + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// GroupName is the group name for checkpoints. +const GroupName = "checkpoint.internal.example.com" + +// SchemeGroupVersion is the group version used to register the internal types. +var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal} + +var ( + // SchemeBuilder is used to add types to the scheme. + SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) + + // AddToScheme applies all the stored functions to the scheme. + AddToScheme = SchemeBuilder.AddToScheme +) + +func addKnownTypes(scheme *runtime.Scheme) error { + scheme.AddKnownTypes(SchemeGroupVersion, + &Checkpoint{}, + ) + return nil +} diff --git a/internal/api/checkpoint/types.go b/internal/api/checkpoint/types.go new file mode 100644 index 00000000..6606fcd0 --- /dev/null +++ b/internal/api/checkpoint/types.go @@ -0,0 +1,41 @@ +/* +Copyright The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package checkpoint + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +// Checkpoint contains data about devices prepared for each ResourceClaim the +// driver is responsible for. It is serialized to a versioned JSON file that can +// be read by the driver to recover intermediate state. +// +// The example driver can deterministically reconstruct the entire CDI config +// for any given claim from the ResourceClaim, so it doesn't need to persist any +// other data. Other drivers may need to include more data in their checkpoints. +// +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type Checkpoint struct { + metav1.TypeMeta + + PreparedClaims []PreparedClaim +} + +type PreparedClaim struct { + UID types.UID +} diff --git a/internal/api/checkpoint/v1alpha1/doc.go b/internal/api/checkpoint/v1alpha1/doc.go new file mode 100644 index 00000000..753f5f18 --- /dev/null +++ b/internal/api/checkpoint/v1alpha1/doc.go @@ -0,0 +1,22 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +k8s:deepcopy-gen=package +// +k8s:conversion-gen=sigs.k8s.io/dra-example-driver/internal/api/checkpoint + +// Package v1alpha1 contains the v1alpha1 serialization format for checkpoints. +// These types include JSON tags and are used for reading/writing files on disk. +package v1alpha1 diff --git a/internal/api/checkpoint/v1alpha1/register.go b/internal/api/checkpoint/v1alpha1/register.go new file mode 100644 index 00000000..3d3898c1 --- /dev/null +++ b/internal/api/checkpoint/v1alpha1/register.go @@ -0,0 +1,46 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// GroupName is the group name for checkpoints. +const GroupName = "checkpoint.internal.example.com" + +// SchemeGroupVersion is the group version for v1alpha1 types. 
+var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1alpha1"} + +var ( + // localSchemeBuilder is used to add types and conversion functions to the scheme. + // The generated conversion code references this variable by name. + localSchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) + + // AddToScheme applies all the stored functions to the scheme. + AddToScheme = localSchemeBuilder.AddToScheme +) + +func addKnownTypes(scheme *runtime.Scheme) error { + scheme.AddKnownTypes(SchemeGroupVersion, + &Checkpoint{}, + ) + metav1.AddToGroupVersion(scheme, SchemeGroupVersion) + return nil +} diff --git a/internal/api/checkpoint/v1alpha1/types.go b/internal/api/checkpoint/v1alpha1/types.go new file mode 100644 index 00000000..c42ba977 --- /dev/null +++ b/internal/api/checkpoint/v1alpha1/types.go @@ -0,0 +1,41 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +// Checkpoint contains data about devices prepared for each ResourceClaim the +// driver is responsible for. It is serialized to a versioned JSON file that can +// be read by the driver to recover intermediate state. +// +// The example driver can deterministically reconstruct the entire CDI config +// for any given claim from the ResourceClaim, so it doesn't need to persist any +// other data. 
Other drivers may need to include more data in their checkpoints. +// +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +type Checkpoint struct { + metav1.TypeMeta `json:",inline"` + + PreparedClaims []PreparedClaim `json:"preparedClaims,omitempty"` +} + +type PreparedClaim struct { + UID types.UID `json:"uid,omitempty"` +} diff --git a/internal/api/checkpoint/v1alpha1/zz_generated.conversion.go b/internal/api/checkpoint/v1alpha1/zz_generated.conversion.go new file mode 100644 index 00000000..8d5bc98d --- /dev/null +++ b/internal/api/checkpoint/v1alpha1/zz_generated.conversion.go @@ -0,0 +1,101 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* + * Copyright The Kubernetes Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Code generated by conversion-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + unsafe "unsafe" + + conversion "k8s.io/apimachinery/pkg/conversion" + runtime "k8s.io/apimachinery/pkg/runtime" + types "k8s.io/apimachinery/pkg/types" + checkpoint "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" +) + +func init() { + localSchemeBuilder.Register(RegisterConversions) +} + +// RegisterConversions adds conversion functions to the given scheme. +// Public to allow building arbitrary schemes. 
+func RegisterConversions(s *runtime.Scheme) error { + if err := s.AddGeneratedConversionFunc((*Checkpoint)(nil), (*checkpoint.Checkpoint)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint(a.(*Checkpoint), b.(*checkpoint.Checkpoint), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*checkpoint.Checkpoint)(nil), (*Checkpoint)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint(a.(*checkpoint.Checkpoint), b.(*Checkpoint), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*PreparedClaim)(nil), (*checkpoint.PreparedClaim)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim(a.(*PreparedClaim), b.(*checkpoint.PreparedClaim), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*checkpoint.PreparedClaim)(nil), (*PreparedClaim)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim(a.(*checkpoint.PreparedClaim), b.(*PreparedClaim), scope) + }); err != nil { + return err + } + return nil +} + +func autoConvert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint(in *Checkpoint, out *checkpoint.Checkpoint, s conversion.Scope) error { + out.PreparedClaims = *(*[]checkpoint.PreparedClaim)(unsafe.Pointer(&in.PreparedClaims)) + return nil +} + +// Convert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint is an autogenerated conversion function. 
+func Convert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint(in *Checkpoint, out *checkpoint.Checkpoint, s conversion.Scope) error { + return autoConvert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint(in, out, s) +} + +func autoConvert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint(in *checkpoint.Checkpoint, out *Checkpoint, s conversion.Scope) error { + out.PreparedClaims = *(*[]PreparedClaim)(unsafe.Pointer(&in.PreparedClaims)) + return nil +} + +// Convert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint is an autogenerated conversion function. +func Convert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint(in *checkpoint.Checkpoint, out *Checkpoint, s conversion.Scope) error { + return autoConvert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint(in, out, s) +} + +func autoConvert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim(in *PreparedClaim, out *checkpoint.PreparedClaim, s conversion.Scope) error { + out.UID = types.UID(in.UID) + return nil +} + +// Convert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim is an autogenerated conversion function. +func Convert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim(in *PreparedClaim, out *checkpoint.PreparedClaim, s conversion.Scope) error { + return autoConvert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim(in, out, s) +} + +func autoConvert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim(in *checkpoint.PreparedClaim, out *PreparedClaim, s conversion.Scope) error { + out.UID = types.UID(in.UID) + return nil +} + +// Convert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim is an autogenerated conversion function. 
+func Convert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim(in *checkpoint.PreparedClaim, out *PreparedClaim, s conversion.Scope) error { + return autoConvert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim(in, out, s) +} diff --git a/internal/api/checkpoint/v1alpha1/zz_generated.deepcopy.go b/internal/api/checkpoint/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 00000000..4e1ee9ce --- /dev/null +++ b/internal/api/checkpoint/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,69 @@ +//go:build !ignore_autogenerated + +/* + * Copyright The Kubernetes Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Checkpoint) DeepCopyInto(out *Checkpoint) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.PreparedClaims != nil { + in, out := &in.PreparedClaims, &out.PreparedClaims + *out = make([]PreparedClaim, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Checkpoint. 
+func (in *Checkpoint) DeepCopy() *Checkpoint { + if in == nil { + return nil + } + out := new(Checkpoint) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Checkpoint) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PreparedClaim) DeepCopyInto(out *PreparedClaim) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PreparedClaim. +func (in *PreparedClaim) DeepCopy() *PreparedClaim { + if in == nil { + return nil + } + out := new(PreparedClaim) + in.DeepCopyInto(out) + return out +} diff --git a/internal/api/checkpoint/zz_generated.deepcopy.go b/internal/api/checkpoint/zz_generated.deepcopy.go new file mode 100644 index 00000000..d7716bcb --- /dev/null +++ b/internal/api/checkpoint/zz_generated.deepcopy.go @@ -0,0 +1,69 @@ +//go:build !ignore_autogenerated + +/* + * Copyright The Kubernetes Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Code generated by controller-gen. DO NOT EDIT. + +package checkpoint + +import ( + "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *Checkpoint) DeepCopyInto(out *Checkpoint) { + *out = *in + out.TypeMeta = in.TypeMeta + if in.PreparedClaims != nil { + in, out := &in.PreparedClaims, &out.PreparedClaims + *out = make([]PreparedClaim, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Checkpoint. +func (in *Checkpoint) DeepCopy() *Checkpoint { + if in == nil { + return nil + } + out := new(Checkpoint) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Checkpoint) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PreparedClaim) DeepCopyInto(out *PreparedClaim) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PreparedClaim. 
+func (in *PreparedClaim) DeepCopy() *PreparedClaim { + if in == nil { + return nil + } + out := new(PreparedClaim) + in.DeepCopyInto(out) + return out +} diff --git a/internal/profiles/profiles.go b/internal/profiles/profiles.go index 159df3e2..3864f247 100644 --- a/internal/profiles/profiles.go +++ b/internal/profiles/profiles.go @@ -22,28 +22,11 @@ import ( resourceapi "k8s.io/api/resource/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/dynamic-resource-allocation/resourceslice" - drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1beta1" cdiapi "tags.cncf.io/container-device-interface/pkg/cdi" ) type PerDeviceCDIContainerEdits map[string]*cdiapi.ContainerEdits -type PreparedDevice struct { - drapbv1.Device - ContainerEdits *cdiapi.ContainerEdits - AdminAccess bool -} - -type PreparedDevices []*PreparedDevice - -func (pds PreparedDevices) GetDevices() []*drapbv1.Device { - var devices []*drapbv1.Device - for _, pd := range pds { - devices = append(devices, &pd.Device) - } - return devices -} - // Profile describes a kind of device that can be managed by the driver. 
type Profile interface { ConfigHandler From 848c141124dac9a018803ed5ac974bf6c53227ef Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Fri, 17 Apr 2026 13:46:05 -0500 Subject: [PATCH 04/12] Describe how to handle checkpoints --- cmd/dra-example-kubeletplugin/driver.go | 2 +- cmd/dra-example-kubeletplugin/state.go | 59 +++++++++++++++++------ internal/api/checkpoint/types.go | 4 +- internal/api/checkpoint/v1alpha1/types.go | 4 +- 4 files changed, 51 insertions(+), 18 deletions(-) diff --git a/cmd/dra-example-kubeletplugin/driver.go b/cmd/dra-example-kubeletplugin/driver.go index fc8b970b..5c7c7d56 100644 --- a/cmd/dra-example-kubeletplugin/driver.go +++ b/cmd/dra-example-kubeletplugin/driver.go @@ -132,7 +132,7 @@ func (d *driver) UnprepareResourceClaims(ctx context.Context, claims []kubeletpl } func (d *driver) unprepareResourceClaim(_ context.Context, claim kubeletplugin.NamespacedObject) error { - if err := d.state.Unprepare(string(claim.UID)); err != nil { + if err := d.state.Unprepare(claim.UID); err != nil { return fmt.Errorf("error unpreparing devices for claim %v: %w", claim.UID, err) } diff --git a/cmd/dra-example-kubeletplugin/state.go b/cmd/dra-example-kubeletplugin/state.go index 760f5de7..e95d986c 100644 --- a/cmd/dra-example-kubeletplugin/state.go +++ b/cmd/dra-example-kubeletplugin/state.go @@ -154,26 +154,24 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi if err != nil { return nil, fmt.Errorf("unable to sync from checkpoint: %v", err) } - - preparedDevices, err := s.restoreCheckpoint(checkpoint, claim) + restoredDevices, err := s.restoreCheckpoint(checkpoint, claim) if err != nil { return nil, fmt.Errorf("unable to restore from checkpoint: %v", err) } - if preparedDevices != nil { - return preparedDevices.GetDevices(), nil + if restoredDevices != nil { + return restoredDevices.GetDevices(), nil } - preparedDevices, err = s.prepareDevices(claim) + + preparedDevices, err := s.prepareDevices(claim) if err != nil { return 
nil, fmt.Errorf("prepare failed: %v", err) } + s.addClaimToCheckpoint(checkpoint, claim, preparedDevices) if err = s.cdi.CreateClaimSpecFile(string(claim.UID), preparedDevices); err != nil { return nil, fmt.Errorf("unable to create CDI spec file for claim: %v", err) } - if !slices.ContainsFunc(checkpoint.PreparedClaims, func(c checkpointapi.PreparedClaim) bool { return c.UID == claim.UID }) { - checkpoint.PreparedClaims = append(checkpoint.PreparedClaims, checkpointapi.PreparedClaim{UID: claim.UID}) - } if err := writeCheckpoint(s.checkpointPath, s.checkpointEncoder, checkpoint); err != nil { return nil, fmt.Errorf("unable to sync to checkpoint: %v", err) } @@ -181,7 +179,7 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi return preparedDevices.GetDevices(), nil } -func (s *DeviceState) Unprepare(claimUID string) error { +func (s *DeviceState) Unprepare(claimUID types.UID) error { s.Lock() defer s.Unlock() @@ -192,16 +190,16 @@ func (s *DeviceState) Unprepare(claimUID string) error { } } - if err := s.unprepareDevices(claimUID); err != nil { + if err := s.unprepareDevices(claimUID, checkpoint); err != nil { return fmt.Errorf("unprepare failed: %v", err) } + s.removeClaimFromCheckpoint(checkpoint, claimUID) - err = s.cdi.DeleteClaimSpecFile(claimUID) + err = s.cdi.DeleteClaimSpecFile(string(claimUID)) if err != nil { return fmt.Errorf("unable to delete CDI spec file for claim: %v", err) } - checkpoint.PreparedClaims = slices.DeleteFunc(checkpoint.PreparedClaims, func(c checkpointapi.PreparedClaim) bool { return c.UID == types.UID(claimUID) }) if err := writeCheckpoint(s.checkpointPath, s.checkpointEncoder, checkpoint); err != nil { return fmt.Errorf("unable to sync to checkpoint: %v", err) } @@ -209,7 +207,25 @@ func (s *DeviceState) Unprepare(claimUID string) error { return nil } +// prepareDevices performs one-time setup for the devices allocated to a +// ResourceClaim before being consumed by a Pod. 
func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (PreparedDevices, error) { + return s.computeDeviceConfig(claim) +} + +// unprepareDevices undoes any side-effects produced by +// [DeviceState.prepareDevices]. +func (s *DeviceState) unprepareDevices(claimUID types.UID, checkpoint *checkpointapi.Checkpoint) error { + return nil +} + +// computeDeviceConfig computes the CDI config for devices allocated to the claim +// designated for this driver. It is called each time the kubelet tells the +// driver to prepare a claim which may occur more than once, and therefore +// should be deterministic and produce no side-effects. Non-deterministic data or +// side-effects should be produced by [DeviceState.prepareDevices] directly and +// recorded in the checkpoint by [DeviceState.addClaimToCheckpoint]. +func (s *DeviceState) computeDeviceConfig(claim *resourceapi.ResourceClaim) (PreparedDevices, error) { if claim.Status.Allocation == nil { return nil, fmt.Errorf("claim not yet allocated") } @@ -289,15 +305,28 @@ func (s *DeviceState) prepareDevices(claim *resourceapi.ResourceClaim) (Prepared return preparedDevices, nil } -func (s *DeviceState) unprepareDevices(claimUID string) error { - return nil +// addClaimToCheckpoint updates the checkpoint with results of preparing the +// devices for the claim. If any parts of the [PreparedDevices] are +// non-deterministic or expensive to recompute, then those should also be added +// to the checkpoint here. +func (*DeviceState) addClaimToCheckpoint(checkpoint *checkpointapi.Checkpoint, claim *resourceapi.ResourceClaim, _ PreparedDevices) { + checkpoint.PreparedClaims = append(checkpoint.PreparedClaims, checkpointapi.PreparedClaim{UID: claim.UID}) +} + +// removeClaimFromCheckpoint updates the checkpoint to remove all data +// associated with the claim. 
+func (*DeviceState) removeClaimFromCheckpoint(checkpoint *checkpointapi.Checkpoint, claimUID types.UID) { + checkpoint.PreparedClaims = slices.DeleteFunc(checkpoint.PreparedClaims, func(c checkpointapi.PreparedClaim) bool { return c.UID == claimUID }) } // restoreCheckpoint returns the device definitions for devices already prepared // for the given claim. If the claim has not yet been prepared, it returns nil. func (s *DeviceState) restoreCheckpoint(checkpoint *checkpointapi.Checkpoint, claim *resourceapi.ResourceClaim) (PreparedDevices, error) { if slices.ContainsFunc(checkpoint.PreparedClaims, func(c checkpointapi.PreparedClaim) bool { return c.UID == claim.UID }) { - return s.prepareDevices(claim) + // If [DeviceState.addClaimToCheckpoint] associated any other data with + // the claim in the checkpoint, then that should be added to the + // returned [PreparedDevices] here. + return s.computeDeviceConfig(claim) } return nil, nil } diff --git a/internal/api/checkpoint/types.go b/internal/api/checkpoint/types.go index 6606fcd0..79c5a690 100644 --- a/internal/api/checkpoint/types.go +++ b/internal/api/checkpoint/types.go @@ -27,7 +27,9 @@ import ( // // The example driver can deterministically reconstruct the entire CDI config // for any given claim from the ResourceClaim, so it doesn't need to persist any -// other data. Other drivers may need to include more data in their checkpoints. +// other data. Other drivers may need to include more data in their checkpoints +// if first-time setup produces non-deterministic data or side-effects that need +// to be undone when the claim is unprepared. 
// // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object type Checkpoint struct { diff --git a/internal/api/checkpoint/v1alpha1/types.go b/internal/api/checkpoint/v1alpha1/types.go index c42ba977..72626774 100644 --- a/internal/api/checkpoint/v1alpha1/types.go +++ b/internal/api/checkpoint/v1alpha1/types.go @@ -27,7 +27,9 @@ import ( // // The example driver can deterministically reconstruct the entire CDI config // for any given claim from the ResourceClaim, so it doesn't need to persist any -// other data. Other drivers may need to include more data in their checkpoints. +// other data. Other drivers may need to include more data in their checkpoints +// if first-time setup produces non-deterministic data or side-effects that need +// to be undone when the claim is unprepared. // // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object type Checkpoint struct { From 08a2c1a360e71003385e1240db3ca334e836fe0e Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Tue, 21 Apr 2026 11:20:15 -0500 Subject: [PATCH 05/12] revert docker build output --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 3a70e993..18d3f940 100644 --- a/Makefile +++ b/Makefile @@ -137,6 +137,7 @@ teardown-e2e: .build-image: docker/Dockerfile.devel if [ x"$(SKIP_IMAGE_BUILD)" = x"" ]; then \ $(CONTAINER_TOOL) build \ + --progress=plain \ --build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \ --tag $(BUILDIMAGE) \ -f $(^) \ From 1f1debbd96a81229f20c094aa16d5609a03a7995 Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Tue, 21 Apr 2026 11:20:46 -0500 Subject: [PATCH 06/12] json -> JSON in error message --- cmd/dra-example-kubeletplugin/checkpoint.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/dra-example-kubeletplugin/checkpoint.go b/cmd/dra-example-kubeletplugin/checkpoint.go index ed3087a8..1ce95d52 100644 --- a/cmd/dra-example-kubeletplugin/checkpoint.go +++ b/cmd/dra-example-kubeletplugin/checkpoint.go @@ -35,7 +35,7 @@ 
func readCheckpoint(path string, decoder runtime.Decoder) (*checkpointapi.Checkp checkpoint := new(checkpointapi.Checkpoint) _, _, err = decoder.Decode(data, ptr.To(checkpointapi.SchemeGroupVersion.WithKind("Checkpoint")), checkpoint) if err != nil { - return nil, fmt.Errorf("unmarshal json from %s: %w", path, err) + return nil, fmt.Errorf("unmarshal JSON from %s: %w", path, err) } return checkpoint, nil } From 15f3412164f0463fd49fb4b3aa57c07fcd4551ff Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Tue, 21 Apr 2026 11:22:21 -0500 Subject: [PATCH 07/12] rename resourceCheckpoint -> restoreClaimFromCheckpoint --- cmd/dra-example-kubeletplugin/state.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/dra-example-kubeletplugin/state.go b/cmd/dra-example-kubeletplugin/state.go index e95d986c..1c9ec5d0 100644 --- a/cmd/dra-example-kubeletplugin/state.go +++ b/cmd/dra-example-kubeletplugin/state.go @@ -154,7 +154,7 @@ func (s *DeviceState) Prepare(claim *resourceapi.ResourceClaim) ([]*drapbv1.Devi if err != nil { return nil, fmt.Errorf("unable to sync from checkpoint: %v", err) } - restoredDevices, err := s.restoreCheckpoint(checkpoint, claim) + restoredDevices, err := s.restoreClaimFromCheckpoint(checkpoint, claim) if err != nil { return nil, fmt.Errorf("unable to restore from checkpoint: %v", err) } @@ -319,9 +319,9 @@ func (*DeviceState) removeClaimFromCheckpoint(checkpoint *checkpointapi.Checkpoi checkpoint.PreparedClaims = slices.DeleteFunc(checkpoint.PreparedClaims, func(c checkpointapi.PreparedClaim) bool { return c.UID == claimUID }) } -// restoreCheckpoint returns the device definitions for devices already prepared +// restoreClaimFromCheckpoint returns the device definitions for devices already prepared // for the given claim. If the claim has not yet been prepared, it returns nil. 
-func (s *DeviceState) restoreCheckpoint(checkpoint *checkpointapi.Checkpoint, claim *resourceapi.ResourceClaim) (PreparedDevices, error) { +func (s *DeviceState) restoreClaimFromCheckpoint(checkpoint *checkpointapi.Checkpoint, claim *resourceapi.ResourceClaim) (PreparedDevices, error) { if slices.ContainsFunc(checkpoint.PreparedClaims, func(c checkpointapi.PreparedClaim) bool { return c.UID == claim.UID }) { // If [DeviceState.addClaimToCheckpoint] associated any other data with // the claim in the checkpoint, then that should be added to the From 452ec5f4a42dc3aeb2dbf970c68d0e3797ac1a41 Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Tue, 21 Apr 2026 11:54:18 -0500 Subject: [PATCH 08/12] defer writing checkpoint until NodePrepareResources --- cmd/dra-example-kubeletplugin/checkpoint.go | 18 ++++++++++++++---- .../checkpoint_test.go | 5 ++--- cmd/dra-example-kubeletplugin/state.go | 14 -------------- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/cmd/dra-example-kubeletplugin/checkpoint.go b/cmd/dra-example-kubeletplugin/checkpoint.go index 1ce95d52..bbd7c8e8 100644 --- a/cmd/dra-example-kubeletplugin/checkpoint.go +++ b/cmd/dra-example-kubeletplugin/checkpoint.go @@ -17,7 +17,9 @@ package main import ( + "errors" "fmt" + "io/fs" "os" "path/filepath" @@ -27,19 +29,27 @@ import ( checkpointapi "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" ) +// readCheckpoint returns the Checkpoint at the given path in the format +// expected by the given decoder. If the path doesn't exist, returns an empty +// Checkpoint and no error. 
func readCheckpoint(path string, decoder runtime.Decoder) (*checkpointapi.Checkpoint, error) { data, err := os.ReadFile(path) - if err != nil { + if err != nil && !errors.Is(err, fs.ErrNotExist) { return nil, err } checkpoint := new(checkpointapi.Checkpoint) - _, _, err = decoder.Decode(data, ptr.To(checkpointapi.SchemeGroupVersion.WithKind("Checkpoint")), checkpoint) - if err != nil { - return nil, fmt.Errorf("unmarshal JSON from %s: %w", path, err) + if data != nil { + _, _, err = decoder.Decode(data, ptr.To(checkpointapi.SchemeGroupVersion.WithKind("Checkpoint")), checkpoint) + if err != nil { + return nil, fmt.Errorf("unmarshal JSON from %s: %w", path, err) + } } return checkpoint, nil } +// writeCheckpoint writes checkpoint to the file at path in +// the format prescribed by encoder. The file is overwritten if it already +// exists and is created if it does not already exist. func writeCheckpoint(path string, encoder runtime.Encoder, checkpoint *checkpointapi.Checkpoint) (err error) { dir := filepath.Dir(path) tmp, err := os.CreateTemp(dir, "tmp-checkpoint-*") diff --git a/cmd/dra-example-kubeletplugin/checkpoint_test.go b/cmd/dra-example-kubeletplugin/checkpoint_test.go index c33c6ec3..816db44d 100644 --- a/cmd/dra-example-kubeletplugin/checkpoint_test.go +++ b/cmd/dra-example-kubeletplugin/checkpoint_test.go @@ -17,7 +17,6 @@ package main import ( - "io/fs" "path/filepath" "testing" @@ -56,8 +55,8 @@ func TestReadWriteCheckpointRoundtrip(t *testing.T) { } checkpoint, err := readCheckpoint(path, decoder) - assert.Nil(t, checkpoint) - assert.ErrorIs(t, err, fs.ErrNotExist) + assert.NoError(t, err) + assert.Equal(t, new(checkpointapi.Checkpoint), checkpoint) checkpoint = test.checkpoint err = writeCheckpoint(path, encoder, checkpoint) diff --git a/cmd/dra-example-kubeletplugin/state.go b/cmd/dra-example-kubeletplugin/state.go index 1c9ec5d0..f25c0e33 100644 --- a/cmd/dra-example-kubeletplugin/state.go +++ b/cmd/dra-example-kubeletplugin/state.go @@ -17,9 +17,7 
@@ package main import ( - "errors" "fmt" - "io/fs" "path/filepath" "slices" "sync" @@ -131,18 +129,6 @@ func NewDeviceState(config *Config) (*DeviceState, error) { checkpointEncoder: checkpointEncoder, } - _, err = readCheckpoint(state.checkpointPath, state.checkpointDecoder) - if err != nil && !errors.Is(err, fs.ErrNotExist) { - return nil, fmt.Errorf("failed to read checkpoint: %w", err) - } - if err == nil { - return state, nil - } - - if err := writeCheckpoint(state.checkpointPath, state.checkpointEncoder, new(checkpointapi.Checkpoint)); err != nil { - return nil, fmt.Errorf("unable to sync to checkpoint: %v", err) - } - return state, nil } From 4ee44afa6a6467d51804107086d89137cbf8f4c2 Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Tue, 21 Apr 2026 14:57:02 -0500 Subject: [PATCH 09/12] Use install package to create checkpoint scheme --- cmd/dra-example-kubeletplugin/state.go | 10 ++-------- internal/api/checkpoint/install/install.go | 12 ++++++------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/cmd/dra-example-kubeletplugin/state.go b/cmd/dra-example-kubeletplugin/state.go index f25c0e33..4d933012 100644 --- a/cmd/dra-example-kubeletplugin/state.go +++ b/cmd/dra-example-kubeletplugin/state.go @@ -32,6 +32,7 @@ import ( cdiapi "tags.cncf.io/container-device-interface/pkg/cdi" checkpointapi "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" + checkpointinstall "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/install" checkpointv1alpha1 "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/v1alpha1" "sigs.k8s.io/dra-example-driver/internal/profiles" ) @@ -330,14 +331,7 @@ func (s *DeviceState) checkAdminAccess(claim *resourceapi.ResourceClaim) bool { } func checkpointSerializer() (runtime.Decoder, runtime.Encoder, error) { - checkpointScheme := runtime.NewScheme() - sb := runtime.NewSchemeBuilder( - checkpointapi.AddToScheme, - checkpointv1alpha1.AddToScheme, - ) - if err := sb.AddToScheme(checkpointScheme); err != nil { - return 
nil, nil, fmt.Errorf("create checkpoint scheme: %w", err) - } + checkpointScheme := checkpointinstall.NewScheme() checkpointJSON := json.NewSerializerWithOptions( json.DefaultMetaFactory, checkpointScheme, diff --git a/internal/api/checkpoint/install/install.go b/internal/api/checkpoint/install/install.go index 2ea1dc62..333e92a7 100644 --- a/internal/api/checkpoint/install/install.go +++ b/internal/api/checkpoint/install/install.go @@ -23,19 +23,19 @@ import ( "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" - "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" + checkpointapi "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/v1alpha1" ) -// Install registers the internal and v1alpha1 metadata types with the given scheme. +// Install registers the internal and v1alpha1 checkpoint types with the given scheme. func Install(scheme *runtime.Scheme) { - utilruntime.Must(checkpoint.AddToScheme(scheme)) + utilruntime.Must(checkpointapi.AddToScheme(scheme)) utilruntime.Must(v1alpha1.AddToScheme(scheme)) } -// NewScheme returns a new runtime.Scheme with all metadata versions registered. -// The returned scheme can decode any supported metadata version into the -// internal metadata.DeviceMetadata type. +// NewScheme returns a new runtime.Scheme with all checkpoint versions registered. +// The returned scheme can decode any supported checkpoint version into the +// internal [checkpointapi.Checkpoint] type. 
func NewScheme() *runtime.Scheme { scheme := runtime.NewScheme() Install(scheme) From 5c68cb5f0c4d0c1b1068581f59e93fd2c1b830a5 Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Tue, 21 Apr 2026 15:47:49 -0500 Subject: [PATCH 10/12] Describe how to update the Checkpoint API --- internal/api/checkpoint/doc.go | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/internal/api/checkpoint/doc.go b/internal/api/checkpoint/doc.go index 474da329..7caa3628 100644 --- a/internal/api/checkpoint/doc.go +++ b/internal/api/checkpoint/doc.go @@ -19,5 +19,27 @@ limitations under the License. // Package checkpoint contains internal (unversioned) types for checkpoints. // These types are the canonical in-memory representation that the driver // programs against. Versioned types (e.g. v1alpha1) are converted to/from these -// internal types via the scheme. +// internal types via the scheme. All versions are expected to follow the +// Kubernetes [API Conventions]. +// +// # Changing the API +// +// As the driver evolves, so will the information included in checkpoints. +// Changes to the checkpoint API are made like other Kubernetes [API changes]. +// +// Compatible changes may be made to existing API versions. When incompatible +// changes are required, then a new API version must be defined. The internal +// [Checkpoint] type must also be updated such that it can be converted to and +// from the new API version. +// +// The driver should be able to read API versions written by older versions of +// the driver to facilitate upgrades of the driver. The driver writes an API +// version which fulfills the needs of its allocated devices. If the set of +// allocated devices requires the latest checkpoint API version, then that +// version must be written and the driver cannot be downgraded until after the +// devices have been unprepared and the driver can write the checkpoint in an +// older API version. 
+// +// [API Conventions]: https://github.com/kubernetes/community/blob/047598ce8b0932b9be921471dd301b6a82db210f/contributors/devel/sig-architecture/api-conventions.md +// [API changes]: https://github.com/kubernetes/community/blob/047598ce8b0932b9be921471dd301b6a82db210f/contributors/devel/sig-architecture/api_changes.md package checkpoint From d10295600503acd4669a896c2974b8d55bff90ae Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Wed, 22 Apr 2026 20:59:56 -0500 Subject: [PATCH 11/12] Justify strict/non-strict decoding --- cmd/dra-example-kubeletplugin/state.go | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/cmd/dra-example-kubeletplugin/state.go b/cmd/dra-example-kubeletplugin/state.go index 4d933012..8a7257fd 100644 --- a/cmd/dra-example-kubeletplugin/state.go +++ b/cmd/dra-example-kubeletplugin/state.go @@ -97,12 +97,18 @@ func NewDeviceState(config *Config) (*DeviceState, error) { } // Set up a json serializer to decode our types. - decoder := json.NewSerializerWithOptions( + configDecoder := json.NewSerializerWithOptions( json.DefaultMetaFactory, configScheme, configScheme, json.SerializerOptions{ - Pretty: true, Strict: true, + Pretty: true, + // Config objects are defined by users in ResourceClaims. Strict + // decoding helps prevent mistakes. + // + // Note: this flag only produces errors when decoding objects that + // define duplicate keys. Unknown fields are still silently dropped. 
+ Strict: true, }, ) @@ -123,7 +129,7 @@ func NewDeviceState(config *Config) (*DeviceState, error) { cdi: cdi, driverResources: driverResources, allocatable: allocatable, - configDecoder: decoder, + configDecoder: configDecoder, configHandler: configHandler, checkpointPath: filepath.Join(config.DriverPluginPath(), DriverPluginCheckpointFile), checkpointDecoder: checkpointDecoder, @@ -338,7 +344,10 @@ func checkpointSerializer() (runtime.Decoder, runtime.Encoder, error) { checkpointScheme, json.SerializerOptions{ Pretty: true, - Strict: true, + // Checkpoints are meant to be read and written only by the driver, + // so there is minimal risk that strict decoding will identify any + // mistakes. Performance is the better trade-off. + Strict: false, }, ) checkpointCodecFactory := serializer.NewCodecFactory(checkpointScheme) From ff58c142ac2c7a8fd92f13409b98b3ea9fb1ae95 Mon Sep 17 00:00:00 2001 From: Jon Huhn Date: Wed, 22 Apr 2026 22:58:06 -0500 Subject: [PATCH 12/12] Rename Checkpoint v1alpha1 -> v1 --- cmd/dra-example-kubeletplugin/state.go | 2 +- common.mk | 2 +- internal/api/checkpoint/install/install.go | 6 +-- .../api/checkpoint/{v1alpha1 => v1}/doc.go | 4 +- .../checkpoint/{v1alpha1 => v1}/register.go | 6 +-- .../api/checkpoint/{v1alpha1 => v1}/types.go | 2 +- .../zz_generated.conversion.go | 42 +++++++++---------- .../{v1alpha1 => v1}/zz_generated.deepcopy.go | 2 +- 8 files changed, 33 insertions(+), 33 deletions(-) rename internal/api/checkpoint/{v1alpha1 => v1}/doc.go (88%) rename internal/api/checkpoint/{v1alpha1 => v1}/register.go (93%) rename internal/api/checkpoint/{v1alpha1 => v1}/types.go (98%) rename internal/api/checkpoint/{v1alpha1 => v1}/zz_generated.conversion.go (51%) rename internal/api/checkpoint/{v1alpha1 => v1}/zz_generated.deepcopy.go (99%) diff --git a/cmd/dra-example-kubeletplugin/state.go b/cmd/dra-example-kubeletplugin/state.go index 8a7257fd..37f4365b 100644 --- a/cmd/dra-example-kubeletplugin/state.go +++ 
b/cmd/dra-example-kubeletplugin/state.go @@ -33,7 +33,7 @@ import ( checkpointapi "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" checkpointinstall "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/install" - checkpointv1alpha1 "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/v1alpha1" + checkpointv1alpha1 "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/v1" "sigs.k8s.io/dra-example-driver/internal/profiles" ) diff --git a/common.mk b/common.mk index c04d644f..b603b1c5 100644 --- a/common.mk +++ b/common.mk @@ -21,7 +21,7 @@ VERSION ?= vVERSION := v$(VERSION:v%=%) VENDOR := example.com -APIS := $(CURDIR)/api/$(VENDOR)/resource/gpu/v1alpha1 $(CURDIR)/internal/api/checkpoint $(CURDIR)/internal/api/checkpoint/v1alpha1 +APIS := $(CURDIR)/api/$(VENDOR)/resource/gpu/v1alpha1 $(CURDIR)/internal/api/checkpoint $(CURDIR)/internal/api/checkpoint/v1 PLURAL_EXCEPTIONS = DeviceClassParameters:DeviceClassParameters PLURAL_EXCEPTIONS += GpuClaimParameters:GpuClaimParameters diff --git a/internal/api/checkpoint/install/install.go b/internal/api/checkpoint/install/install.go index 333e92a7..b9560931 100644 --- a/internal/api/checkpoint/install/install.go +++ b/internal/api/checkpoint/install/install.go @@ -24,13 +24,13 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" checkpointapi "sigs.k8s.io/dra-example-driver/internal/api/checkpoint" - "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/v1alpha1" + v1 "sigs.k8s.io/dra-example-driver/internal/api/checkpoint/v1" ) -// Install registers the internal and v1alpha1 checkpoint types with the given scheme. +// Install registers the internal and v1 checkpoint types with the given scheme. func Install(scheme *runtime.Scheme) { utilruntime.Must(checkpointapi.AddToScheme(scheme)) - utilruntime.Must(v1alpha1.AddToScheme(scheme)) + utilruntime.Must(v1.AddToScheme(scheme)) } // NewScheme returns a new runtime.Scheme with all checkpoint versions registered. 
diff --git a/internal/api/checkpoint/v1alpha1/doc.go b/internal/api/checkpoint/v1/doc.go similarity index 88% rename from internal/api/checkpoint/v1alpha1/doc.go rename to internal/api/checkpoint/v1/doc.go index 753f5f18..a2082ddb 100644 --- a/internal/api/checkpoint/v1alpha1/doc.go +++ b/internal/api/checkpoint/v1/doc.go @@ -17,6 +17,6 @@ limitations under the License. // +k8s:deepcopy-gen=package // +k8s:conversion-gen=sigs.k8s.io/dra-example-driver/internal/api/checkpoint -// Package v1alpha1 contains the v1alpha1 serialization format for checkpoints. +// Package v1 contains the v1 serialization format for checkpoints. // These types include JSON tags and are used for reading/writing files on disk. -package v1alpha1 +package v1 diff --git a/internal/api/checkpoint/v1alpha1/register.go b/internal/api/checkpoint/v1/register.go similarity index 93% rename from internal/api/checkpoint/v1alpha1/register.go rename to internal/api/checkpoint/v1/register.go index 3d3898c1..dbeccd1c 100644 --- a/internal/api/checkpoint/v1alpha1/register.go +++ b/internal/api/checkpoint/v1/register.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package v1alpha1 +package v1 import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -25,8 +25,8 @@ import ( // GroupName is the group name for checkpoints. const GroupName = "checkpoint.internal.example.com" -// SchemeGroupVersion is the group version for v1alpha1 types. -var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1alpha1"} +// SchemeGroupVersion is the group version for v1 types. +var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1"} var ( // localSchemeBuilder is used to add types and conversion functions to the scheme. 
diff --git a/internal/api/checkpoint/v1alpha1/types.go b/internal/api/checkpoint/v1/types.go similarity index 98% rename from internal/api/checkpoint/v1alpha1/types.go rename to internal/api/checkpoint/v1/types.go index 72626774..7ab274fa 100644 --- a/internal/api/checkpoint/v1alpha1/types.go +++ b/internal/api/checkpoint/v1/types.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package v1alpha1 +package v1 import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/internal/api/checkpoint/v1alpha1/zz_generated.conversion.go b/internal/api/checkpoint/v1/zz_generated.conversion.go similarity index 51% rename from internal/api/checkpoint/v1alpha1/zz_generated.conversion.go rename to internal/api/checkpoint/v1/zz_generated.conversion.go index 8d5bc98d..b5792a06 100644 --- a/internal/api/checkpoint/v1alpha1/zz_generated.conversion.go +++ b/internal/api/checkpoint/v1/zz_generated.conversion.go @@ -19,7 +19,7 @@ // Code generated by conversion-gen. DO NOT EDIT. -package v1alpha1 +package v1 import ( unsafe "unsafe" @@ -38,64 +38,64 @@ func init() { // Public to allow building arbitrary schemes. 
func RegisterConversions(s *runtime.Scheme) error { if err := s.AddGeneratedConversionFunc((*Checkpoint)(nil), (*checkpoint.Checkpoint)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint(a.(*Checkpoint), b.(*checkpoint.Checkpoint), scope) + return Convert_v1_Checkpoint_To_checkpoint_Checkpoint(a.(*Checkpoint), b.(*checkpoint.Checkpoint), scope) }); err != nil { return err } if err := s.AddGeneratedConversionFunc((*checkpoint.Checkpoint)(nil), (*Checkpoint)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint(a.(*checkpoint.Checkpoint), b.(*Checkpoint), scope) + return Convert_checkpoint_Checkpoint_To_v1_Checkpoint(a.(*checkpoint.Checkpoint), b.(*Checkpoint), scope) }); err != nil { return err } if err := s.AddGeneratedConversionFunc((*PreparedClaim)(nil), (*checkpoint.PreparedClaim)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim(a.(*PreparedClaim), b.(*checkpoint.PreparedClaim), scope) + return Convert_v1_PreparedClaim_To_checkpoint_PreparedClaim(a.(*PreparedClaim), b.(*checkpoint.PreparedClaim), scope) }); err != nil { return err } if err := s.AddGeneratedConversionFunc((*checkpoint.PreparedClaim)(nil), (*PreparedClaim)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim(a.(*checkpoint.PreparedClaim), b.(*PreparedClaim), scope) + return Convert_checkpoint_PreparedClaim_To_v1_PreparedClaim(a.(*checkpoint.PreparedClaim), b.(*PreparedClaim), scope) }); err != nil { return err } return nil } -func autoConvert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint(in *Checkpoint, out *checkpoint.Checkpoint, s conversion.Scope) error { +func autoConvert_v1_Checkpoint_To_checkpoint_Checkpoint(in *Checkpoint, out *checkpoint.Checkpoint, s conversion.Scope) error { out.PreparedClaims = 
*(*[]checkpoint.PreparedClaim)(unsafe.Pointer(&in.PreparedClaims)) return nil } -// Convert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint is an autogenerated conversion function. -func Convert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint(in *Checkpoint, out *checkpoint.Checkpoint, s conversion.Scope) error { - return autoConvert_v1alpha1_Checkpoint_To_checkpoint_Checkpoint(in, out, s) +// Convert_v1_Checkpoint_To_checkpoint_Checkpoint is an autogenerated conversion function. +func Convert_v1_Checkpoint_To_checkpoint_Checkpoint(in *Checkpoint, out *checkpoint.Checkpoint, s conversion.Scope) error { + return autoConvert_v1_Checkpoint_To_checkpoint_Checkpoint(in, out, s) } -func autoConvert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint(in *checkpoint.Checkpoint, out *Checkpoint, s conversion.Scope) error { +func autoConvert_checkpoint_Checkpoint_To_v1_Checkpoint(in *checkpoint.Checkpoint, out *Checkpoint, s conversion.Scope) error { out.PreparedClaims = *(*[]PreparedClaim)(unsafe.Pointer(&in.PreparedClaims)) return nil } -// Convert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint is an autogenerated conversion function. -func Convert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint(in *checkpoint.Checkpoint, out *Checkpoint, s conversion.Scope) error { - return autoConvert_checkpoint_Checkpoint_To_v1alpha1_Checkpoint(in, out, s) +// Convert_checkpoint_Checkpoint_To_v1_Checkpoint is an autogenerated conversion function. 
+func Convert_checkpoint_Checkpoint_To_v1_Checkpoint(in *checkpoint.Checkpoint, out *Checkpoint, s conversion.Scope) error { + return autoConvert_checkpoint_Checkpoint_To_v1_Checkpoint(in, out, s) } -func autoConvert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim(in *PreparedClaim, out *checkpoint.PreparedClaim, s conversion.Scope) error { +func autoConvert_v1_PreparedClaim_To_checkpoint_PreparedClaim(in *PreparedClaim, out *checkpoint.PreparedClaim, s conversion.Scope) error { out.UID = types.UID(in.UID) return nil } -// Convert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim is an autogenerated conversion function. -func Convert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim(in *PreparedClaim, out *checkpoint.PreparedClaim, s conversion.Scope) error { - return autoConvert_v1alpha1_PreparedClaim_To_checkpoint_PreparedClaim(in, out, s) +// Convert_v1_PreparedClaim_To_checkpoint_PreparedClaim is an autogenerated conversion function. +func Convert_v1_PreparedClaim_To_checkpoint_PreparedClaim(in *PreparedClaim, out *checkpoint.PreparedClaim, s conversion.Scope) error { + return autoConvert_v1_PreparedClaim_To_checkpoint_PreparedClaim(in, out, s) } -func autoConvert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim(in *checkpoint.PreparedClaim, out *PreparedClaim, s conversion.Scope) error { +func autoConvert_checkpoint_PreparedClaim_To_v1_PreparedClaim(in *checkpoint.PreparedClaim, out *PreparedClaim, s conversion.Scope) error { out.UID = types.UID(in.UID) return nil } -// Convert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim is an autogenerated conversion function. -func Convert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim(in *checkpoint.PreparedClaim, out *PreparedClaim, s conversion.Scope) error { - return autoConvert_checkpoint_PreparedClaim_To_v1alpha1_PreparedClaim(in, out, s) +// Convert_checkpoint_PreparedClaim_To_v1_PreparedClaim is an autogenerated conversion function. 
+func Convert_checkpoint_PreparedClaim_To_v1_PreparedClaim(in *checkpoint.PreparedClaim, out *PreparedClaim, s conversion.Scope) error { + return autoConvert_checkpoint_PreparedClaim_To_v1_PreparedClaim(in, out, s) } diff --git a/internal/api/checkpoint/v1alpha1/zz_generated.deepcopy.go b/internal/api/checkpoint/v1/zz_generated.deepcopy.go similarity index 99% rename from internal/api/checkpoint/v1alpha1/zz_generated.deepcopy.go rename to internal/api/checkpoint/v1/zz_generated.deepcopy.go index 4e1ee9ce..048e52dd 100644 --- a/internal/api/checkpoint/v1alpha1/zz_generated.deepcopy.go +++ b/internal/api/checkpoint/v1/zz_generated.deepcopy.go @@ -18,7 +18,7 @@ // Code generated by controller-gen. DO NOT EDIT. -package v1alpha1 +package v1 import ( "k8s.io/apimachinery/pkg/runtime"