@@ -18,7 +18,9 @@ package e2e
1818
1919import (
2020 "context"
21+ "fmt"
2122
23+ "github.com/google/uuid"
2224 "github.com/onsi/ginkgo/v2"
2325 . "github.com/onsi/gomega"
2426 v1 "k8s.io/api/core/v1"
@@ -189,5 +191,125 @@ var _ = ginkgo.Describe("nvmeof", func() {
189191 validateRBDImageCount (f , 0 , nvmeofPool )
190192 validateOmapCount (f , 0 , rbdType , nvmeofPool , volumesType )
191193 })
194+
195+ ginkgo .It ("Test GroupLock: Concurrent Create/Delete Pods Only" , func () {
196+ // This test validates the GroupLock implementation in the NVMeoF NodeServer
197+ // by creating and deleting multiple Pods (not PVCs) concurrently.
198+ //
199+ // Test flow:
200+ // 1. Create 3 PVCs sequentially and validate they're Bound
201+ // 2. Create 3 Pods concurrently using those PVCs (triggers NodeStage -> Group A lock)
202+ // 3. Wait for all Pods to be Running
203+ // 4. Delete all 3 Pods concurrently (triggers NodeUnstage -> Group B lock)
204+ // 5. Delete all 3 PVCs sequentially
205+ // 6. Verify no timeouts/deadlocks and all operations succeed
206+ //
207+ // This tests GroupLock in the NodeServer without involving ControllerServer operations.
208+ totalCount := 3
209+
210+ ginkgo .By ("Creating PVCs sequentially" )
211+ pvc , err := loadPVC (pvcPath )
212+ Expect (err ).ShouldNot (HaveOccurred ())
213+ pvc .Namespace = f .UniqueName
214+ pvc .Spec .StorageClassName = & nvmeofStorageClass
215+
216+ pvcBaseName := uuid .NewString ()
217+ for i := range totalCount {
218+ pvcName := fmt .Sprintf ("%s-%d" , pvcBaseName , i )
219+ pvcCopy := pvc .DeepCopy ()
220+ pvcCopy .Name = pvcName
221+
222+ framework .Logf ("Creating PVC %d/%d: %s" , i + 1 , totalCount , pvcName )
223+ err = createPVCAndvalidatePV (f .ClientSet , pvcCopy , deployTimeout )
224+ Expect (err ).ShouldNot (HaveOccurred ())
225+ }
226+
227+ ginkgo .By ("Validating backend RBD images were created" )
228+ validateRBDImageCount (f , totalCount , nvmeofPool )
229+ validateOmapCount (f , totalCount , rbdType , nvmeofPool , volumesType )
230+
231+ ginkgo .By ("Creating Pods concurrently using those PVCs" )
232+ createResult := createConcurrentPods (totalCount , pvcBaseName , 0 , appPath , f )
233+
234+ // Log any errors
235+ if createResult .HasErrors () {
236+ createResult .LogErrors ()
237+ }
238+
239+ // Verify all creations succeeded
240+ Expect (createResult .failed ).To (Equal (0 ),
241+ "Expected all %d Pod create operations to succeed, but %d failed" ,
242+ totalCount , createResult .failed )
243+
244+ ginkgo .By ("Waiting for all Pods to be Running" )
245+ for i := range totalCount {
246+ podName := fmt .Sprintf ("%s-%d" , createResult .uniqueName , i )
247+ err = waitForPodInRunningState (podName , f .UniqueName , f .ClientSet , deployTimeout , noError )
248+ Expect (err ).ShouldNot (HaveOccurred ())
249+ }
250+
251+ ginkgo .By ("Deleting Pods concurrently" )
252+ deleteResult := deleteConcurrentPods (createResult , f )
253+
254+ // Log any errors
255+ if deleteResult .HasErrors () {
256+ deleteResult .LogErrors ()
257+ }
258+
259+ // Verify all deletions succeeded
260+ Expect (deleteResult .failed ).To (Equal (0 ),
261+ "Expected all %d Pod delete operations to succeed, but %d failed" ,
262+ totalCount , deleteResult .failed )
263+
264+ ginkgo .By ("Deleting PVCs sequentially" )
265+ for i := range totalCount {
266+ pvcName := fmt .Sprintf ("%s-%d" , pvcBaseName , i )
267+ pvcCopy := pvc .DeepCopy ()
268+ pvcCopy .Name = pvcName
269+
270+ framework .Logf ("Deleting PVC %d/%d: %s" , i + 1 , totalCount , pvcName )
271+ err = deletePVCAndValidatePV (f .ClientSet , pvcCopy , deployTimeout )
272+ Expect (err ).ShouldNot (HaveOccurred ())
273+ }
274+
275+ ginkgo .By ("Validating all backend RBD images were deleted" )
276+ validateRBDImageCount (f , 0 , nvmeofPool )
277+ validateOmapCount (f , 0 , rbdType , nvmeofPool , volumesType )
278+
279+ framework .Logf ("GroupLock test passed: %d concurrent Pod creates and %d concurrent Pod deletes completed successfully" ,
280+ totalCount , totalCount )
281+ })
282+
283+ ginkgo .It ("Test GroupLock: Mixed Create/Delete Pods with Rapid Switching" , func () {
284+ // This test validates the GroupLock implementation under rapid switching
285+ // between Group A (NodeStage) and Group B (NodeUnstage) operations.
286+ //
287+ // Test flow:
288+ // 1. Create 15 PVCs sequentially
289+ // 2. Create 5 Pods using PVCs 0-4 (Group A)
290+ // 3. Concurrently: Create 5 Pods using PVCs 5-9 (Group A) + Delete previous 5 Pods (Group B)
291+ // 4. Concurrently: Create 5 Pods using PVCs 10-14 (Group A) + Delete previous 5 Pods (Group B)
292+ // 5. Delete final 5 Pods
293+ // 6. Delete all 15 PVCs sequentially
294+ //
295+ // This tests rapid GroupLock switching between Group A and B in the NodeServer only,
296+ // without involving ControllerServer operations.
297+ totalCount := 15
298+ batchSize := 5
299+
300+ ginkgo .By (fmt .Sprintf ("Running Pods-only mixed test: %d total PVCs, batches of %d Pods" ,
301+ totalCount , batchSize ))
302+
303+ err := mixedCreateDeletePodsOnly (totalCount , batchSize , pvcPath , appPath , nvmeofStorageClass , f )
304+ Expect (err ).ShouldNot (HaveOccurred (),
305+ "Mixed Pods-only operations should complete without errors" )
306+
307+ ginkgo .By ("Validating all backend RBD images were cleaned up" )
308+ validateRBDImageCount (f , 0 , nvmeofPool )
309+ validateOmapCount (f , 0 , rbdType , nvmeofPool , volumesType )
310+
311+ framework .Logf ("GroupLock Pods-only test passed: %d Pods created and deleted with rapid Group A/B switching" ,
312+ totalCount )
313+ })
192314 })
193315})
0 commit comments