Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 8b5cc66

Browse files
committed Mar 11, 2025 ·
vsan stretch cluster fixes
1 parent eff5e67 commit 8b5cc66

4 files changed

+49
-27
lines changed
 

‎tests/e2e/util.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -6176,7 +6176,7 @@ func enableFullSyncTriggerFss(ctx context.Context, client clientset.Interface, n
61766176
for _, pod := range csipods.Items {
61776177
fpod.DeletePodOrFail(ctx, client, csiSystemNamespace, pod.Name)
61786178
}
6179-
err = fpod.WaitForPodsRunningReady(ctx, client, csiSystemNamespace, int(csipods.Size()),
6179+
err = fpod.WaitForPodsRunningReady(ctx, client, csiSystemNamespace, len(csipods.Items),
61806180
time.Duration(pollTimeout))
61816181
gomega.Expect(err).NotTo(gomega.HaveOccurred())
61826182
break

‎tests/e2e/vm_service_vsan_stretch_cluster.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,7 @@ var _ bool = ginkgo.Describe("[vsan-stretch-vmsvc] vm service with csi vol tests
467467
ch := make(chan *vmopv1.VirtualMachine)
468468
var wg sync.WaitGroup
469469
var lock sync.Mutex
470+
done := make(chan bool)
470471
ginkgo.By("Creating VM in parallel to site failure")
471472
wg.Add(2)
472473
go createVMServiceVmInParallel(ctx, vmopC, namespace, vmClass, pvclaimsList,
@@ -476,7 +477,7 @@ var _ bool = ginkgo.Describe("[vsan-stretch-vmsvc] vm service with csi vol tests
476477
vms = append(vms, v)
477478
}
478479
}()
479-
go siteFailureInParallel(ctx, true, &wg)
480+
go siteFailureInParallel(ctx, true, &wg, done)
480481
wg.Wait()
481482
close(ch)
482483

@@ -622,9 +623,10 @@ var _ bool = ginkgo.Describe("[vsan-stretch-vmsvc] vm service with csi vol tests
622623

623624
var wg sync.WaitGroup
624625
ginkgo.By("Deleting VM in parallel to secondary site failure")
626+
done := make(chan bool)
625627
wg.Add(2)
626628
go deleteVMServiceVmInParallel(ctx, vmopC, vms, namespace, &wg)
627-
go siteFailureInParallel(ctx, false, &wg)
629+
go siteFailureInParallel(ctx, false, &wg, done)
628630
wg.Wait()
629631

630632
defer func() {

‎tests/e2e/vsan_stretched_cluster.go

+40-22
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
484484
dep1ReplicaCount = 1
485485
dep2ReplicaCount = 1
486486
}
487-
sts1Replicas = 1
487+
sts1Replicas = 3
488488
sts2Replicas = 5
489489
statefulset1, deployment1, _ := createStsDeployment(ctx, client, namespace, sc, true,
490490
false, sts1Replicas, "web", dep1ReplicaCount, accessMode)
@@ -601,12 +601,12 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
601601
ss2PodsBeforeScaleDown, sts2Replicas, false, true)
602602

603603
// Scaling up statefulset sts1
604-
sts1Replicas += 2
604+
sts1Replicas -= 2
605605
scaleUpStsAndVerifyPodMetadata(ctx, client, namespace, statefulset1,
606606
sts1Replicas, true, false)
607607

608608
// Scaling down statefulset sts2
609-
sts2Replicas -= 2
609+
sts2Replicas += 2
610610
scaleDownStsAndVerifyPodMetadata(ctx, client, namespace, statefulset2,
611611
ss2PodsBeforeScaleDown, sts2Replicas, true, false)
612612
}
@@ -767,9 +767,10 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
767767

768768
ginkgo.By("Bring down the primary site while deleting pods")
769769
var wg sync.WaitGroup
770+
done := make(chan bool)
770771
wg.Add(2)
771772
go deletePodsInParallel(ctx, client, namespace, pods, &wg)
772-
go siteFailureInParallel(ctx, true, &wg)
773+
go siteFailureInParallel(ctx, true, &wg, done)
773774
wg.Wait()
774775

775776
defer func() {
@@ -862,14 +863,15 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
862863
var wg sync.WaitGroup
863864
ch := make(chan *v1.PersistentVolumeClaim)
864865
lock := &sync.Mutex{}
866+
done := make(chan bool)
865867
wg.Add(2)
866868
go createPvcInParallel(ctx, client, namespace, diskSize, sc, ch, lock, &wg, volumeOpsScale)
867869
go func() {
868870
for v := range ch {
869871
pvclaims = append(pvclaims, v)
870872
}
871873
}()
872-
go siteFailureInParallel(ctx, true, &wg)
874+
go siteFailureInParallel(ctx, true, &wg, done)
873875
wg.Wait()
874876
close(ch)
875877

@@ -1203,9 +1205,10 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
12031205

12041206
ginkgo.By("Bring down the primary site while deleting pvcs")
12051207
var wg sync.WaitGroup
1208+
done := make(chan bool)
12061209
wg.Add(2)
1207-
go deletePvcInParallel(ctx, client, pvclaims, namespace, &wg)
1208-
go siteFailureInParallel(ctx, true, &wg)
1210+
go deletePvcInParallel(ctx, client, pvclaims, namespace, &wg, done)
1211+
go siteFailureInParallel(ctx, true, &wg, done)
12091212
wg.Wait()
12101213

12111214
defer func() {
@@ -1240,6 +1243,9 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
12401243
volumeHandle := pv.Spec.CSI.VolumeHandle
12411244
err := fpv.WaitForPersistentVolumeDeleted(ctx, client, pv.Name, poll,
12421245
pollTimeout)
1246+
eventList, _ := client.CoreV1().Events(namespace).List(ctx,
1247+
metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s", pv.Name)})
1248+
framework.Logf("events for PV: %v", eventList)
12431249
errMsg := "The object or item referred to could not be found"
12441250
if err != nil && checkForEventWithMessage(client, "", pv.Name, errMsg) {
12451251
framework.Logf("Persistent Volume %v still not deleted with err %v", pv.Name, errMsg)
@@ -1347,14 +1353,15 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
13471353
var wg sync.WaitGroup
13481354
wg.Add(2)
13491355
ch := make(chan *v1.Pod)
1356+
done := make(chan bool)
13501357
lock := &sync.Mutex{}
13511358
go createPodsInParallel(client, namespace, pvclaims, ctx, lock, ch, &wg, volumeOpsScale)
13521359
go func() {
13531360
for v := range ch {
13541361
pods = append(pods, v)
13551362
}
13561363
}()
1357-
go siteFailureInParallel(ctx, true, &wg)
1364+
go siteFailureInParallel(ctx, true, &wg, done)
13581365
wg.Wait()
13591366
close(ch)
13601367

@@ -1501,12 +1508,13 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
15011508

15021509
ginkgo.By("Bring down the primary site while adding labels to PVCs and PVs")
15031510
var wg sync.WaitGroup
1511+
done := make(chan bool)
15041512
labels := make(map[string]string)
15051513
labels[labelKey] = labelValue
15061514
wg.Add(3)
15071515
go updatePvcLabelsInParallel(ctx, client, namespace, labels, pvclaims, &wg)
15081516
go updatePvLabelsInParallel(ctx, client, namespace, labels, persistentvolumes, &wg)
1509-
go siteFailureInParallel(ctx, true, &wg)
1517+
go siteFailureInParallel(ctx, true, &wg, done)
15101518
wg.Wait()
15111519

15121520
if vanillaCluster {
@@ -1618,14 +1626,15 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
16181626
var wg sync.WaitGroup
16191627
ch := make(chan *v1.PersistentVolumeClaim)
16201628
lock := &sync.Mutex{}
1629+
done := make(chan bool)
16211630
wg.Add(2)
16221631
go createPvcInParallel(ctx, client, namespace, diskSize, sc, ch, lock, &wg, volumeOpsScale)
16231632
go func() {
16241633
for v := range ch {
16251634
pvclaims = append(pvclaims, v)
16261635
}
16271636
}()
1628-
go siteFailureInParallel(ctx, false, &wg)
1637+
go siteFailureInParallel(ctx, false, &wg, done)
16291638
wg.Wait()
16301639
close(ch)
16311640

@@ -1736,9 +1745,10 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
17361745

17371746
ginkgo.By("Bring down the secondary site while deleting pvcs")
17381747
var wg sync.WaitGroup
1748+
done := make(chan bool)
17391749
wg.Add(2)
1740-
go deletePvcInParallel(ctx, client, pvclaims, namespace, &wg)
1741-
go siteFailureInParallel(ctx, false, &wg)
1750+
go deletePvcInParallel(ctx, client, pvclaims, namespace, &wg, done)
1751+
go siteFailureInParallel(ctx, false, &wg, done)
17421752
wg.Wait()
17431753

17441754
defer func() {
@@ -1886,13 +1896,14 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
18861896
wg.Add(2)
18871897
ch := make(chan *v1.Pod)
18881898
lock := &sync.Mutex{}
1899+
done := make(chan bool)
18891900
go createPodsInParallel(client, namespace, pvclaims, ctx, lock, ch, &wg, volumeOpsScale)
18901901
go func() {
18911902
for v := range ch {
18921903
pods = append(pods, v)
18931904
}
18941905
}()
1895-
go siteFailureInParallel(ctx, false, &wg)
1906+
go siteFailureInParallel(ctx, false, &wg, done)
18961907
wg.Wait()
18971908
close(ch)
18981909

@@ -2035,9 +2046,10 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
20352046

20362047
ginkgo.By("Bring down the secondary site while deleting pods")
20372048
var wg sync.WaitGroup
2049+
done := make(chan bool)
20382050
wg.Add(2)
20392051
go deletePodsInParallel(ctx, client, namespace, pods, &wg)
2040-
go siteFailureInParallel(ctx, false, &wg)
2052+
go siteFailureInParallel(ctx, false, &wg, done)
20412053
wg.Wait()
20422054

20432055
defer func() {
@@ -2909,11 +2921,12 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
29092921
var wg sync.WaitGroup
29102922
labels := make(map[string]string)
29112923
labels[labelKey] = labelValue
2924+
done := make(chan bool)
29122925

29132926
wg.Add(3)
29142927
go updatePvcLabelsInParallel(ctx, client, namespace, labels, pvclaims, &wg)
29152928
go updatePvLabelsInParallel(ctx, client, namespace, labels, persistentvolumes, &wg)
2916-
go siteFailureInParallel(ctx, false, &wg)
2929+
go siteFailureInParallel(ctx, false, &wg, done)
29172930
wg.Wait()
29182931

29192932
ginkgo.By("Wait for k8s cluster to be healthy")
@@ -3461,18 +3474,19 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
34613474
var wg sync.WaitGroup
34623475
ch := make(chan *v1.PersistentVolumeClaim)
34633476
lock := &sync.Mutex{}
3477+
done := make(chan bool)
34643478
wg.Add(5)
34653479
go scaleStsReplicaInParallel(ctx, client, stsList, prefix1, replicas1, &wg)
34663480
go scaleStsReplicaInParallel(ctx, client, stsList, prefix2, replicas2, &wg)
3467-
go deletePvcInParallel(ctx, client, pvclaims, namespace, &wg)
3481+
go deletePvcInParallel(ctx, client, pvclaims, namespace, &wg, done)
34683482
go createPvcInParallel(ctx, client, namespace, diskSize, sc, ch, lock, &wg, operationStormScale)
34693483
go func() {
34703484
for v := range ch {
34713485
pvcList = append(pvcList, v)
34723486
}
34733487
}()
34743488

3475-
go siteFailureInParallel(ctx, false, &wg)
3489+
go siteFailureInParallel(ctx, false, &wg, done)
34763490
wg.Wait()
34773491
close(ch)
34783492

@@ -3776,10 +3790,11 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
37763790

37773791
ginkgo.By("Bring down the secondary site while deleting pv")
37783792
var wg sync.WaitGroup
3793+
done := make(chan bool)
37793794
wg.Add(3)
3780-
go deletePvcInParallel(ctx, client, pvcs, namespace, &wg)
3795+
go deletePvcInParallel(ctx, client, pvcs, namespace, &wg, done)
37813796
go deletePvInParallel(ctx, client, pvs, &wg)
3782-
go siteFailureInParallel(ctx, false, &wg)
3797+
go siteFailureInParallel(ctx, false, &wg, done)
37833798
wg.Wait()
37843799

37853800
defer func() {
@@ -3962,6 +3977,7 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
39623977
var wg sync.WaitGroup
39633978
ch := make(chan *v1.PersistentVolumeClaim)
39643979
lock := &sync.Mutex{}
3980+
done := make(chan bool)
39653981
wg.Add(2)
39663982
if vanillaCluster {
39673983
go createStaticPvAndPvcInParallel(client, ctx, fcdIDs, ch, namespace, &wg, volumeOpsScale)
@@ -3974,7 +3990,7 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
39743990
pvclaims = append(pvclaims, v)
39753991
}
39763992
}()
3977-
go siteFailureInParallel(ctx, false, &wg)
3993+
go siteFailureInParallel(ctx, false, &wg, done)
39783994
wg.Wait()
39793995
close(ch)
39803996

@@ -4357,6 +4373,7 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
43574373

43584374
csipods, err := client.CoreV1().Pods(csiSystemNamespace).List(ctx, metav1.ListOptions{})
43594375
gomega.Expect(err).NotTo(gomega.HaveOccurred())
4376+
done := make(chan bool)
43604377
if vanillaCluster {
43614378
// Get restConfig.
43624379
restConfig := getRestConfigClient()
@@ -4368,7 +4385,7 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
43684385

43694386
wg.Add(2)
43704387
go triggerFullSyncInParallel(ctx, cnsOperatorClient, &wg)
4371-
go siteFailureInParallel(ctx, true, &wg)
4388+
go siteFailureInParallel(ctx, true, &wg, done)
43724389
wg.Wait()
43734390
} else {
43744391
framework.Logf("Sleeping full-sync interval time")
@@ -4732,9 +4749,10 @@ var _ = ginkgo.Describe("[vsan-stretch-vanilla] vsan stretched cluster tests", f
47324749
enableFullSyncTriggerFss(ctx, client, csiSystemNamespace, fullSyncFss)
47334750
ginkgo.By("Bring down the secondary site while full sync is going on")
47344751
var wg sync.WaitGroup
4752+
done := make(chan bool)
47354753
wg.Add(2)
47364754
go triggerFullSyncInParallel(ctx, cnsOperatorClient, &wg)
4737-
go siteFailureInParallel(ctx, false, &wg)
4755+
go siteFailureInParallel(ctx, false, &wg, done)
47384756
wg.Wait()
47394757

47404758
defer func() {

‎tests/e2e/vsan_stretched_cluster_utils.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,10 @@ func initialiseFdsVar(ctx context.Context) {
9595
}
9696

9797
// siteFailureInParallel causes site Failure in multiple hosts of the site in parallel
98-
func siteFailureInParallel(ctx context.Context, primarySite bool, wg *sync.WaitGroup) {
98+
func siteFailureInParallel(ctx context.Context, primarySite bool, wg *sync.WaitGroup, done chan bool) {
9999
defer ginkgo.GinkgoRecover()
100100
defer wg.Done()
101+
<-done
101102
siteFailover(ctx, primarySite)
102103
}
103104

@@ -428,13 +429,14 @@ func toggleNetworkFailureParallel(hosts []string, causeNetworkFailure bool) {
428429

429430
// deletePVCInParallel deletes PVC in a given namespace in parallel
430431
func deletePvcInParallel(ctx context.Context, client clientset.Interface, pvclaims []*v1.PersistentVolumeClaim,
431-
namespace string, wg *sync.WaitGroup) {
432+
namespace string, wg *sync.WaitGroup, done chan bool) {
432433
defer ginkgo.GinkgoRecover()
433434
defer wg.Done()
434435
for _, pvclaim := range pvclaims {
435436
err := fpv.DeletePersistentVolumeClaim(ctx, client, pvclaim.Name, namespace)
436437
gomega.Expect(err).NotTo(gomega.HaveOccurred())
437438
}
439+
close(done)
438440
}
439441

440442
// createPodsInParallel creates Pods in a given namespace in parallel

0 commit comments

Comments
 (0)
Please sign in to comment.