@@ -22,7 +22,6 @@ import (
 	"io"
 	"net/http"
 	"sort"
-	"strconv"
 	"strings"
 	"testing"
 	"time"
@@ -358,7 +357,10 @@ func TestRerun(t *testing.T) {
 func TestRerun(t *testing.T) {
 	t.Parallel()
 	const rerunJobConfigFile = "rerun-test.yaml"
+
 	jobName := "rerun-test-job-" + RandomString(t)
+	prowJobSelector := labels.SelectorFromSet(map[string]string{kube.ProwJobAnnotation: jobName})
+
 	var rerunJobConfigTemplate = `periodics:
 - interval: 1h
   name: %s
@@ -379,137 +381,137 @@ func TestRerun(t *testing.T) {
 		t.Fatalf("Failed creating clients for cluster %q: %v", clusterContext, err)
 	}

-	rerunJobConfig := fmt.Sprintf(rerunJobConfigTemplate, jobName, "foo")
-	if err := updateJobConfig(context.Background(), kubeClient, rerunJobConfigFile, []byte(rerunJobConfig)); err != nil {
-		t.Fatalf("Failed update job config: %v", err)
-	}
+	ctx := context.Background()

-	// Now we are waiting on Horologium to create the first prow job so that we
-	// can rerun from.
-	// Horologium itself is pretty good at handling the configmap update, but
-	// not kubelet, according to
-	// https://github.com/kubernetes/kubernetes/issues/30189 kubelet syncs
-	// configmap updates on existing pods every minute, which is a long wait.
-	// The proposed fix in the issue was updating the deployment, which imo
-	// should be better handled by just refreshing pods.
-	// So here comes forcing restart of horologium pods.
-	if err := refreshProwPods(kubeClient, context.Background(), "horologium"); err != nil {
-		t.Fatalf("Failed refreshing horologium pods: %v", err)
-	}
-	// Same with deck
-	if err := refreshProwPods(kubeClient, context.Background(), "deck"); err != nil {
-		t.Fatalf("Failed refreshing deck pods: %v", err)
+	redeployJobConfig := func(jobConfig string) {
+		if err := updateJobConfig(ctx, kubeClient, rerunJobConfigFile, []byte(jobConfig)); err != nil {
+			t.Fatalf("Failed update job config: %v", err)
+		}
+
+		// Now we are waiting on Horologium to create the first prow job so that we
+		// can rerun from.
+		// Horologium itself is pretty good at handling the configmap update, but
+		// not kubelet, according to
+		// https://github.com/kubernetes/kubernetes/issues/30189 kubelet syncs
+		// configmap updates on existing pods every minute, which is a long wait.
+		// It's quicker to rollout the affected Deployments.
+		if err := rolloutDeployment(t, ctx, kubeClient, "horologium"); err != nil {
+			t.Fatalf("Failed rolling out Horologium: %v", err)
+		}
+		// Same with deck
+		if err := rolloutDeployment(t, ctx, kubeClient, "deck"); err != nil {
+			t.Fatalf("Failed rolling out Deck: %v", err)
+		}
 	}

+	// Deploy the initial template with "foo" as the label.
+	redeployJobConfig(fmt.Sprintf(rerunJobConfigTemplate, jobName, "foo"))
+
 	t.Cleanup(func() {
-		if err := updateJobConfig(context.Background(), kubeClient, rerunJobConfigFile, []byte{}); err != nil {
+		if err := updateJobConfig(ctx, kubeClient, rerunJobConfigFile, []byte{}); err != nil {
 			t.Logf("ERROR CLEANUP: %v", err)
 		}
-		labels, err := labels.Parse("prow.k8s.io/job = " + jobName)
-		if err != nil {
-			t.Logf("Skip cleaning up jobs, as failed parsing label: %v", err)
-			return
+		// Prevent horologium from immediately creating the "missing" ProwJob after the
+		// DeleteAll call further down, because horologium still runs with the last
+		// non-empty configuration (foo=bar).
+		if err := rolloutDeployment(t, ctx, kubeClient, "horologium"); err != nil {
+			t.Logf("Failed rolling out Horologium: %v", err)
 		}
-		if err := kubeClient.DeleteAllOf(context.Background(), &prowjobv1.ProwJob{}, &ctrlruntimeclient.DeleteAllOfOptions{
-			ListOptions: ctrlruntimeclient.ListOptions{LabelSelector: labels},
+		if err := kubeClient.DeleteAllOf(ctx, &prowjobv1.ProwJob{}, &ctrlruntimeclient.DeleteAllOfOptions{
+			ListOptions: ctrlruntimeclient.ListOptions{
+				Namespace:     defaultNamespace,
+				LabelSelector: prowJobSelector,
+			},
 		}); err != nil {
 			t.Logf("ERROR CLEANUP: %v", err)
 		}
 	})
-	ctx := context.Background()
+
 	getLatestJob := func(t *testing.T, jobName string, lastRun *v1.Time) *prowjobv1.ProwJob {
 		var res *prowjobv1.ProwJob
 		if err := wait.PollUntilContextTimeout(ctx, time.Second, 90*time.Second, true, func(ctx context.Context) (bool, error) {
 			pjs := &prowjobv1.ProwJobList{}
-			err = kubeClient.List(ctx, pjs, &ctrlruntimeclient.ListOptions{
-				LabelSelector: labels.SelectorFromSet(map[string]string{kube.ProwJobAnnotation: jobName}),
+			err := kubeClient.List(ctx, pjs, &ctrlruntimeclient.ListOptions{
+				LabelSelector: prowJobSelector,
 				Namespace:     defaultNamespace,
 			})
 			if err != nil {
 				return false, fmt.Errorf("failed listing prow jobs: %w", err)
 			}
+
 			sort.Slice(pjs.Items, func(i, j int) bool {
-				revi, _ := strconv.Atoi(pjs.Items[i].ResourceVersion)
-				revj, _ := strconv.Atoi(pjs.Items[j].ResourceVersion)
-				return revi > revj
+				createdi := pjs.Items[i].CreationTimestamp
+				createdj := pjs.Items[j].CreationTimestamp
+				return createdj.Before(&createdi)
 			})
+
 			if len(pjs.Items) > 0 {
 				if lastRun != nil && pjs.Items[0].CreationTimestamp.Before(lastRun) {
 					return false, nil
 				}
 				res = &pjs.Items[0]
 			}
+
 			return res != nil, nil
 		}); err != nil {
 			t.Fatalf("Failed waiting for job %q: %v", jobName, err)
 		}
 		return res
 	}
-	rerun := func(t *testing.T, jobName string, mode string) {
-		req, err := http.NewRequest("POST", fmt.Sprintf("http://localhost/rerun?mode=%v&prowjob=%v", mode, jobName), nil)
-		if err != nil {
-			t.Fatalf("Could not generate a request %v", err)
-		}

-		// Deck might not have been informed about the job config update, retry
-		// for this case.
-		waitDur := time.Second * 5
-		var lastErr error
-		for i := 0; i < 3; i++ {
-			lastErr = nil
-			res, err := http.DefaultClient.Do(req)
-			if err != nil {
-				lastErr = fmt.Errorf("could not make post request %v", err)
-				res.Body.Close()
-				break
-			}
-			// The only retry condition is status not ok
-			if res.StatusCode != http.StatusOK {
-				lastErr = fmt.Errorf("status not expected: %d", res.StatusCode)
-				res.Body.Close()
-				waitDur *= 2
-				time.Sleep(waitDur)
-				continue
-			}
-			body, err := io.ReadAll(res.Body)
-			if err != nil {
-				lastErr = fmt.Errorf("could not read body response %v", err)
-				res.Body.Close()
-				break
-			}
-			t.Logf("Response body: %s", string(body))
-			break
-		}
-		if lastErr != nil {
-			t.Fatalf("Failed trigger rerun: %v", lastErr)
-		}
-	}
-	jobToRerun := getLatestJob(t, jobName, nil)
-	rerunJobConfig = fmt.Sprintf(rerunJobConfigTemplate, jobName, "bar")
-	if err := updateJobConfig(context.Background(), kubeClient, rerunJobConfigFile, []byte(rerunJobConfig)); err != nil {
-		t.Fatalf("Failed update job config: %v", err)
-	}
-	var passed bool
-	// It may take some time for the new ProwJob to show up, so we will
-	// check every 30s interval three times for it to appear
-	latestRun := jobToRerun
-	for i := 0; i < 3; i++ {
-		time.Sleep(30 * time.Second)
-		rerun(t, jobToRerun.Name, "latest")
-		if latestRun = getLatestJob(t, jobName, &latestRun.CreationTimestamp); latestRun.Labels["foo"] == "bar" {
-			passed = true
-			break
-		}
-	}
-	if !passed {
-		t.Fatal("Expected updated job.")
+	// Wait for the first job to be created by horologium.
+	initialJob := getLatestJob(t, jobName, nil)
+
+	// Update the job configuration with a new label.
+	redeployJobConfig(fmt.Sprintf(rerunJobConfigTemplate, jobName, "bar"))
+
+	// Rerun the job using the latest config.
+	rerunJob(t, ctx, initialJob.Name, "latest")
+
+	// Wait until the desired ProwJob shows up.
+	latestJob := getLatestJob(t, jobName, &initialJob.CreationTimestamp)
+	if latestJob.Labels["foo"] != "bar" {
+		t.Fatalf("Failed waiting for ProwJob %q using latest config with foo=bar.", jobName)
 	}

+	// Prevent Deck from being too fast and recreating the new job in the same second
+	// as the previous one.
+	time.Sleep(1 * time.Second)
+
 	// Deck scheduled job from latest configuration, rerun with "original"
 	// should still go with original configuration.
-	rerun(t, jobToRerun.Name, "original")
-	if latestRun := getLatestJob(t, jobName, &latestRun.CreationTimestamp); latestRun.Labels["foo"] != "foo" {
-		t.Fatalf("Job label mismatch. Want: 'foo', got: '%s'", latestRun.Labels["foo"])
+	rerunJob(t, ctx, initialJob.Name, "original")
+
+	originalJob := getLatestJob(t, jobName, &latestJob.CreationTimestamp)
+	if originalJob.Labels["foo"] != "foo" {
+		t.Fatalf("Failed waiting for ProwJob %q using original config with foo=foo.", jobName)
+	}
+}
+
+func rerunJob(t *testing.T, ctx context.Context, jobName string, mode string) {
+	req, err := http.NewRequestWithContext(ctx, "POST", fmt.Sprintf("http://localhost/rerun?mode=%v&prowjob=%v", mode, jobName), nil)
+	if err != nil {
+		t.Fatalf("Could not generate a request %v", err)
+	}
+
+	// Deck might not be fully ready yet, so we must retry.
+	if err := wait.PollUntilContextTimeout(ctx, time.Second, 10*time.Second, true, func(ctx context.Context) (bool, error) {
+		res, err := http.DefaultClient.Do(req)
+		if err != nil {
+			return false, fmt.Errorf("could not make post request: %w", err)
+		}
+		defer res.Body.Close()
+
+		body, err := io.ReadAll(res.Body)
+		if err != nil {
+			t.Logf("Failed to read response body: %v", err)
+			return false, nil
+		}
+		t.Logf("Response body: %s", string(body))
+
+		return res.StatusCode == http.StatusOK, nil
+	}); err != nil {
+		t.Fatalf("Failed to rerun job %q with %s config: %v", jobName, mode, err)
 	}
 }
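
Note: the `rolloutDeployment` helper called throughout this hunk is added elsewhere in the PR and is not shown above. As a rough sketch only, a helper like it could trigger a rollout the same way `kubectl rollout restart` does (bump a pod-template annotation) and then wait for the Deployment status to converge. Everything below is an assumption for illustration: the Deployment being named after the component, it living in `defaultNamespace`, and the extra `appsv1` import (`k8s.io/api/apps/v1`) are not confirmed by this hunk.

```go
// rolloutDeployment (sketch, not the PR's actual implementation): restart the
// component's pods so they pick up the freshly updated job-config ConfigMap.
// Assumes the Deployment is named after the component and runs in defaultNamespace.
func rolloutDeployment(t *testing.T, ctx context.Context, kubeClient ctrlruntimeclient.Client, component string) error {
	t.Helper()

	key := ctrlruntimeclient.ObjectKey{Namespace: defaultNamespace, Name: component}
	deployment := &appsv1.Deployment{}
	if err := kubeClient.Get(ctx, key, deployment); err != nil {
		return fmt.Errorf("failed getting deployment %q: %w", component, err)
	}

	// Bump a pod-template annotation so the Deployment controller replaces the
	// pods; this is the same mechanism `kubectl rollout restart` uses.
	if deployment.Spec.Template.Annotations == nil {
		deployment.Spec.Template.Annotations = map[string]string{}
	}
	deployment.Spec.Template.Annotations["kubectl.kubernetes.io/restartedAt"] = time.Now().Format(time.RFC3339)
	if err := kubeClient.Update(ctx, deployment); err != nil {
		return fmt.Errorf("failed updating deployment %q: %w", component, err)
	}

	// Wait until the rollout has finished, i.e. all replicas run the new template.
	return wait.PollUntilContextTimeout(ctx, time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) {
		current := &appsv1.Deployment{}
		if err := kubeClient.Get(ctx, key, current); err != nil {
			return false, err
		}
		want := int32(1)
		if current.Spec.Replicas != nil {
			want = *current.Spec.Replicas
		}
		done := current.Status.ObservedGeneration >= current.Generation &&
			current.Status.UpdatedReplicas == want &&
			current.Status.AvailableReplicas == want &&
			current.Status.Replicas == want
		return done, nil
	})
}
```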