@@ -61,7 +61,8 @@ const ControllerName = "plank"
61
61
62
62
// PodStatus constants: reason strings reported for a pod. Terminated is
// compared against pod.Status.Reason in syncPendingJob to detect a pod that
// was terminated. Evicted is presumably matched the same way; its check is
// outside this hunk — confirm against the full file.
63
63
const (
64
- Evicted = "Evicted"
64
+ Evicted = "Evicted"
65
+ Terminated = "Terminated"
65
66
)
66
67
67
68
// NodeStatus constants
@@ -480,6 +481,34 @@ func (r *reconciler) syncPendingJob(ctx context.Context, pj *prowv1.ProwJob) (*r
480
481
r .log .WithField ("name" , pj .ObjectMeta .Name ).Debug ("Delete Pod." )
481
482
return nil , ctrlruntimeclient .IgnoreNotFound (client .Delete (ctx , pod ))
482
483
}
484
+ } else if pod .Status .Reason == Terminated {
485
+ // Pod was terminated.
486
+ if pj .Spec .ErrorOnTermination {
487
+ // ErrorOnTermination is enabled, complete the PJ and mark it as
488
+ // errored.
489
+ r .log .WithField ("error-on-termination" , true ).WithFields (pjutil .ProwJobFields (pj )).Info ("Pods Node got terminated, fail job." )
490
+ pj .SetComplete ()
491
+ pj .Status .State = prowv1 .ErrorState
492
+ pj .Status .Description = "Job pod's node was terminated."
493
+ } else {
494
+ // ErrorOnTermination is disabled. Delete the pod now and recreate it in
495
+ // the next resync.
496
+ r .log .WithFields (pjutil .ProwJobFields (pj )).Info ("Pods Node got terminated, deleting & next sync loop will restart pod" )
497
+ client , ok := r .buildClients [pj .ClusterAlias ()]
498
+ if ! ok {
499
+ return nil , TerminalError (fmt .Errorf ("terminated pod %s: unknown cluster alias %q" , pod .Name , pj .ClusterAlias ()))
500
+ }
501
+ if finalizers := sets .New [string ](pod .Finalizers ... ); finalizers .Has (kubernetesreporterapi .FinalizerName ) {
502
+ // We want the end user to not see this, so we have to remove the finalizer, otherwise the pod hangs
503
+ oldPod := pod .DeepCopy ()
504
+ pod .Finalizers = finalizers .Delete (kubernetesreporterapi .FinalizerName ).UnsortedList ()
505
+ if err := client .Patch (ctx , pod , ctrlruntimeclient .MergeFrom (oldPod )); err != nil {
506
+ return nil , fmt .Errorf ("failed to patch pod trying to remove %s finalizer: %w" , kubernetesreporterapi .FinalizerName , err )
507
+ }
508
+ }
509
+ r .log .WithField ("name" , pj .ObjectMeta .Name ).Debug ("Delete Pod." )
510
+ return nil , ctrlruntimeclient .IgnoreNotFound (client .Delete (ctx , pod ))
511
+ }
483
512
} else if pod .DeletionTimestamp != nil && pod .Status .Reason == NodeUnreachablePodReason {
484
513
// This can happen in any phase and means the node got evicted after it became unresponsive. Delete the finalizer so the pod
485
514
// vanishes and we will silently re-create it in the next iteration.
0 commit comments