
Commit a7fc58f

Remediate unhealthy MachinePool machines
1 parent: 2bb2099

File tree: 9 files changed (+354, -23 lines)


config/crd/bases/cluster.x-k8s.io_machinepools.yaml (+34)
(Generated file; diff not rendered.)

exp/api/v1beta1/machinepool_types.go (+20)
@@ -60,6 +60,11 @@ type MachinePoolSpec struct {
 	// failureDomains is the list of failure domains this MachinePool should be attached to.
 	// +optional
 	FailureDomains []string `json:"failureDomains,omitempty"`
+
+	// The strategy for replacing existing machines with
+	// new ones.
+	// +optional
+	Strategy *MachinePoolStrategy `json:"strategy,omitempty"`
 }
 
 // ANCHOR_END: MachinePoolSpec
@@ -161,6 +166,21 @@ type MachinePoolV1Beta2Status struct {
 
 // ANCHOR_END: MachinePoolStatus
 
+// ANCHOR: MachinePoolStrategy
+
+// MachinePoolStrategy defines how to replace existing machines
+// with new ones.
+type MachinePoolStrategy struct {
+	// Remediation controls the strategy of remediating unhealthy machines
+	// as marked by a MachineHealthCheck. This only applies to infrastructure
+	// providers supporting "MachinePool Machines". For other providers,
+	// no remediation is done.
+	// +optional
+	Remediation *clusterv1.RemediationStrategy `json:"remediation,omitempty"`
+}
+
+// ANCHOR_END: MachinePoolStrategy
+
 // MachinePoolPhase is a string representation of a MachinePool Phase.
 //
 // This type is a high-level indicator of the status of the MachinePool as it is provisioned,
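
For context, the new spec.strategy.remediation field reuses the core clusterv1.RemediationStrategy type, whose maxInFlight accepts either an absolute number or a percentage of replicas. A minimal sketch of populating the field in Go, with hypothetical values; this snippet is not part of the commit:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/utils/ptr"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
)

func main() {
	// Hypothetical MachinePool that allows remediating at most 40% of its
	// replicas at a time; with 4 replicas this scales (rounded up) to 2.
	mp := expv1.MachinePool{
		Spec: expv1.MachinePoolSpec{
			Replicas: ptr.To[int32](4),
			Strategy: &expv1.MachinePoolStrategy{
				Remediation: &clusterv1.RemediationStrategy{
					MaxInFlight: ptr.To(intstr.FromString("40%")),
				},
			},
		},
	}
	fmt.Println(mp.Spec.Strategy.Remediation.MaxInFlight.String())
}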

exp/api/v1beta1/zz_generated.deepcopy.go (+25)
(Generated file; diff not rendered.)

exp/internal/controllers/machinepool_controller_phases.go (+142, -12)
@@ -20,14 +20,17 @@ import (
 	"context"
 	"fmt"
 	"reflect"
+	"sort"
 	"time"
 
 	"github.com/pkg/errors"
+	"golang.org/x/exp/slices"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
+	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/klog/v2"
 	"k8s.io/utils/ptr"
@@ -44,6 +47,7 @@ import (
 	"sigs.k8s.io/cluster-api/internal/util/ssa"
 	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/annotations"
+	"sigs.k8s.io/cluster-api/util/collections"
 	"sigs.k8s.io/cluster-api/util/conditions"
 	utilconversion "sigs.k8s.io/cluster-api/util/conversion"
 	"sigs.k8s.io/cluster-api/util/labels"
@@ -294,15 +298,18 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s *
 	// Get the nodeRefsMap from the cluster.
 	s.nodeRefMap, getNodeRefsErr = r.getNodeRefMap(ctx, clusterClient)
 
-	err = r.reconcileMachines(ctx, s, infraConfig)
+	res := ctrl.Result{}
+
+	reconcileMachinesRes, err := r.reconcileMachines(ctx, s, infraConfig)
+	res = util.LowestNonZeroResult(res, reconcileMachinesRes)
 
 	if err != nil || getNodeRefsErr != nil {
 		return ctrl.Result{}, kerrors.NewAggregate([]error{errors.Wrapf(err, "failed to reconcile Machines for MachinePool %s", klog.KObj(mp)), errors.Wrapf(getNodeRefsErr, "failed to get nodeRefs for MachinePool %s", klog.KObj(mp))})
 	}
 
 	if !mp.Status.InfrastructureReady {
 		log.Info("Infrastructure provider is not yet ready", infraConfig.GetKind(), klog.KObj(infraConfig))
-		return ctrl.Result{}, nil
+		return res, nil
 	}
 
 	var providerIDList []string
@@ -321,7 +328,7 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s *
 
 	if len(providerIDList) == 0 && mp.Status.Replicas != 0 {
 		log.Info("Retrieved empty spec.providerIDList from infrastructure provider but status.replicas is not zero.", "replicas", mp.Status.Replicas)
-		return ctrl.Result{}, nil
+		return res, nil
 	}
 
 	if !reflect.DeepEqual(mp.Spec.ProviderIDList, providerIDList) {
@@ -331,7 +338,7 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s *
 		mp.Status.UnavailableReplicas = mp.Status.Replicas
 	}
 
-	return ctrl.Result{}, nil
+	return res, nil
 }
 
 // reconcileMachines reconciles Machines associated with a MachinePool.
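
For context on the requeue plumbing above: util.LowestNonZeroResult merges two ctrl.Result values by preferring whichever requeues sooner, and a zero result never overrides a non-zero one. A minimal sketch of the semantics the change relies on; not part of the commit:

package main

import (
	"fmt"
	"time"

	"sigs.k8s.io/cluster-api/util"
	ctrl "sigs.k8s.io/controller-runtime"
)

func main() {
	// reconcileMachines asked to be requeued in 30s; nothing else is pending,
	// so the non-zero result wins over the empty one.
	res := util.LowestNonZeroResult(ctrl.Result{}, ctrl.Result{RequeueAfter: 30 * time.Second})
	fmt.Println(res.RequeueAfter) // 30s

	// Of two non-zero requeues, the sooner one wins.
	res = util.LowestNonZeroResult(
		ctrl.Result{RequeueAfter: 30 * time.Second},
		ctrl.Result{RequeueAfter: 15 * time.Second},
	)
	fmt.Println(res.RequeueAfter) // 15s
}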
@@ -341,18 +348,18 @@ func (r *MachinePoolReconciler) reconcileInfrastructure(ctx context.Context, s *
 // infrastructure is created accordingly.
 // Note: When supported by the cloud provider implementation of the MachinePool, machines will provide a means to interact
 // with the corresponding infrastructure (e.g. delete a specific machine in case MachineHealthCheck detects it is unhealthy).
-func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, s *scope, infraMachinePool *unstructured.Unstructured) error {
+func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, s *scope, infraMachinePool *unstructured.Unstructured) (ctrl.Result, error) {
 	log := ctrl.LoggerFrom(ctx)
 	mp := s.machinePool
 
 	var infraMachineKind string
 	if err := util.UnstructuredUnmarshalField(infraMachinePool, &infraMachineKind, "status", "infrastructureMachineKind"); err != nil {
 		if errors.Is(err, util.ErrUnstructuredFieldNotFound) {
 			log.V(4).Info("MachinePool Machines not supported, no infraMachineKind found")
-			return nil
+			return ctrl.Result{}, nil
 		}
 
-		return errors.Wrapf(err, "failed to retrieve infraMachineKind from infrastructure provider for MachinePool %s", klog.KObj(mp))
+		return ctrl.Result{}, errors.Wrapf(err, "failed to retrieve infraMachineKind from infrastructure provider for MachinePool %s", klog.KObj(mp))
 	}
 
 	infraMachineSelector := metav1.LabelSelector{
@@ -369,7 +376,7 @@ func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, s *scope,
 	infraMachineList.SetAPIVersion(infraMachinePool.GetAPIVersion())
 	infraMachineList.SetKind(infraMachineKind + "List")
 	if err := r.Client.List(ctx, &infraMachineList, client.InNamespace(mp.Namespace), client.MatchingLabels(infraMachineSelector.MatchLabels)); err != nil {
-		return errors.Wrapf(err, "failed to list infra machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
+		return ctrl.Result{}, errors.Wrapf(err, "failed to list infra machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
 	}
 
 	// Add watcher for infraMachine, if there isn't one already; this will allow this controller to reconcile
@@ -380,21 +387,26 @@ func (r *MachinePoolReconciler) reconcileMachines(ctx context.Context, s *scope,
 
 	// Add watcher for infraMachine, if there isn't one already.
 	if err := r.externalTracker.Watch(log, sampleInfraMachine, handler.EnqueueRequestsFromMapFunc(r.infraMachineToMachinePoolMapper)); err != nil {
-		return err
+		return ctrl.Result{}, err
 	}
 
 	// Get the list of machines managed by this controller, and align it with the infra machines managed by
 	// the InfraMachinePool controller.
 	machineList := &clusterv1.MachineList{}
 	if err := r.Client.List(ctx, machineList, client.InNamespace(mp.Namespace), client.MatchingLabels(infraMachineSelector.MatchLabels)); err != nil {
-		return err
+		return ctrl.Result{}, err
 	}
 
 	if err := r.createOrUpdateMachines(ctx, s, machineList.Items, infraMachineList.Items); err != nil {
-		return errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
+		return ctrl.Result{}, errors.Wrapf(err, "failed to create machines for MachinePool %q in namespace %q", mp.Name, mp.Namespace)
 	}
 
-	return nil
+	res, err := r.reconcileUnhealthyMachines(ctx, s, machineList.Items)
+	if err != nil {
+		return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile unhealthy machines for MachinePool %s", klog.KObj(mp))
+	}
+
+	return res, nil
 }
 
 // createOrUpdateMachines creates a MachinePool Machine for each infraMachine if it doesn't already exist and sets the owner reference and infraRef.
@@ -594,3 +606,121 @@ func (r *MachinePoolReconciler) getNodeRefMap(ctx context.Context, c client.Clie
 
 	return nodeRefsMap, nil
 }
+
+func (r *MachinePoolReconciler) reconcileUnhealthyMachines(ctx context.Context, s *scope, machines []clusterv1.Machine) (ctrl.Result, error) {
+	if len(machines) == 0 {
+		return ctrl.Result{}, nil
+	}
+
+	log := ctrl.LoggerFrom(ctx)
+	mp := s.machinePool
+
+	machinesWithHealthCheck := slices.DeleteFunc(slices.Clone(machines), func(machine clusterv1.Machine) bool {
+		return !conditions.Has(&machine, clusterv1.MachineHealthCheckSucceededCondition)
+	})
+	if len(machinesWithHealthCheck) == 0 {
+		// This means there is no MachineHealthCheck selecting any machines
+		// of this machine pool. In this case, do not requeue so often,
+		// but still check regularly in case a MachineHealthCheck became
+		// deployed or activated. This long interval shouldn't be a problem
+		// at cluster creation, since newly-created nodes should anyway
+		// trigger MachinePool reconciliation as the infrastructure provider
+		// creates the InfraMachines.
+		log.V(4).Info("Skipping reconciliation of unhealthy MachinePool machines because there are no health-checked machines")
+		return ctrl.Result{RequeueAfter: 10 * time.Minute}, nil
+	}
+
+	unhealthyMachines := slices.DeleteFunc(slices.Clone(machines), func(machine clusterv1.Machine) bool {
+		return !collections.IsUnhealthyAndOwnerRemediated(&machine)
+	})
+	log.V(4).Info("Reconciling unhealthy MachinePool machines", "unhealthyMachines", len(unhealthyMachines))
+
+	// Calculate how many in flight machines we should remediate.
+	// By default, we allow all machines to be remediated at the same time.
+	maxInFlight := len(unhealthyMachines)
+	if mp.Spec.Strategy != nil && mp.Spec.Strategy.Remediation != nil {
+		if mp.Spec.Strategy.Remediation.MaxInFlight != nil {
+			var err error
+			replicas := int(ptr.Deref(mp.Spec.Replicas, 1))
+			maxInFlight, err = intstr.GetScaledValueFromIntOrPercent(mp.Spec.Strategy.Remediation.MaxInFlight, replicas, true)
+			if err != nil {
+				return ctrl.Result{}, fmt.Errorf("failed to calculate maxInFlight to remediate machines: %v", err)
+			}
+			log = log.WithValues("maxInFlight", maxInFlight, "replicas", replicas)
+		}
+	}
+
+	machinesToRemediate := make([]*clusterv1.Machine, 0, len(unhealthyMachines))
+	inFlight := 0
+	for _, m := range unhealthyMachines {
+		if !m.DeletionTimestamp.IsZero() {
+			if conditions.IsTrue(&m, clusterv1.MachineOwnerRemediatedCondition) {
+				// Machine has been remediated by this controller and still in flight.
+				inFlight++
+			}
+			continue
+		}
+		if conditions.IsFalse(&m, clusterv1.MachineOwnerRemediatedCondition) {
+			machinesToRemediate = append(machinesToRemediate, &m)
+		}
+	}
+	log = log.WithValues("inFlight", inFlight)
+
+	if len(machinesToRemediate) == 0 {
+		// There's a MachineHealthCheck monitoring the machines, but currently
+		// no action to be taken. A machine could require remediation at any
+		// time, so use a short interval until next reconciliation.
+		return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
+	}
+
+	if inFlight >= maxInFlight {
+		log.V(3).Info("Remediation strategy is set, and maximum in flight has been reached", "machinesToBeRemediated", len(machinesToRemediate))
+
+		// Check soon again whether the already-remediating (= deleting) machines are gone
+		// so that more machines can be remediated
		return ctrl.Result{RequeueAfter: 15 * time.Second}, nil
+	}
+
+	// Sort the machines from newest to oldest.
+	// We are trying to remediate machines failing to come up first because
+	// there is a chance that they are not hosting any workloads (minimize disruption).
+	sort.SliceStable(machinesToRemediate, func(i, j int) bool {
+		return machinesToRemediate[i].CreationTimestamp.After(machinesToRemediate[j].CreationTimestamp.Time)
+	})
+
+	haveMoreMachinesToRemediate := false
+	if len(machinesToRemediate) > (maxInFlight - inFlight) {
+		haveMoreMachinesToRemediate = true
+		log.V(5).Info("Remediation strategy is set, limiting in flight operations", "machinesToBeRemediated", len(machinesToRemediate))
+		machinesToRemediate = machinesToRemediate[:(maxInFlight - inFlight)]
+	}
+
+	// Remediate unhealthy machines by deleting them
+	var errs []error
+	for _, m := range machinesToRemediate {
+		log.Info("Deleting unhealthy Machine", "Machine", klog.KObj(m))
+		patch := client.MergeFrom(m.DeepCopy())
+		if err := r.Client.Delete(ctx, m); err != nil {
+			if apierrors.IsNotFound(err) {
+				continue
+			}
+			errs = append(errs, errors.Wrapf(err, "failed to delete Machine %s", klog.KObj(m)))
+			continue
+		}
+		conditions.MarkTrue(m, clusterv1.MachineOwnerRemediatedCondition)
+		if err := r.Client.Status().Patch(ctx, m, patch); err != nil && !apierrors.IsNotFound(err) {
+			errs = append(errs, errors.Wrapf(err, "failed to update status of Machine %s", klog.KObj(m)))
+		}
+	}
+
+	if len(errs) > 0 {
+		return ctrl.Result{}, errors.Wrapf(kerrors.NewAggregate(errs), "failed to delete unhealthy Machines")
+	}
+
+	if haveMoreMachinesToRemediate {
+		// More machines need remediation, so reconcile again sooner
+		return ctrl.Result{RequeueAfter: 15 * time.Second}, nil
+	}
+
+	return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
+}
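
For context on the maxInFlight calculation in reconcileUnhealthyMachines: intstr.GetScaledValueFromIntOrPercent passes an integer value through unchanged and scales a percentage against the replica count, rounding up here. A minimal sketch with hypothetical replica counts; not part of the commit:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/utils/ptr"
)

func main() {
	// An integer maxInFlight is used as-is, regardless of the replica count.
	v, _ := intstr.GetScaledValueFromIntOrPercent(ptr.To(intstr.FromInt32(2)), 10, true)
	fmt.Println(v) // 2

	// A percentage is scaled against the replica count and rounded up:
	// 40% of 4 replicas -> ceil(1.6) = 2 machines may be remediated at once.
	v, _ = intstr.GetScaledValueFromIntOrPercent(ptr.To(intstr.FromString("40%")), 4, true)
	fmt.Println(v) // 2
}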
