Skip to content

Commit c7718eb

Browse files
committed
implement reconciliation for VirtualMachineSetResourcePolicy
1 parent a0aeaa7 commit c7718eb

9 files changed

+420
-81
lines changed

controllers/vmware/test/controllers_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -478,7 +478,7 @@ var _ = Describe("Reconciliation tests", func() {
478478
Eventually(func() error {
479479
return k8sClient.Get(ctx, rpKey, resourcePolicy)
480480
}, time.Second*30).Should(Succeed())
481-
Expect(len(resourcePolicy.Spec.ClusterModuleGroups)).To(BeEquivalentTo(2))
481+
Expect(len(resourcePolicy.Spec.ClusterModuleGroups)).To(BeEquivalentTo(1))
482482

483483
By("Create the CAPI Machine and wait for it to exist")
484484
machineKey, machine := deployCAPIMachine(ns.Name, cluster, k8sClient)

controllers/vmware/vspherecluster_reconciler.go

+31-1
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ type ClusterReconciler struct {
6969
// +kubebuilder:rbac:groups=netoperator.vmware.com,resources=networks,verbs=get;list;watch
7070
// +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;update;create;delete
7171
// +kubebuilder:rbac:groups="",resources=persistentvolumeclaims/status,verbs=get;update;patch
72+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch
73+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch
7274

7375
func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
7476
log := ctrl.LoggerFrom(ctx)
@@ -172,7 +174,7 @@ func (r *ClusterReconciler) reconcileNormal(ctx context.Context, clusterCtx *vmw
172174
// Reconcile ResourcePolicy before we create the machines. If the ResourcePolicy is not reconciled before we create the Node VMs,
173175
// it will be handled by vm operator by relocating the VMs to the ResourcePool and Folder specified by the ResourcePolicy.
174176
// Reconciling the ResourcePolicy early potentially saves us the extra relocate operation.
175-
resourcePolicyName, err := r.ResourcePolicyService.ReconcileResourcePolicy(ctx, clusterCtx)
177+
resourcePolicyName, err := r.ResourcePolicyService.ReconcileResourcePolicy(ctx, clusterCtx.Cluster, clusterCtx.VSphereCluster)
176178
if err != nil {
177179
conditions.MarkFalse(clusterCtx.VSphereCluster, vmwarev1.ResourcePolicyReadyCondition, vmwarev1.ResourcePolicyCreationFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
178180
return errors.Wrapf(err,
@@ -370,6 +372,34 @@ func (r *ClusterReconciler) VSphereMachineToCluster(ctx context.Context, o clien
370372
}}
371373
}
372374

375+
// MachineDeploymentToCluster adds reconcile requests for a Cluster when one of its machineDeployments has an event.
376+
func (r *ClusterReconciler) MachineDeploymentToCluster(ctx context.Context, o client.Object) []reconcile.Request {
377+
log := ctrl.LoggerFrom(ctx)
378+
379+
machineDeployment, ok := o.(*clusterv1.MachineDeployment)
380+
if !ok {
381+
log.Error(nil, fmt.Sprintf("Expected a MachineDeployment but got a %T", o))
382+
return nil
383+
}
384+
log = log.WithValues("MachineDeployment", klog.KObj(machineDeployment))
385+
ctx = ctrl.LoggerInto(ctx, log)
386+
387+
vsphereCluster, err := util.GetVMwareVSphereClusterFromMachineDeployment(ctx, r.Client, machineDeployment)
388+
if err != nil {
389+
log.V(4).Error(err, "Failed to get VSphereCluster from MachineDeployment")
390+
return nil
391+
}
392+
393+
// Can add further filters on Cluster state so that we don't keep reconciling Cluster
394+
log.V(6).Info("Triggering VSphereCluster reconcile from MachineDeployment")
395+
return []ctrl.Request{{
396+
NamespacedName: types.NamespacedName{
397+
Namespace: vsphereCluster.Namespace,
398+
Name: vsphereCluster.Name,
399+
},
400+
}}
401+
}
402+
373403
// ZoneToVSphereClusters adds reconcile requests for VSphereClusters when Zone has an event.
374404
func (r *ClusterReconciler) ZoneToVSphereClusters(ctx context.Context, o client.Object) []reconcile.Request {
375405
log := ctrl.LoggerFrom(ctx)

controllers/vspherecluster_controller.go

+4
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,10 @@ func AddClusterControllerToManager(ctx context.Context, controllerManagerCtx *ca
8383
&vmwarev1.VSphereMachine{},
8484
handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToCluster),
8585
).
86+
Watches(
87+
&clusterv1.MachineDeployment{},
88+
handler.EnqueueRequestsFromMapFunc(reconciler.MachineDeploymentToCluster),
89+
).
8690
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), predicateLog, controllerManagerCtx.WatchFilterValue))
8791

8892
// Conditionally add a Watch for topologyv1.Zone when the feature gate is enabled

pkg/services/interfaces.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"sigs.k8s.io/controller-runtime/pkg/reconcile"
2828

2929
infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1"
30+
vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
3031
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
3132
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
3233
)
@@ -63,7 +64,7 @@ type ControlPlaneEndpointService interface {
6364
type ResourcePolicyService interface {
6465
// ReconcileResourcePolicy ensures that a VirtualMachineSetResourcePolicy exists for the cluster
6566
// Returns the name of a policy if it exists, otherwise returns an error
66-
ReconcileResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (string, error)
67+
ReconcileResourcePolicy(ctx context.Context, cluster *clusterv1.Cluster, vSphereCluster *vmwarev1.VSphereCluster) (string, error)
6768
}
6869

6970
// NetworkProvider provision network resources and configures VM based on network type.

pkg/services/vmoperator/constants.go

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ const (
2222

2323
// ControlPlaneVMClusterModuleGroupName is the name used for the control plane Cluster Module.
2424
ControlPlaneVMClusterModuleGroupName = "control-plane-group"
25+
// ClusterWorkerVMClusterModuleGroupName is the name used for the worker Cluster Module when using mode Cluster.
26+
ClusterWorkerVMClusterModuleGroupName = "workers-group"
2527
// ClusterModuleNameAnnotationKey is key for the Cluster Module annotation.
2628
ClusterModuleNameAnnotationKey = "vsphere-cluster-module-group"
2729
// ProviderTagsAnnotationKey is the key used for the provider tags annotation.

pkg/services/vmoperator/resource_policy.go

+158-49
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,22 @@ package vmoperator
1818

1919
import (
2020
"context"
21+
"fmt"
22+
"sort"
2123

2224
"github.com/pkg/errors"
2325
vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2"
2426
apierrors "k8s.io/apimachinery/pkg/api/errors"
2527
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
28+
"k8s.io/apimachinery/pkg/util/sets"
29+
"k8s.io/klog/v2"
30+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
31+
"sigs.k8s.io/cluster-api/util/patch"
2632
"sigs.k8s.io/controller-runtime/pkg/client"
2733
ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
2834

29-
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
35+
vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
36+
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
3037
)
3138

3239
// RPService represents the ability to reconcile a VirtualMachineSetResourcePolicy via vmoperator.
@@ -36,73 +43,175 @@ type RPService struct {
3643

3744
// ReconcileResourcePolicy ensures that a VirtualMachineSetResourcePolicy exists for the cluster
3845
// Returns the name of a policy if it exists, otherwise returns an error.
39-
func (s *RPService) ReconcileResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (string, error) {
40-
resourcePolicy, err := s.getVirtualMachineSetResourcePolicy(ctx, clusterCtx)
46+
func (s *RPService) ReconcileResourcePolicy(ctx context.Context, cluster *clusterv1.Cluster, vSphereCluster *vmwarev1.VSphereCluster) (string, error) {
47+
clusterModuleGroups, err := getTargetClusterModuleGroups(ctx, s.Client, cluster, vSphereCluster)
4148
if err != nil {
49+
return "", err
50+
}
51+
52+
resourcePolicyName := cluster.Name
53+
resourcePolicy := &vmoprv1.VirtualMachineSetResourcePolicy{}
54+
55+
if err := s.Client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: resourcePolicyName}, resourcePolicy); err != nil {
4256
if !apierrors.IsNotFound(err) {
43-
return "", errors.Errorf("unexpected error in getting the Resource policy: %+v", err)
57+
return "", errors.Wrap(err, "failed to get existing VirtualMachineSetResourcePolicy")
4458
}
45-
resourcePolicy, err = s.createVirtualMachineSetResourcePolicy(ctx, clusterCtx)
46-
if err != nil {
47-
return "", errors.Errorf("failed to create Resource Policy: %+v", err)
59+
60+
resourcePolicy = &vmoprv1.VirtualMachineSetResourcePolicy{
61+
ObjectMeta: metav1.ObjectMeta{
62+
Namespace: cluster.Namespace,
63+
Name: resourcePolicyName,
64+
},
65+
}
66+
67+
if err := s.mutateResourcePolicy(resourcePolicy, clusterModuleGroups, cluster, vSphereCluster, true); err != nil {
68+
return "", errors.Wrap(err, "failed to mutate VirtualMachineSetResourcePolicy")
4869
}
70+
71+
if err := s.Client.Create(ctx, resourcePolicy); err != nil {
72+
return "", errors.Wrap(err, "failed to create VirtualMachineSetResourcePolicy")
73+
}
74+
75+
return resourcePolicyName, nil
76+
}
77+
78+
// Ensure .spec.clusterModuleGroups is up to date.
79+
helper, err := patch.NewHelper(resourcePolicy, s.Client)
80+
if err != nil {
81+
return "", err
82+
}
83+
84+
if err := s.mutateResourcePolicy(resourcePolicy, clusterModuleGroups, cluster, vSphereCluster, false); err != nil {
85+
return "", errors.Wrap(err, "failed to mutate VirtualMachineSetResourcePolicy")
86+
}
87+
88+
resourcePolicy.Spec.ClusterModuleGroups = clusterModuleGroups
89+
if err := helper.Patch(ctx, resourcePolicy); err != nil {
90+
return "", err
4991
}
5092

51-
return resourcePolicy.Name, nil
93+
return resourcePolicyName, nil
5294
}
5395

54-
func (s *RPService) newVirtualMachineSetResourcePolicy(clusterCtx *vmware.ClusterContext) *vmoprv1.VirtualMachineSetResourcePolicy {
55-
return &vmoprv1.VirtualMachineSetResourcePolicy{
56-
ObjectMeta: metav1.ObjectMeta{
57-
Namespace: clusterCtx.Cluster.Namespace,
58-
Name: clusterCtx.Cluster.Name,
59-
},
96+
func (s *RPService) mutateResourcePolicy(resourcePolicy *vmoprv1.VirtualMachineSetResourcePolicy, clusterModuleGroups []string, cluster *clusterv1.Cluster, vSphereCluster *vmwarev1.VSphereCluster, isCreate bool) error {
97+
// Always ensure the owner reference
98+
if err := ctrlutil.SetOwnerReference(vSphereCluster, resourcePolicy, s.Client.Scheme()); err != nil {
99+
return errors.Wrapf(err, "failed to set owner reference for virtualMachineSetResourcePolicy %s for cluster %s", klog.KObj(resourcePolicy), klog.KObj(vSphereCluster))
100+
}
101+
102+
// Always ensure the clusterModuleGroups are up-to-date.
103+
resourcePolicy.Spec.ClusterModuleGroups = clusterModuleGroups
104+
105+
// On create: Also set resourcePool and folder
106+
if isCreate {
107+
resourcePolicy.Spec.Folder = cluster.Name
108+
resourcePolicy.Spec.ResourcePool = vmoprv1.ResourcePoolSpec{
109+
Name: cluster.Name,
110+
}
60111
}
112+
113+
return nil
61114
}
62115

63-
func (s *RPService) getVirtualMachineSetResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (*vmoprv1.VirtualMachineSetResourcePolicy, error) {
116+
func getVirtualMachineSetResourcePolicy(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster) (*vmoprv1.VirtualMachineSetResourcePolicy, error) {
64117
vmResourcePolicy := &vmoprv1.VirtualMachineSetResourcePolicy{}
65118
vmResourcePolicyName := client.ObjectKey{
66-
Namespace: clusterCtx.Cluster.Namespace,
67-
Name: clusterCtx.Cluster.Name,
119+
Namespace: cluster.Namespace,
120+
Name: cluster.Name,
68121
}
69-
err := s.Client.Get(ctx, vmResourcePolicyName, vmResourcePolicy)
70-
return vmResourcePolicy, err
122+
if err := ctrlClient.Get(ctx, vmResourcePolicyName, vmResourcePolicy); err != nil {
123+
return nil, err
124+
}
125+
126+
return vmResourcePolicy, nil
71127
}
72128

73-
func (s *RPService) createVirtualMachineSetResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (*vmoprv1.VirtualMachineSetResourcePolicy, error) {
74-
vmResourcePolicy := s.newVirtualMachineSetResourcePolicy(clusterCtx)
129+
func getFallbackWorkerClusterModuleGroupName(clusterName string) string {
130+
return fmt.Sprintf("%s-workers-0", clusterName)
131+
}
75132

76-
_, err := ctrlutil.CreateOrPatch(ctx, s.Client, vmResourcePolicy, func() error {
77-
vmResourcePolicy.Spec = vmoprv1.VirtualMachineSetResourcePolicySpec{
78-
ResourcePool: vmoprv1.ResourcePoolSpec{
79-
Name: clusterCtx.Cluster.Name,
80-
},
81-
Folder: clusterCtx.Cluster.Name,
82-
ClusterModuleGroups: []string{
83-
ControlPlaneVMClusterModuleGroupName,
84-
getMachineDeploymentNameForCluster(clusterCtx.Cluster),
85-
},
86-
}
87-
// Ensure that the VirtualMachineSetResourcePolicy is owned by the VSphereCluster
88-
if err := ctrlutil.SetOwnerReference(
89-
clusterCtx.VSphereCluster,
90-
vmResourcePolicy,
91-
s.Client.Scheme(),
92-
); err != nil {
93-
return errors.Wrapf(
94-
err,
95-
"error setting %s/%s as owner of %s/%s",
96-
clusterCtx.VSphereCluster.Namespace,
97-
clusterCtx.VSphereCluster.Name,
98-
vmResourcePolicy.Namespace,
99-
vmResourcePolicy.Name,
100-
)
133+
func getWorkerAntiAffinityMode(vSphereCluster *vmwarev1.VSphereCluster) vmwarev1.VSphereClusterWorkerAntiAffinityMode {
134+
if vSphereCluster.Spec.Placement == nil || vSphereCluster.Spec.Placement.WorkerAntiAffinity == nil {
135+
return vmwarev1.VSphereClusterWorkerAntiAffinityModeCluster
136+
}
137+
138+
return vSphereCluster.Spec.Placement.WorkerAntiAffinity.Mode
139+
}
140+
141+
func getTargetClusterModuleGroups(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster, vSphereCluster *vmwarev1.VSphereCluster) ([]string, error) {
142+
if !feature.Gates.Enabled(feature.WorkerAntiAffinity) {
143+
// Fallback to old behaviour
144+
return []string{
145+
ControlPlaneVMClusterModuleGroupName,
146+
getFallbackWorkerClusterModuleGroupName(cluster.Name),
147+
}, nil
148+
}
149+
// Always add a cluster module for control plane machines.
150+
modules := []string{
151+
ControlPlaneVMClusterModuleGroupName,
152+
}
153+
154+
switch mode := getWorkerAntiAffinityMode(vSphereCluster); mode {
155+
case vmwarev1.VSphereClusterWorkerAntiAffinityModeNone:
156+
// Only configure a cluster module for control-plane nodes
157+
case vmwarev1.VSphereClusterWorkerAntiAffinityModeCluster:
158+
// Add an additional cluster module for workers when using Cluster mode.
159+
modules = append(modules, ClusterWorkerVMClusterModuleGroupName)
160+
case vmwarev1.VSphereClusterWorkerAntiAffinityModeMachineDeployment:
161+
// Add an additional cluster module for each MachineDeployment workers when using MachineDeployment mode.
162+
machineDeploymentNames, err := getMachineDeploymentNamesForCluster(ctx, ctrlClient, cluster)
163+
if err != nil {
164+
return nil, err
101165
}
102-
return nil
103-
})
166+
167+
modules = append(modules, machineDeploymentNames...)
168+
default:
169+
return nil, errors.Errorf("unknown mode %q configured for WorkerAntiAffinity", mode)
170+
}
171+
172+
// Add cluster modules from existing VirtualMachines and deduplicate with the target ones.
173+
existingModules, err := getVirtualMachineClusterModulesForCluster(ctx, ctrlClient, cluster)
104174
if err != nil {
105175
return nil, err
106176
}
107-
return vmResourcePolicy, nil
177+
modules = existingModules.Insert(modules...).UnsortedList()
178+
179+
// Sort elements to have deterministic output.
180+
sort.Strings(modules)
181+
182+
return modules, nil
183+
}
184+
185+
func getVirtualMachineClusterModulesForCluster(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster) (sets.Set[string], error) {
186+
labels := map[string]string{clusterv1.ClusterNameLabel: cluster.GetName()}
187+
virtualMachineList := &vmoprv1.VirtualMachineList{}
188+
if err := ctrlClient.List(
189+
ctx, virtualMachineList,
190+
client.InNamespace(cluster.GetNamespace()),
191+
client.MatchingLabels(labels)); err != nil {
192+
return nil, errors.Wrapf(err, "failed to list MachineDeployment objects")
193+
}
194+
195+
clusterModules := sets.Set[string]{}
196+
for _, virtualMachine := range virtualMachineList.Items {
197+
if clusterModule, ok := virtualMachine.Annotations[ClusterModuleNameAnnotationKey]; ok {
198+
clusterModules = clusterModules.Insert(clusterModule)
199+
}
200+
}
201+
return clusterModules, nil
202+
}
203+
204+
func checkClusterModuleGroup(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster, clusterModuleGroupName string) error {
205+
resourcePolicy, err := getVirtualMachineSetResourcePolicy(ctx, ctrlClient, cluster)
206+
if err != nil {
207+
return err
208+
}
209+
210+
for _, cm := range resourcePolicy.Status.ClusterModules {
211+
if cm.GroupName == clusterModuleGroupName {
212+
return nil
213+
}
214+
}
215+
216+
return errors.Errorf("VirtualMachineSetResourcePolicy's .status.clusterModules does not yet contain group %q", clusterModuleGroupName)
108217
}

0 commit comments

Comments
 (0)