[WIP] 🌱 Add zone placement group label #3373

21 changes: 21 additions & 0 deletions pkg/services/vmoperator/constants.go
@@ -20,6 +20,27 @@ package vmoperator
const (
kubeTopologyZoneLabelKey = "topology.kubernetes.io/zone"

// ZonePlacementOrgLabelKey is the name of a label that may be used with
// a shared value across a group of Zone Placement Groups (ZPG), also
// known as a Zone Placement Org (ZPO). This is to ensure all ZPGs within
// a ZPO are best-effort spread across available zones.
//
// When the number of ZPGs in a ZPO exceeds the number of available zones,
// the ZPGs placed after all zones have been used may end up in any zone.
ZonePlacementOrgLabelKey = "vmoperator.vmware.com/zone-placement-org"

// ZonePlacementGroupLabelKey is the name of a label that may be used with
// a shared value across a group of VMs to ensure all the VMs honor the
// zone placement result of the first VM from the group to receive a
// placement recommendation.
//
// This label should only be used for concepts like a pool of VMs that
// otherwise use the same VM class, image and storage class, as placement
// will only consider these requirements for the first VM that requests
// placement out of the entire group.
ZonePlacementGroupLabelKey = "vmoperator.vmware.com/zone-placement-group"

// ControlPlaneVMClusterModuleGroupName is the name used for the control plane Cluster Module.
ControlPlaneVMClusterModuleGroupName = "control-plane-group"
// ClusterModuleNameAnnotationKey is key for the Cluster Module annotation.
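As an illustration of how these two labels are meant to be used together, here is a minimal sketch (the cluster and MachineDeployment names "blue" and "blue-md-0" are hypothetical) of the label set every VM in one worker pool would carry; VMs sharing the ZPG value are kept in the same zone, while distinct ZPGs within the ZPO are spread best-effort across zones:

// Hypothetical example: cluster "blue" acts as the ZPO and MachineDeployment
// "blue-md-0" acts as the ZPG. Every VM in the pool carries both labels.
zoneGroupLabels := map[string]string{
	ZonePlacementOrgLabelKey:   "blue",      // shared by every ZPG in the cluster
	ZonePlacementGroupLabelKey: "blue-md-0", // shared by every VM in the pool
}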
39 changes: 33 additions & 6 deletions pkg/services/vmoperator/vmopmachine.go
@@ -685,18 +685,45 @@ func getVMLabels(supervisorMachineCtx *vmware.SupervisorMachineContext, vmLabels
}

// getTopologyLabels returns the labels related to a VM's topology.
//
// TODO(akutz): Currently this function just returns the availability zone,
// and thus the code is optimized as such. However, in the future
// this function may return a more diverse topology.
func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext) map[string]string {
if fd := supervisorMachineCtx.VSphereMachine.Spec.FailureDomain; fd != nil && *fd != "" {
return map[string]string{
kubeTopologyZoneLabelKey: *fd,
}
}
return nil

// If the machine is not explicitly assigned to a zone, we rely on zone placement groups (ZPG) and
// zone placement organizations (ZPO), both managed by VM Operator. This ensures:
//
// - all VMs in a ZPG are placed in the same zone
// - all ZPGs in a ZPO are spread best-effort across the available zones
//
// In CAPV:
//
// - the cluster is a ZPO
// - each machine deployment is a ZPG

labels := map[string]string{
ZonePlacementOrgLabelKey: supervisorMachineCtx.Cluster.Name,
}

if mdName, ok := supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]; ok {
labels[ZonePlacementGroupLabelKey] = mdName
} else {
// In theory there might be cases where the machine does not belong to a machine deployment (and does not have a specific zone assigned):
// - Standalone worker machines
// - Machines controlled by standalone machine sets
// - Control plane machines (either standalone or controlled by a ControlPlane component)
//
// The first two cases can't happen when using a cluster with spec.topology set (with ClusterClasses);
// the last case (control plane machines) should never happen because in a supervisor cluster there is always
// at least one zone defined, the default zone.
//
// However, for the sake of defensive programming, we put all these exceptions into a ZPG named after the cluster,
// thus ensuring that the VM and its related PVCs end up in the same zone.
labels[ZonePlacementGroupLabelKey] = supervisorMachineCtx.Cluster.Name
}
return labels
}

// getMachineDeploymentName returns the MachineDeployment name for a Cluster.
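For readers who want to see the precedence at a glance, the following self-contained sketch mirrors the decision implemented by getTopologyLabels (the cluster and MachineDeployment names are hypothetical; the real function reads them from the supervisor machine context):

package main

import "fmt"

// topologyLabels mirrors the precedence above: an explicit failure domain wins;
// otherwise the cluster name becomes the ZPO and the MachineDeployment name
// (falling back to the cluster name) becomes the ZPG.
func topologyLabels(failureDomain, clusterName, mdName string) map[string]string {
	if failureDomain != "" {
		return map[string]string{"topology.kubernetes.io/zone": failureDomain}
	}
	labels := map[string]string{"vmoperator.vmware.com/zone-placement-org": clusterName}
	if mdName != "" {
		labels["vmoperator.vmware.com/zone-placement-group"] = mdName
	} else {
		// Defensive fallback: group stray machines by cluster name.
		labels["vmoperator.vmware.com/zone-placement-group"] = clusterName
	}
	return labels
}

func main() {
	fmt.Println(topologyLabels("zone-a", "blue", ""))    // explicit failure domain
	fmt.Println(topologyLabels("", "blue", "blue-md-0")) // worker in a MachineDeployment
	fmt.Println(topologyLabels("", "blue", ""))          // defensive fallback
}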
77 changes: 76 additions & 1 deletion pkg/services/vmoperator/vmopmachine_test.go
@@ -18,6 +18,7 @@ package vmoperator

import (
"context"
"fmt"
"testing"
"time"

@@ -65,6 +66,7 @@ func updateReconciledVMStatus(ctx context.Context, vmService VmopMachineService,

const (
machineName = "test-machine"
failureDomain = "test-zone"
clusterName = "test-cluster"
controlPlaneLabelTrue = true
k8sVersion = "test-k8sVersion"
@@ -118,14 +120,15 @@ var _ = Describe("VirtualMachine tests", func() {
vsphereCluster = util.CreateVSphereCluster(clusterName)
vsphereCluster.Status.ResourcePolicyName = resourcePolicyName
machine = util.CreateMachine(machineName, clusterName, k8sVersion, controlPlaneLabelTrue)
machine.Spec.FailureDomain = ptr.To(failureDomain)
vsphereMachine = util.CreateVSphereMachine(machineName, clusterName, className, imageName, storageClass, controlPlaneLabelTrue)
clusterContext, controllerManagerContext := util.CreateClusterContext(cluster, vsphereCluster)
supervisorMachineContext = util.CreateMachineContext(clusterContext, machine, vsphereMachine)
supervisorMachineContext.ControllerManagerContext = controllerManagerContext
vmService = VmopMachineService{Client: controllerManagerContext.Client, ConfigureControlPlaneVMReadinessProbe: network.DummyLBNetworkProvider().SupportsVMReadinessProbe()}
})

Context("Reconcile VirtualMachine", func() {
Context("Reconcile VirtualMachine for a controlPlane with a failure domain set", func() {
verifyOutput := func(machineContext *vmware.SupervisorMachineContext) {
Expect(err != nil).Should(Equal(expectReconcileError))
Expect(requeue).Should(Equal(expectedRequeue))
@@ -160,6 +163,9 @@ var _ = Describe("VirtualMachine tests", func() {
Expect(vmopVM.Labels[clusterNameLabel]).To(Equal(clusterName))
Expect(vmopVM.Labels[clusterSelectorKey]).To(Equal(clusterName))
Expect(vmopVM.Labels[nodeSelectorKey]).To(Equal(roleControlPlane))
Expect(vmopVM.Labels[kubeTopologyZoneLabelKey]).To(Equal(failureDomain))
Expect(vmopVM.Labels).ToNot(HaveKey(ZonePlacementOrgLabelKey))
Expect(vmopVM.Labels).ToNot(HaveKey(ZonePlacementGroupLabelKey))
// for backward compatibility, will be removed in the future
Expect(vmopVM.Labels[legacyClusterSelectorKey]).To(Equal(clusterName))
Expect(vmopVM.Labels[legacyNodeSelectorKey]).To(Equal(roleControlPlane))
@@ -576,6 +582,75 @@ var _ = Describe("VirtualMachine tests", func() {
})
})

Context("Reconcile a VirtualMachine for a worker Machine without a failure domain", func() {

Specify("Reconcile valid Machine", func() {
const (
machineName = "test-machine2"
machineDeploymentName = "test-machine-deployment"
controlPlaneLabelFalse = false
expectedRequeue = true
)
var (
workersClusterModule = fmt.Sprintf("%s-workers-0", cluster.Name)
)

machine2 := util.CreateMachine(machineName, clusterName, k8sVersion, controlPlaneLabelFalse)
machine2.Labels[clusterv1.MachineDeploymentNameLabel] = machineDeploymentName
vsphereMachine2 := util.CreateVSphereMachine(machineName, clusterName, className, imageName, storageClass, controlPlaneLabelFalse)
clusterContext, controllerManagerContext := util.CreateClusterContext(cluster, vsphereCluster)
supervisorMachineContext = util.CreateMachineContext(clusterContext, machine2, vsphereMachine2)
supervisorMachineContext.ControllerManagerContext = controllerManagerContext
vmService = VmopMachineService{Client: controllerManagerContext.Client, ConfigureControlPlaneVMReadinessProbe: network.DummyLBNetworkProvider().SupportsVMReadinessProbe()}

// Do the bare minimum that will cause a vmoperator VirtualMachine to be created
// Note that the VM returned is not a vmoperator type, but is intentionally implementation agnostic
By("VirtualMachine is created")
requeue, err = vmService.ReconcileNormal(ctx, supervisorMachineContext)

Expect(err != nil).Should(Equal(expectReconcileError))
Expect(requeue).Should(Equal(expectedRequeue))
vsphereMachine := supervisorMachineContext.VSphereMachine

Expect(vsphereMachine).ShouldNot(BeNil())
Expect(vsphereMachine.Name).Should(Equal(machineName))
if expectedBiosUUID == "" {
Expect(vsphereMachine.Status.ID).To(BeNil())
} else {
Expect(*vsphereMachine.Status.ID).Should(Equal(expectedBiosUUID))
}
Expect(vsphereMachine.Status.IPAddr).Should(Equal(expectedVMIP))
Expect(vsphereMachine.Status.VMStatus).Should(Equal(expectedState))

vmopVM = getReconciledVM(ctx, vmService, supervisorMachineContext)
Expect(vmopVM != nil).Should(Equal(expectVMOpVM))

if vmopVM != nil {
vms, _ := vmService.getVirtualMachinesInCluster(ctx, supervisorMachineContext)
Expect(vms).Should(HaveLen(1))
Expect(vmopVM.Spec.ImageName).To(Equal(expectedImageName))
Expect(vmopVM.Spec.ClassName).To(Equal(className))
Expect(vmopVM.Spec.StorageClass).To(Equal(storageClass))
Expect(vmopVM.Spec.Reserved).ToNot(BeNil())
Expect(vmopVM.Spec.Reserved.ResourcePolicyName).To(Equal(resourcePolicyName))
Expect(vmopVM.Spec.MinHardwareVersion).To(Equal(minHardwareVersion))
Expect(vmopVM.Spec.PowerState).To(Equal(vmoprv1.VirtualMachinePowerStateOn))
Expect(vmopVM.ObjectMeta.Annotations[ClusterModuleNameAnnotationKey]).To(Equal(workersClusterModule))
Expect(vmopVM.ObjectMeta.Annotations[ProviderTagsAnnotationKey]).To(Equal(WorkerVMVMAntiAffinityTagValue))

Expect(vmopVM.Labels[clusterNameLabel]).To(Equal(clusterName))
Expect(vmopVM.Labels[clusterSelectorKey]).To(Equal(clusterName))
Expect(vmopVM.Labels[nodeSelectorKey]).To(Equal(roleNode))
Expect(vmopVM.Labels).ToNot(HaveKey(kubeTopologyZoneLabelKey))
Expect(vmopVM.Labels[ZonePlacementOrgLabelKey]).To(Equal(clusterName))
Expect(vmopVM.Labels[ZonePlacementGroupLabelKey]).To(Equal(machineDeploymentName))
// for backward compatibility, will be removed in the future
Expect(vmopVM.Labels[legacyClusterSelectorKey]).To(Equal(clusterName))
Expect(vmopVM.Labels[legacyNodeSelectorKey]).To(Equal(roleNode))
}
})
})

Context("Delete tests", func() {
timeout := time.Second * 5
interval := time.Second * 1