diff --git a/controllers/vmware/test/controllers_test.go b/controllers/vmware/test/controllers_test.go
index 93b95d44df..be7af78b34 100644
--- a/controllers/vmware/test/controllers_test.go
+++ b/controllers/vmware/test/controllers_test.go
@@ -478,7 +478,7 @@ var _ = Describe("Reconciliation tests", func() {
 		Eventually(func() error {
 			return k8sClient.Get(ctx, rpKey, resourcePolicy)
 		}, time.Second*30).Should(Succeed())
-		Expect(len(resourcePolicy.Spec.ClusterModuleGroups)).To(BeEquivalentTo(2))
+		Expect(len(resourcePolicy.Spec.ClusterModuleGroups)).To(BeEquivalentTo(1))
 
 		By("Create the CAPI Machine and wait for it to exist")
 		machineKey, machine := deployCAPIMachine(ns.Name, cluster, k8sClient)
diff --git a/controllers/vmware/vspherecluster_reconciler.go b/controllers/vmware/vspherecluster_reconciler.go
index 397663de19..2dc6012a6d 100644
--- a/controllers/vmware/vspherecluster_reconciler.go
+++ b/controllers/vmware/vspherecluster_reconciler.go
@@ -69,6 +69,8 @@ type ClusterReconciler struct {
 // +kubebuilder:rbac:groups=netoperator.vmware.com,resources=networks,verbs=get;list;watch
 // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;update;create;delete
 // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch
+// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines,verbs=get;list;watch
 
 func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) {
 	log := ctrl.LoggerFrom(ctx)
@@ -172,7 +174,7 @@ func (r *ClusterReconciler) reconcileNormal(ctx context.Context, clusterCtx *vmw
 	// Reconcile ResourcePolicy before we create the machines. If the ResourcePolicy is not reconciled before we create the Node VMs,
 	// it will be handled by vm operator by relocating the VMs to the ResourcePool and Folder specified by the ResourcePolicy.
 	// Reconciling the ResourcePolicy early potentially saves us the extra relocate operation.
-	resourcePolicyName, err := r.ResourcePolicyService.ReconcileResourcePolicy(ctx, clusterCtx)
+	resourcePolicyName, err := r.ResourcePolicyService.ReconcileResourcePolicy(ctx, clusterCtx.Cluster, clusterCtx.VSphereCluster)
 	if err != nil {
 		conditions.MarkFalse(clusterCtx.VSphereCluster, vmwarev1.ResourcePolicyReadyCondition, vmwarev1.ResourcePolicyCreationFailedReason, clusterv1.ConditionSeverityWarning, err.Error())
 		return errors.Wrapf(err,
@@ -370,6 +372,34 @@ func (r *ClusterReconciler) VSphereMachineToCluster(ctx context.Context, o clien
 	}}
 }
 
+// MachineDeploymentToCluster adds reconcile requests for a Cluster when one of its MachineDeployments has an event.
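+// This keeps the VirtualMachineSetResourcePolicy's cluster module groups in sync as MachineDeployments are created and deleted.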
+func (r *ClusterReconciler) MachineDeploymentToCluster(ctx context.Context, o client.Object) []reconcile.Request {
+	log := ctrl.LoggerFrom(ctx)
+
+	machineDeployment, ok := o.(*clusterv1.MachineDeployment)
+	if !ok {
+		log.Error(nil, fmt.Sprintf("Expected a MachineDeployment but got a %T", o))
+		return nil
+	}
+	log = log.WithValues("MachineDeployment", klog.KObj(machineDeployment))
+	ctx = ctrl.LoggerInto(ctx, log)
+
+	vsphereCluster, err := util.GetVMwareVSphereClusterFromMachineDeployment(ctx, r.Client, machineDeployment)
+	if err != nil {
+		log.V(4).Error(err, "Failed to get VSphereCluster from MachineDeployment")
+		return nil
+	}
+
+	// Further filters on the Cluster's state could be added here so that we don't keep reconciling the Cluster unnecessarily.
+	log.V(6).Info("Triggering VSphereCluster reconcile from MachineDeployment")
+	return []ctrl.Request{{
+		NamespacedName: types.NamespacedName{
+			Namespace: vsphereCluster.Namespace,
+			Name:      vsphereCluster.Name,
+		},
+	}}
+}
+
 // ZoneToVSphereClusters adds reconcile requests for VSphereClusters when Zone has an event.
 func (r *ClusterReconciler) ZoneToVSphereClusters(ctx context.Context, o client.Object) []reconcile.Request {
 	log := ctrl.LoggerFrom(ctx)
diff --git a/controllers/vspherecluster_controller.go b/controllers/vspherecluster_controller.go
index 47d1ee9ce4..47430f3560 100644
--- a/controllers/vspherecluster_controller.go
+++ b/controllers/vspherecluster_controller.go
@@ -83,6 +83,10 @@ func AddClusterControllerToManager(ctx context.Context, controllerManagerCtx *ca
 			&vmwarev1.VSphereMachine{},
 			handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToCluster),
 		).
+		Watches(
+			&clusterv1.MachineDeployment{},
+			handler.EnqueueRequestsFromMapFunc(reconciler.MachineDeploymentToCluster),
+		).
 		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), predicateLog, controllerManagerCtx.WatchFilterValue))
 
 	// Conditionally add a Watch for topologyv1.Zone when the feature gate is enabled
diff --git a/pkg/services/interfaces.go b/pkg/services/interfaces.go
index df4685cddb..791d05c8d9 100644
--- a/pkg/services/interfaces.go
+++ b/pkg/services/interfaces.go
@@ -27,6 +27,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 
 	infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1"
+	vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
 	capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
 	"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
 )
@@ -63,7 +64,7 @@ type ControlPlaneEndpointService interface {
 type ResourcePolicyService interface {
 	// ReconcileResourcePolicy ensures that a VirtualMachineSetResourcePolicy exists for the cluster
 	// Returns the name of a policy if it exists, otherwise returns an error
-	ReconcileResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (string, error)
+	ReconcileResourcePolicy(ctx context.Context, cluster *clusterv1.Cluster, vSphereCluster *vmwarev1.VSphereCluster) (string, error)
 }
 
 // NetworkProvider provision network resources and configures VM based on network type.
diff --git a/pkg/services/vmoperator/constants.go b/pkg/services/vmoperator/constants.go
index 011082a06c..61405c0b37 100644
--- a/pkg/services/vmoperator/constants.go
+++ b/pkg/services/vmoperator/constants.go
@@ -22,6 +22,8 @@ const (
 	// ControlPlaneVMClusterModuleGroupName is the name used for the control plane Cluster Module.
 	ControlPlaneVMClusterModuleGroupName = "control-plane-group"
+	// ClusterWorkerVMClusterModuleGroupName is the name used for the worker Cluster Module when using Cluster mode.
+	ClusterWorkerVMClusterModuleGroupName = "workers-group"
 	// ClusterModuleNameAnnotationKey is key for the Cluster Module annotation.
 	ClusterModuleNameAnnotationKey = "vsphere-cluster-module-group"
 	// ProviderTagsAnnotationKey is the key used for the provider tags annotation.
diff --git a/pkg/services/vmoperator/resource_policy.go b/pkg/services/vmoperator/resource_policy.go
index 61fd255e02..00a37ea76e 100644
--- a/pkg/services/vmoperator/resource_policy.go
+++ b/pkg/services/vmoperator/resource_policy.go
@@ -18,15 +18,22 @@ package vmoperator
 
 import (
 	"context"
+	"fmt"
+	"sort"
 
 	"github.com/pkg/errors"
 	vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/klog/v2"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/util/patch"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 
-	"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
+	vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
+	"sigs.k8s.io/cluster-api-provider-vsphere/feature"
 )
 
 // RPService represents the ability to reconcile a VirtualMachineSetResourcePolicy via vmoperator.
@@ -36,73 +43,175 @@ type RPService struct {
 
 // ReconcileResourcePolicy ensures that a VirtualMachineSetResourcePolicy exists for the cluster
 // Returns the name of a policy if it exists, otherwise returns an error.
-func (s *RPService) ReconcileResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (string, error) {
-	resourcePolicy, err := s.getVirtualMachineSetResourcePolicy(ctx, clusterCtx)
+func (s *RPService) ReconcileResourcePolicy(ctx context.Context, cluster *clusterv1.Cluster, vSphereCluster *vmwarev1.VSphereCluster) (string, error) {
+	clusterModuleGroups, err := getTargetClusterModuleGroups(ctx, s.Client, cluster, vSphereCluster)
 	if err != nil {
+		return "", err
+	}
+
+	resourcePolicyName := cluster.Name
+	resourcePolicy := &vmoprv1.VirtualMachineSetResourcePolicy{}
+
+	if err := s.Client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: resourcePolicyName}, resourcePolicy); err != nil {
 		if !apierrors.IsNotFound(err) {
-			return "", errors.Errorf("unexpected error in getting the Resource policy: %+v", err)
+			return "", errors.Wrap(err, "failed to get existing VirtualMachineSetResourcePolicy")
 		}
-		resourcePolicy, err = s.createVirtualMachineSetResourcePolicy(ctx, clusterCtx)
-		if err != nil {
-			return "", errors.Errorf("failed to create Resource Policy: %+v", err)
+
+		resourcePolicy = &vmoprv1.VirtualMachineSetResourcePolicy{
+			ObjectMeta: metav1.ObjectMeta{
+				Namespace: cluster.Namespace,
+				Name:      resourcePolicyName,
+			},
 		}
+
+		if err := s.mutateResourcePolicy(resourcePolicy, clusterModuleGroups, cluster, vSphereCluster, true); err != nil {
+			return "", errors.Wrap(err, "failed to mutate VirtualMachineSetResourcePolicy")
+		}
+
+		if err := s.Client.Create(ctx, resourcePolicy); err != nil {
+			return "", errors.Wrap(err, "failed to create VirtualMachineSetResourcePolicy")
+		}
+
+		return resourcePolicyName, nil
+	}
+
+	// Ensure .spec.clusterModuleGroups is up to date.
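+	// The patch helper records the object's state here and later computes a minimal patch against the mutated object.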
+	helper, err := patch.NewHelper(resourcePolicy, s.Client)
+	if err != nil {
+		return "", err
+	}
+
+	if err := s.mutateResourcePolicy(resourcePolicy, clusterModuleGroups, cluster, vSphereCluster, false); err != nil {
+		return "", errors.Wrap(err, "failed to mutate VirtualMachineSetResourcePolicy")
+	}
+
+	if err := helper.Patch(ctx, resourcePolicy); err != nil {
+		return "", err
 	}
 
-	return resourcePolicy.Name, nil
+	return resourcePolicyName, nil
 }
 
-func (s *RPService) newVirtualMachineSetResourcePolicy(clusterCtx *vmware.ClusterContext) *vmoprv1.VirtualMachineSetResourcePolicy {
-	return &vmoprv1.VirtualMachineSetResourcePolicy{
-		ObjectMeta: metav1.ObjectMeta{
-			Namespace: clusterCtx.Cluster.Namespace,
-			Name:      clusterCtx.Cluster.Name,
-		},
+func (s *RPService) mutateResourcePolicy(resourcePolicy *vmoprv1.VirtualMachineSetResourcePolicy, clusterModuleGroups []string, cluster *clusterv1.Cluster, vSphereCluster *vmwarev1.VSphereCluster, isCreate bool) error {
+	// Always ensure the owner reference.
+	if err := ctrlutil.SetOwnerReference(vSphereCluster, resourcePolicy, s.Client.Scheme()); err != nil {
+		return errors.Wrapf(err, "failed to set owner reference for virtualMachineSetResourcePolicy %s for cluster %s", klog.KObj(resourcePolicy), klog.KObj(vSphereCluster))
+	}
+
+	// Always ensure the clusterModuleGroups are up-to-date.
+	resourcePolicy.Spec.ClusterModuleGroups = clusterModuleGroups
+
+	// On create: also set the resourcePool and folder.
+	if isCreate {
+		resourcePolicy.Spec.Folder = cluster.Name
+		resourcePolicy.Spec.ResourcePool = vmoprv1.ResourcePoolSpec{
+			Name: cluster.Name,
+		}
 	}
+
+	return nil
 }
 
-func (s *RPService) getVirtualMachineSetResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (*vmoprv1.VirtualMachineSetResourcePolicy, error) {
+func getVirtualMachineSetResourcePolicy(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster) (*vmoprv1.VirtualMachineSetResourcePolicy, error) {
 	vmResourcePolicy := &vmoprv1.VirtualMachineSetResourcePolicy{}
 	vmResourcePolicyName := client.ObjectKey{
-		Namespace: clusterCtx.Cluster.Namespace,
-		Name:      clusterCtx.Cluster.Name,
+		Namespace: cluster.Namespace,
+		Name:      cluster.Name,
 	}
-	err := s.Client.Get(ctx, vmResourcePolicyName, vmResourcePolicy)
-	return vmResourcePolicy, err
+	if err := ctrlClient.Get(ctx, vmResourcePolicyName, vmResourcePolicy); err != nil {
+		return nil, err
+	}
+
+	return vmResourcePolicy, nil
 }
 
-func (s *RPService) createVirtualMachineSetResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (*vmoprv1.VirtualMachineSetResourcePolicy, error) {
-	vmResourcePolicy := s.newVirtualMachineSetResourcePolicy(clusterCtx)
+func getFallbackWorkerClusterModuleGroupName(clusterName string) string {
+	return fmt.Sprintf("%s-workers-0", clusterName)
+}
 
-	_, err := ctrlutil.CreateOrPatch(ctx, s.Client, vmResourcePolicy, func() error {
-		vmResourcePolicy.Spec = vmoprv1.VirtualMachineSetResourcePolicySpec{
-			ResourcePool: vmoprv1.ResourcePoolSpec{
-				Name: clusterCtx.Cluster.Name,
-			},
-			Folder: clusterCtx.Cluster.Name,
-			ClusterModuleGroups: []string{
-				ControlPlaneVMClusterModuleGroupName,
-				getMachineDeploymentNameForCluster(clusterCtx.Cluster),
-			},
-		}
-		// Ensure that the VirtualMachineSetResourcePolicy is owned by the VSphereCluster
-		if err := ctrlutil.SetOwnerReference(
-			clusterCtx.VSphereCluster,
-			vmResourcePolicy,
-			s.Client.Scheme(),
-		); err != nil {
-			return errors.Wrapf(
-				err,
-				"error setting %s/%s as owner of %s/%s",
-				clusterCtx.VSphereCluster.Namespace,
-				clusterCtx.VSphereCluster.Name,
-				vmResourcePolicy.Namespace,
-				vmResourcePolicy.Name,
-			)
+func getWorkerAntiAffinityMode(vSphereCluster *vmwarev1.VSphereCluster) vmwarev1.VSphereClusterWorkerAntiAffinityMode {
+	if vSphereCluster.Spec.Placement == nil || vSphereCluster.Spec.Placement.WorkerAntiAffinity == nil {
+		return vmwarev1.VSphereClusterWorkerAntiAffinityModeCluster
+	}
+
+	return vSphereCluster.Spec.Placement.WorkerAntiAffinity.Mode
+}
+
+func getTargetClusterModuleGroups(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster, vSphereCluster *vmwarev1.VSphereCluster) ([]string, error) {
+	if !feature.Gates.Enabled(feature.WorkerAntiAffinity) {
+		// Fall back to the old behaviour.
+		return []string{
+			ControlPlaneVMClusterModuleGroupName,
+			getFallbackWorkerClusterModuleGroupName(cluster.Name),
+		}, nil
+	}
+	// Always add a cluster module for control plane machines.
+	modules := []string{
+		ControlPlaneVMClusterModuleGroupName,
+	}
+
+	switch mode := getWorkerAntiAffinityMode(vSphereCluster); mode {
+	case vmwarev1.VSphereClusterWorkerAntiAffinityModeNone:
+		// Only configure a cluster module for control-plane nodes.
+	case vmwarev1.VSphereClusterWorkerAntiAffinityModeCluster:
+		// Add an additional cluster module for all workers when using Cluster mode.
+		modules = append(modules, ClusterWorkerVMClusterModuleGroupName)
+	case vmwarev1.VSphereClusterWorkerAntiAffinityModeMachineDeployment:
+		// Add an additional cluster module for each MachineDeployment's workers when using MachineDeployment mode.
+		machineDeploymentNames, err := getMachineDeploymentNamesForCluster(ctx, ctrlClient, cluster)
+		if err != nil {
+			return nil, err
 		}
-		return nil
-	})
+
+		modules = append(modules, machineDeploymentNames...)
+	default:
+		return nil, errors.Errorf("unknown mode %q configured for WorkerAntiAffinity", mode)
+	}
+
+	// Add cluster modules from existing VirtualMachines and deduplicate against the target ones.
+	existingModules, err := getVirtualMachineClusterModulesForCluster(ctx, ctrlClient, cluster)
 	if err != nil {
 		return nil, err
 	}
-	return vmResourcePolicy, nil
+	modules = existingModules.Insert(modules...).UnsortedList()
+
+	// Sort elements to have deterministic output.
+	sort.Strings(modules)
+
+	return modules, nil
+}
+
+func getVirtualMachineClusterModulesForCluster(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster) (sets.Set[string], error) {
+	labels := map[string]string{clusterv1.ClusterNameLabel: cluster.GetName()}
+	virtualMachineList := &vmoprv1.VirtualMachineList{}
+	if err := ctrlClient.List(
+		ctx, virtualMachineList,
+		client.InNamespace(cluster.GetNamespace()),
+		client.MatchingLabels(labels)); err != nil {
+		return nil, errors.Wrap(err, "failed to list VirtualMachine objects")
+	}
+
+	clusterModules := sets.Set[string]{}
+	for _, virtualMachine := range virtualMachineList.Items {
+		if clusterModule, ok := virtualMachine.Annotations[ClusterModuleNameAnnotationKey]; ok {
+			clusterModules = clusterModules.Insert(clusterModule)
+		}
+	}
+	return clusterModules, nil
+}
+
+func checkClusterModuleGroup(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster, clusterModuleGroupName string) error {
+	resourcePolicy, err := getVirtualMachineSetResourcePolicy(ctx, ctrlClient, cluster)
+	if err != nil {
+		return err
+	}
+
+	for _, cm := range resourcePolicy.Status.ClusterModules {
+		if cm.GroupName == clusterModuleGroupName {
+			return nil
+		}
+	}
+
+	return errors.Errorf("VirtualMachineSetResourcePolicy's .status.clusterModules does not yet contain group %q", clusterModuleGroupName)
+}
diff --git a/pkg/services/vmoperator/resource_policy_test.go b/pkg/services/vmoperator/resource_policy_test.go
index b2f0529057..c5af7bf54b 100644
--- a/pkg/services/vmoperator/resource_policy_test.go
+++ b/pkg/services/vmoperator/resource_policy_test.go
@@ -18,35 +18,144 @@ package vmoperator
 
 import (
 	"context"
-	"fmt"
 	"testing"
 
 	. "github.com/onsi/gomega"
-	capi_util "sigs.k8s.io/cluster-api/util"
+	vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	utilfeature "k8s.io/component-base/featuregate/testing"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 
-	"sigs.k8s.io/cluster-api-provider-vsphere/pkg/util"
+	vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
+	"sigs.k8s.io/cluster-api-provider-vsphere/feature"
 )
 
-func TestRPService(t *testing.T) {
-	clusterName := "test-cluster"
-	vsphereClusterName := fmt.Sprintf("%s-%s", clusterName, capi_util.RandomString((6)))
-	cluster := util.CreateCluster(clusterName)
-	vsphereCluster := util.CreateVSphereCluster(vsphereClusterName)
-	clusterCtx, controllerCtx := util.CreateClusterContext(cluster, vsphereCluster)
+func TestRPService_ReconcileResourcePolicy(t *testing.T) {
+	scheme := runtime.NewScheme()
+	_ = vmwarev1.AddToScheme(scheme)
+	_ = clusterv1.AddToScheme(scheme)
+	_ = vmoprv1.AddToScheme(scheme)
 	ctx := context.Background()
-	rpService := RPService{
-		Client: controllerCtx.Client,
+
+	tests := []struct {
+		name                    string
+		cluster                 *clusterv1.Cluster
+		vSphereCluster          *vmwarev1.VSphereCluster
+		additionalObjs          []client.Object
+		wantClusterModuleGroups []string
+		wantErr                 bool
+		workerAntiAffinity      bool
+	}{
+		{
+			name:    "create VirtualMachineSetResourcePolicy with fallback worker group on None mode (WorkerAntiAffinity: false)",
+			cluster: &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "cluster"}},
+			vSphereCluster: &vmwarev1.VSphereCluster{Spec: vmwarev1.VSphereClusterSpec{Placement: &vmwarev1.VSphereClusterPlacement{WorkerAntiAffinity: &vmwarev1.VSphereClusterWorkerAntiAffinity{
+				Mode: vmwarev1.VSphereClusterWorkerAntiAffinityModeNone,
+			}}}},
+			wantErr:                 false,
+			wantClusterModuleGroups: []string{ControlPlaneVMClusterModuleGroupName, getFallbackWorkerClusterModuleGroupName("cluster")},
+			workerAntiAffinity:      false,
+		},
+		{
+			name:    "create VirtualMachineSetResourcePolicy for control-plane only on None mode (WorkerAntiAffinity: true)",
+			cluster: &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "cluster"}},
+			vSphereCluster: &vmwarev1.VSphereCluster{Spec: vmwarev1.VSphereClusterSpec{Placement: &vmwarev1.VSphereClusterPlacement{WorkerAntiAffinity: &vmwarev1.VSphereClusterWorkerAntiAffinity{
+				Mode: vmwarev1.VSphereClusterWorkerAntiAffinityModeNone,
+			}}}},
+			wantErr:                 false,
+			wantClusterModuleGroups: []string{ControlPlaneVMClusterModuleGroupName},
+			workerAntiAffinity:      true,
+		},
+		{
+			name:    "create VirtualMachineSetResourcePolicy for control-plane and workers on Cluster mode (WorkerAntiAffinity: false)",
+			cluster: &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "cluster"}},
+			vSphereCluster: &vmwarev1.VSphereCluster{Spec: vmwarev1.VSphereClusterSpec{Placement: &vmwarev1.VSphereClusterPlacement{WorkerAntiAffinity: &vmwarev1.VSphereClusterWorkerAntiAffinity{
+				Mode: vmwarev1.VSphereClusterWorkerAntiAffinityModeCluster,
+			}}}},
+			wantErr:                 false,
+			wantClusterModuleGroups: []string{ControlPlaneVMClusterModuleGroupName, getFallbackWorkerClusterModuleGroupName("cluster")},
+			workerAntiAffinity:      false,
+		},
+		{
+			name:    "create VirtualMachineSetResourcePolicy for control-plane and workers on Cluster mode (WorkerAntiAffinity: true)",
+			cluster: &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "cluster"}},
+			vSphereCluster: &vmwarev1.VSphereCluster{Spec: vmwarev1.VSphereClusterSpec{Placement: &vmwarev1.VSphereClusterPlacement{WorkerAntiAffinity: &vmwarev1.VSphereClusterWorkerAntiAffinity{
+				Mode: vmwarev1.VSphereClusterWorkerAntiAffinityModeCluster,
+			}}}},
+			wantErr:                 false,
+			wantClusterModuleGroups: []string{ControlPlaneVMClusterModuleGroupName, ClusterWorkerVMClusterModuleGroupName},
+			workerAntiAffinity:      true,
+		},
+		{
+			name:    "create VirtualMachineSetResourcePolicy for control-plane only when no MachineDeployments exist on MachineDeployment mode (WorkerAntiAffinity: true)",
+			cluster: &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "cluster"}},
+			vSphereCluster: &vmwarev1.VSphereCluster{Spec: vmwarev1.VSphereClusterSpec{Placement: &vmwarev1.VSphereClusterPlacement{WorkerAntiAffinity: &vmwarev1.VSphereClusterWorkerAntiAffinity{
+				Mode: vmwarev1.VSphereClusterWorkerAntiAffinityModeMachineDeployment,
+			}}}},
+			wantErr:                 false,
+			wantClusterModuleGroups: []string{ControlPlaneVMClusterModuleGroupName},
+			workerAntiAffinity:      true,
+		},
+		{
+			name:    "create VirtualMachineSetResourcePolicy for control-plane and workers on MachineDeployment mode (WorkerAntiAffinity: true)",
+			cluster: &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "cluster"}},
+			vSphereCluster: &vmwarev1.VSphereCluster{Spec: vmwarev1.VSphereClusterSpec{Placement: &vmwarev1.VSphereClusterPlacement{WorkerAntiAffinity: &vmwarev1.VSphereClusterWorkerAntiAffinity{
+				Mode: vmwarev1.VSphereClusterWorkerAntiAffinityModeMachineDeployment,
+			}}}},
+			additionalObjs: []client.Object{
+				&clusterv1.MachineDeployment{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "md-1", Labels: map[string]string{clusterv1.ClusterNameLabel: "cluster"}}},
&clusterv1.MachineDeployment{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "md-0", Labels: map[string]string{clusterv1.ClusterNameLabel: "cluster"}}}, + &clusterv1.MachineDeployment{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "other-cluster-md-0", Labels: map[string]string{clusterv1.ClusterNameLabel: "other"}}}, + }, + wantErr: false, + wantClusterModuleGroups: []string{ControlPlaneVMClusterModuleGroupName, "md-0", "md-1"}, + workerAntiAffinity: true, + }, + { + name: "update VirtualMachinesetResourcePolicy for control-plane only on None mode and preserve used cluster modules from VirtualMachine's (WorkerAntiAffinity: true)", + cluster: &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "cluster"}}, + vSphereCluster: &vmwarev1.VSphereCluster{Spec: vmwarev1.VSphereClusterSpec{Placement: &vmwarev1.VSphereClusterPlacement{WorkerAntiAffinity: &vmwarev1.VSphereClusterWorkerAntiAffinity{ + Mode: vmwarev1.VSphereClusterWorkerAntiAffinityModeNone, + }}}}, + additionalObjs: []client.Object{ + &vmoprv1.VirtualMachineSetResourcePolicy{ + ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "cluster"}, + }, + &vmoprv1.VirtualMachine{ObjectMeta: metav1.ObjectMeta{Namespace: "some", Name: "machine-0", Labels: map[string]string{clusterv1.ClusterNameLabel: "cluster"}, Annotations: map[string]string{ClusterModuleNameAnnotationKey: "deleted-md-0"}}}, + }, + wantErr: false, + wantClusterModuleGroups: []string{ControlPlaneVMClusterModuleGroupName, "deleted-md-0"}, + workerAntiAffinity: true, + }, } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.WorkerAntiAffinity, tt.workerAntiAffinity) + + s := &RPService{ + Client: fake.NewClientBuilder().WithScheme(scheme).WithStatusSubresource( + &vmoprv1.VirtualMachineService{}, + &vmoprv1.VirtualMachine{}, + ).WithObjects(tt.additionalObjs...).Build(), + } + got, err := s.ReconcileResourcePolicy(ctx, tt.cluster, tt.vSphereCluster) + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + g.Expect(got).To(BeEquivalentTo("")) + } else { + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(got).To(BeEquivalentTo(tt.cluster.Name)) + } - t.Run("Creates Resource Policy using the cluster name", func(t *testing.T) { - g := NewWithT(t) - name, err := rpService.ReconcileResourcePolicy(ctx, clusterCtx) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(name).To(Equal(clusterName)) - - resourcePolicy, err := rpService.getVirtualMachineSetResourcePolicy(ctx, clusterCtx) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(resourcePolicy.Spec.ResourcePool.Name).To(Equal(clusterName)) - g.Expect(resourcePolicy.Spec.Folder).To(Equal(clusterName)) - }) + var resourcePolicy vmoprv1.VirtualMachineSetResourcePolicy + + g.Expect(s.Client.Get(ctx, client.ObjectKey{Name: got, Namespace: tt.cluster.Namespace}, &resourcePolicy)).To(Succeed()) + g.Expect(resourcePolicy.Spec.ClusterModuleGroups).To(BeEquivalentTo(tt.wantClusterModuleGroups)) + }) + } } diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index be336f598e..6e029ad5ea 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -38,6 +38,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/v1beta1" vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1" + "sigs.k8s.io/cluster-api-provider-vsphere/feature" capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context" 
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware" infrautilv1 "sigs.k8s.io/cluster-api-provider-vsphere/pkg/util" @@ -432,7 +433,9 @@ func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervis // Assign the VM's labels. vmOperatorVM.Labels = getVMLabels(supervisorMachineCtx, vmOperatorVM.Labels) - addResourcePolicyAnnotations(supervisorMachineCtx, vmOperatorVM) + if err := addResourcePolicyAnnotations(ctx, v.Client, supervisorMachineCtx, vmOperatorVM); err != nil { + return err + } if err := v.addVolumes(ctx, supervisorMachineCtx, vmOperatorVM); err != nil { return err @@ -537,7 +540,7 @@ func (v *VmopMachineService) getVirtualMachinesInCluster(ctx context.Context, su // Helper function to add annotations to indicate which tag vm-operator should add as well as which clusterModule VM // should be associated. -func addResourcePolicyAnnotations(supervisorMachineCtx *vmware.SupervisorMachineContext, vm *vmoprv1.VirtualMachine) { +func addResourcePolicyAnnotations(ctx context.Context, ctrlClient client.Client, supervisorMachineCtx *vmware.SupervisorMachineContext, vm *vmoprv1.VirtualMachine) error { annotations := vm.ObjectMeta.GetAnnotations() if annotations == nil { annotations = make(map[string]string) @@ -548,10 +551,49 @@ func addResourcePolicyAnnotations(supervisorMachineCtx *vmware.SupervisorMachine annotations[ClusterModuleNameAnnotationKey] = ControlPlaneVMClusterModuleGroupName } else { annotations[ProviderTagsAnnotationKey] = WorkerVMVMAntiAffinityTagValue - annotations[ClusterModuleNameAnnotationKey] = getMachineDeploymentNameForCluster(supervisorMachineCtx.Cluster) + + // Only set the ClusterModuleGroup annotation if not already set + if _, ok := annotations[ClusterModuleNameAnnotationKey]; !ok { + var clusterModuleGroupName string + // Set ClusterModuleGroupName depending on the configured mode from VSphereCluster.Spec.Placement.WorkerAntiAffinity.Mode + // and the WorkerAntiAffinity feature-gate + switch mode := getWorkerAntiAffinityMode(supervisorMachineCtx.VSphereCluster); mode { + case vmwarev1.VSphereClusterWorkerAntiAffinityModeNone: + // Nothing to set. + case vmwarev1.VSphereClusterWorkerAntiAffinityModeCluster: + if feature.Gates.Enabled(feature.WorkerAntiAffinity) { + // Set the cluster-wide group name. + clusterModuleGroupName = ClusterWorkerVMClusterModuleGroupName + } else { + // Fallback to old name. + clusterModuleGroupName = getFallbackWorkerClusterModuleGroupName(supervisorMachineCtx.Cluster.Name) + } + case vmwarev1.VSphereClusterWorkerAntiAffinityModeMachineDeployment: + if !feature.Gates.Enabled(feature.WorkerAntiAffinity) { + // This should not be possible because MachineDeployment mode is only allowed with the feature enabled. + return errors.New("failed to set ClusterModuleGroup in mode MachineDeployment with WorkerAntiAffinity disabled") + } + // Set the MachineDeployment name as ClusterModuleGroup. 
+				var ok bool
+				clusterModuleGroupName, ok = supervisorMachineCtx.Machine.Labels[clusterv1.MachineDeploymentNameLabel]
+				if !ok {
+					return errors.Errorf("failed to set ClusterModuleGroup because of missing label %s on Machine", clusterv1.MachineDeploymentNameLabel)
+				}
+			default:
+				return errors.Errorf("unknown mode %q configured for WorkerAntiAffinity", mode)
+			}
+
+			if clusterModuleGroupName != "" {
+				if err := checkClusterModuleGroup(ctx, ctrlClient, supervisorMachineCtx.Cluster, clusterModuleGroupName); err != nil {
+					return err
+				}
+				annotations[ClusterModuleNameAnnotationKey] = clusterModuleGroupName
+			}
+		}
 	}
 
 	vm.ObjectMeta.SetAnnotations(annotations)
+	return nil
 }
 
 func volumeName(machine *vmwarev1.VSphereMachine, volume vmwarev1.VSphereMachineVolume) string {
@@ -699,8 +741,20 @@ func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext) ma
 	return nil
 }
 
-// getMachineDeploymentName returns the MachineDeployment name for a Cluster.
-// This is also the name used by VSphereMachineTemplate and KubeadmConfigTemplate.
-func getMachineDeploymentNameForCluster(cluster *clusterv1.Cluster) string {
-	return fmt.Sprintf("%s-workers-0", cluster.Name)
+// getMachineDeploymentNamesForCluster returns the names of all MachineDeployments that belong to the Cluster and are not being deleted.
+func getMachineDeploymentNamesForCluster(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster) ([]string, error) {
+	mdNames := []string{}
+	labels := map[string]string{clusterv1.ClusterNameLabel: cluster.GetName()}
+	mdList := &clusterv1.MachineDeploymentList{}
+	if err := ctrlClient.List(
+		ctx, mdList,
+		client.InNamespace(cluster.GetNamespace()),
+		client.MatchingLabels(labels)); err != nil {
+		return nil, errors.Wrap(err, "failed to list MachineDeployment objects")
+	}
+	for _, md := range mdList.Items {
+		if md.DeletionTimestamp.IsZero() {
+			mdNames = append(mdNames, md.Name)
+		}
+	}
+	return mdNames, nil
 }
diff --git a/pkg/util/cluster.go b/pkg/util/cluster.go
index e0fcce7efd..1d6661081e 100644
--- a/pkg/util/cluster.go
+++ b/pkg/util/cluster.go
@@ -59,6 +59,36 @@ func GetVSphereClusterFromVMwareMachine(ctx context.Context, c client.Client, ma
 	return vsphereCluster, err
 }
 
+// GetVMwareVSphereClusterFromMachineDeployment gets the vmware.infrastructure.cluster.x-k8s.io.VSphereCluster resource for the given MachineDeployment.
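+// It resolves the owning Cluster via the cluster name label and then follows Cluster.spec.infrastructureRef.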
+func GetVMwareVSphereClusterFromMachineDeployment(ctx context.Context, c client.Client, machineDeployment *clusterv1.MachineDeployment) (*vmwarev1.VSphereCluster, error) {
+	clusterName := machineDeployment.Labels[clusterv1.ClusterNameLabel]
+	if clusterName == "" {
+		return nil, errors.Errorf("error getting VSphereCluster name from MachineDeployment %s/%s",
+			machineDeployment.Namespace, machineDeployment.Name)
+	}
+	namespacedName := apitypes.NamespacedName{
+		Namespace: machineDeployment.Namespace,
+		Name:      clusterName,
+	}
+	cluster := &clusterv1.Cluster{}
+	if err := c.Get(ctx, namespacedName, cluster); err != nil {
+		return nil, err
+	}
+
+	if cluster.Spec.InfrastructureRef == nil {
+		return nil, errors.Errorf("error getting VSphereCluster name from MachineDeployment %s/%s: Cluster.spec.infrastructureRef not yet set",
+			machineDeployment.Namespace, machineDeployment.Name)
+	}
+
+	vsphereClusterKey := apitypes.NamespacedName{
+		Namespace: machineDeployment.Namespace,
+		Name:      cluster.Spec.InfrastructureRef.Name,
+	}
+	vsphereCluster := &vmwarev1.VSphereCluster{}
+	err := c.Get(ctx, vsphereClusterKey, vsphereCluster)
+	return vsphereCluster, err
+}
+
 // GetVSphereClusterFromVSphereMachine gets the infrastructure.cluster.x-k8s.io.VSphereCluster resource for the given VSphereMachine.
 func GetVSphereClusterFromVSphereMachine(ctx context.Context, c client.Client, machine *infrav1.VSphereMachine) (*infrav1.VSphereCluster, error) {
 	clusterName := machine.Labels[clusterv1.ClusterNameLabel]