From 2776d2fd935f4b56019eef5302d9f3560f827eea Mon Sep 17 00:00:00 2001
From: Stefan Bueringer
Date: Tue, 27 Aug 2024 15:23:37 +0200
Subject: [PATCH] Improve Node drain e2e test coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stefan Büringer buringerst@vmware.com
---
 Makefile                                     |   1 -
 .../machinehealthcheck_targets.go            |   4 +-
 test/e2e/config/docker.yaml                  |   2 -
 .../cluster-with-kcp.yaml                    |   9 -
 .../kustomization.yaml                       |   8 -
 .../main/cluster-template-node-drain/md.yaml |   8 -
 test/e2e/node_drain_timeout.go               | 196 ++++++++++++++----
 test/e2e/node_drain_timeout_test.go          |   1 +
 test/framework/deployment_helpers.go         |  76 ++++---
 9 files changed, 202 insertions(+), 103 deletions(-)
 delete mode 100644 test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/cluster-with-kcp.yaml
 delete mode 100644 test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/kustomization.yaml
 delete mode 100644 test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/md.yaml

diff --git a/Makefile b/Makefile
index 945462337d44..b4698bf7ae91 100644
--- a/Makefile
+++ b/Makefile
@@ -600,7 +600,6 @@ generate-e2e-templates-main: $(KUSTOMIZE)
 	echo "---" >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption/step2 --load-restrictor LoadRestrictionsNone >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-machine-pool --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-machine-pool.yaml
-	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-node-drain --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-node-drain.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-upgrades --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-upgrades.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-upgrades-runtimesdk --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-upgrades-runtimesdk.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in.yaml
diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go b/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go
index abe9bf6e3b12..5544291360dd 100644
--- a/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go
+++ b/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go
@@ -317,8 +317,8 @@ func (r *Reconciler) healthCheckTargets(targets []healthCheckTarget, logger logr
 			t.Machine,
 			corev1.EventTypeNormal,
 			EventDetectedUnhealthy,
-			"Machine %v has unhealthy node %v",
-			t.string(),
+			"Machine %s has unhealthy Node %s",
+			klog.KObj(t.Machine),
 			t.nodeName(),
 		)
 		nextCheckTimes = append(nextCheckTimes, nextCheck)
diff --git a/test/e2e/config/docker.yaml b/test/e2e/config/docker.yaml
index 242055c0d64a..fce2cb55dd18 100644
--- a/test/e2e/config/docker.yaml
+++ b/test/e2e/config/docker.yaml
@@ -347,7 +347,6 @@ providers:
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-remediation.yaml"
      - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-adoption.yaml"
      - sourcePath: "../data/infrastructure-docker/main/cluster-template-machine-pool.yaml"
-      - sourcePath: "../data/infrastructure-docker/main/cluster-template-node-drain.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-upgrades.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-upgrades-runtimesdk.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-scale-in.yaml"
@@ -408,7 +407,6 @@ variables:
   CNI: "./data/cni/kindnet/kindnet.yaml"
   KUBETEST_CONFIGURATION: "./data/kubetest/conformance.yaml"
   AUTOSCALER_WORKLOAD: "./data/autoscaler/autoscaler-to-workload-workload.yaml"
-  NODE_DRAIN_TIMEOUT: "60s"
   # Enabling the feature flags by setting the env variables.
   # Note: EXP_CLUSTER_RESOURCE_SET & EXP_MACHINE_POOL are enabled per default with CAPI v1.7.0.
   # We still have to enable them here for clusterctl upgrade tests that use older versions.
diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/cluster-with-kcp.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/cluster-with-kcp.yaml
deleted file mode 100644
index 91a5a7f4728f..000000000000
--- a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/cluster-with-kcp.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-# KubeadmControlPlane referenced by the Cluster object with
-# - the label kcp-adoption.step2, because it should be created in the second step of the kcp-adoption test.
-kind: KubeadmControlPlane
-apiVersion: controlplane.cluster.x-k8s.io/v1beta1
-metadata:
-  name: "${CLUSTER_NAME}-control-plane"
-spec:
-  machineTemplate:
-    nodeDrainTimeout: ${NODE_DRAIN_TIMEOUT}
diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/kustomization.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/kustomization.yaml
deleted file mode 100644
index a2f9bea098b4..000000000000
--- a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/kustomization.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-resources:
-- ../bases/crs.yaml
-- ../bases/md.yaml
-- ../bases/cluster-with-kcp.yaml
-
-patches:
-- path: md.yaml
-- path: cluster-with-kcp.yaml
diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/md.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/md.yaml
deleted file mode 100644
index bc4577762f31..000000000000
--- a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/md.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-apiVersion: cluster.x-k8s.io/v1beta1
-kind: MachineDeployment
-metadata:
-  name: "${CLUSTER_NAME}-md-0"
-spec:
-  template:
-    spec:
-      nodeDrainTimeout: "${NODE_DRAIN_TIMEOUT}"
diff --git a/test/e2e/node_drain_timeout.go b/test/e2e/node_drain_timeout.go
index 51337e860431..21b42dd80180 100644
--- a/test/e2e/node_drain_timeout.go
+++ b/test/e2e/node_drain_timeout.go
@@ -30,10 +30,10 @@ import (
 	"k8s.io/utils/ptr"
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
-	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
 	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/cluster-api/util/conditions"
 )
 
 // NodeDrainTimeoutSpecInput is the input for NodeDrainTimeoutSpec.
@@ -52,10 +52,8 @@ type NodeDrainTimeoutSpecInput struct {
 	// able to identify the default.
 	InfrastructureProvider *string
 
-	// Flavor, if specified, must refer to a template that contains
-	// a KubeadmControlPlane resource with spec.machineTemplate.nodeDrainTimeout
-	// configured and a MachineDeployment resource that has
-	// spec.template.spec.nodeDrainTimeout configured.
+	// Flavor, if specified, must refer to a template that uses a Cluster with ClusterClass.
+	// The cluster must use a KubeadmControlPlane and a MachineDeployment.
 	// If not specified, "node-drain" is used.
 	Flavor *string
 
@@ -66,13 +64,11 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeoutSpecInput) {
 	var (
-		specName           = "node-drain"
-		input              NodeDrainTimeoutSpecInput
-		namespace          *corev1.Namespace
-		cancelWatches      context.CancelFunc
-		clusterResources   *clusterctl.ApplyClusterTemplateAndWaitResult
-		machineDeployments []*clusterv1.MachineDeployment
-		controlplane       *controlplanev1.KubeadmControlPlane
+		specName         = "node-drain"
+		input            NodeDrainTimeoutSpecInput
+		namespace        *corev1.Namespace
+		cancelWatches    context.CancelFunc
+		clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
 	)
 
 	BeforeEach(func() {
@@ -97,6 +93,7 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 		if input.InfrastructureProvider != nil {
 			infrastructureProvider = *input.InfrastructureProvider
 		}
+		controlPlaneReplicas := 3
 		clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
 			ClusterProxy: input.BootstrapClusterProxy,
@@ -118,52 +115,167 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		}, clusterResources)
 		cluster := clusterResources.Cluster
-		controlplane = clusterResources.ControlPlane
-		machineDeployments = clusterResources.MachineDeployments
+		controlplane := clusterResources.ControlPlane
+		machineDeployments := clusterResources.MachineDeployments
 		Expect(machineDeployments[0].Spec.Replicas).To(Equal(ptr.To[int32](1)))
 
-		By("Add a deployment with unevictable pods and podDisruptionBudget to the workload cluster. The deployed pods cannot be evicted in the node draining process.")
+		// This label will be added to all Machines so we can later create the unevictable Pods on the right Nodes.
+		nodeOwnerLabelKey := "owner.node.cluster.x-k8s.io"
+
+		By("Ensure Node label is set & NodeDrainTimeout is set to 0 (wait forever) on ControlPlane and MachineDeployment topologies")
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if topology.Metadata.Labels == nil {
+					topology.Metadata.Labels = map[string]string{}
+				}
+				topology.Metadata.Labels[nodeOwnerLabelKey] = "KubeadmControlPlane-" + controlplane.Name
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if topology.Metadata.Labels == nil {
+					topology.Metadata.Labels = map[string]string{}
+				}
+				for _, md := range machineDeployments {
+					if md.Labels[clusterv1.ClusterTopologyMachineDeploymentNameLabel] == topology.Name {
+						topology.Metadata.Labels[nodeOwnerLabelKey] = "MachineDeployment-" + md.Name
+					}
+				}
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
+		})
+
 		workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, cluster.Namespace, cluster.Name)
+		By("Deploy Deployment with unevictable Pods on control plane Nodes.")
+
+		cpDeploymentAndPDBName := fmt.Sprintf("%s-%s", "unevictable-pod-cp", util.RandomString(3))
 		framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
 			WorkloadClusterProxy:               workloadClusterProxy,
-			DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod", util.RandomString(3)),
+			ControlPlane:                       controlplane,
+			DeploymentName:                     cpDeploymentAndPDBName,
 			Namespace:                          namespace.Name + "-unevictable-workload",
+			NodeSelector:                       map[string]string{nodeOwnerLabelKey: "KubeadmControlPlane-" + controlplane.Name},
 			WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
 		})
-
-		By("Scale the machinedeployment down to zero. If we didn't have the NodeDrainTimeout duration, the node drain process would block this operator.")
-		// Because all the machines of a machinedeployment can be deleted at the same time, so we only prepare the interval for 1 replica.
-		nodeDrainTimeoutMachineDeploymentInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), machineDeployments[0].Spec.Template.Spec.NodeDrainTimeout, 1)
+		By("Deploy Deployment with unevictable Pods on MachineDeployment Nodes.")
+		mdDeploymentAndPDBNames := map[string]string{}
 		for _, md := range machineDeployments {
-			framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{
-				ClusterProxy:              input.BootstrapClusterProxy,
-				Cluster:                   cluster,
-				MachineDeployment:         md,
-				WaitForMachineDeployments: nodeDrainTimeoutMachineDeploymentInterval,
-				Replicas:                  0,
+			mdDeploymentAndPDBNames[md.Name] = fmt.Sprintf("%s-%s", "unevictable-pod-md", util.RandomString(3))
+			framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
+				WorkloadClusterProxy:               workloadClusterProxy,
+				MachineDeployment:                  md,
+				DeploymentName:                     mdDeploymentAndPDBNames[md.Name],
+				Namespace:                          namespace.Name + "-unevictable-workload",
+				NodeSelector:                       map[string]string{nodeOwnerLabelKey: "MachineDeployment-" + md.Name},
+				WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
 			})
 		}
 
-		By("Deploy deployment with unevictable pods on control plane nodes.")
-		framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
-			WorkloadClusterProxy:               workloadClusterProxy,
-			ControlPlane:                       controlplane,
-			DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod", util.RandomString(3)),
-			Namespace:                          namespace.Name + "-unevictable-workload",
-			WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
+		By("Scale down the control plane to 1 and MachineDeployments to 0.")
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.Replicas = ptr.To[int32](1)
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.Replicas = ptr.To[int32](0)
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		})
 
-		By("Scale down the controlplane of the workload cluster and make sure that nodes running workload can be deleted even the draining process is blocked.")
-		// When we scale down the KCP, controlplane machines are by default deleted one by one, so it requires more time.
-		nodeDrainTimeoutKCPInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), controlplane.Spec.MachineTemplate.NodeDrainTimeout, controlPlaneReplicas)
-		framework.ScaleAndWaitControlPlane(ctx, framework.ScaleAndWaitControlPlaneInput{
-			ClusterProxy:        input.BootstrapClusterProxy,
-			Cluster:             cluster,
-			ControlPlane:        controlplane,
-			Replicas:            1,
-			WaitForControlPlane: nodeDrainTimeoutKCPInterval,
+		By("Verify Node drains for control plane and MachineDeployment Machines are blocked")
+		Eventually(func(g Gomega) {
+			controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
+				Lister:      input.BootstrapClusterProxy.GetClient(),
+				ClusterName: cluster.Name,
+				Namespace:   cluster.Namespace,
+			})
+			var condition *clusterv1.Condition
+			for _, machine := range controlPlaneMachines {
+				condition = conditions.Get(&machine, clusterv1.DrainingSucceededCondition)
+				if condition != nil {
+					// We only expect to find the condition on one Machine (as KCP will only try to drain one Machine at a time)
+					break
+				}
+			}
+			g.Expect(condition).ToNot(BeNil())
+			g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+			g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", cpDeploymentAndPDBName)))
+		}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+		for _, md := range machineDeployments {
+			Eventually(func(g Gomega) {
+				machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
+					Lister:            input.BootstrapClusterProxy.GetClient(),
+					ClusterName:       cluster.Name,
+					Namespace:         cluster.Namespace,
+					MachineDeployment: *md,
+				})
+				g.Expect(machines).To(HaveLen(1))
+				condition := conditions.Get(&machines[0], clusterv1.DrainingSucceededCondition)
+				g.Expect(condition).ToNot(BeNil())
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", mdDeploymentAndPDBNames[md.Name])))
+			}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+		}
+
+		By("Set NodeDrainTimeout to 1s to unblock Node drain")
+		// Note: This also verifies that KCP & MachineDeployments are still propagating changes to NodeDrainTimeout down to
+		// Machines that already have a deletionTimestamp.
+		drainTimeout := &metav1.Duration{Duration: time.Duration(1) * time.Second}
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.NodeDrainTimeout = drainTimeout
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.NodeDrainTimeout = drainTimeout
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		})
 
+		By("Verify Node drains were unblocked")
+		// When we scale down the KCP, controlplane machines are deleted one by one, so it requires more time
+		// MD Machine deletion is done in parallel and will be faster.
+		nodeDrainTimeoutKCPInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), drainTimeout, controlPlaneReplicas)
+		Eventually(func(g Gomega) {
+			// When all drains complete we only have 1 control plane & 0 MD replicas left.
+			controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
+				Lister:      input.BootstrapClusterProxy.GetClient(),
+				ClusterName: cluster.Name,
+				Namespace:   cluster.Namespace,
+			})
+			g.Expect(controlPlaneMachines).To(HaveLen(1))
+
+			for _, md := range machineDeployments {
+				machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
+					Lister:            input.BootstrapClusterProxy.GetClient(),
+					ClusterName:       cluster.Name,
+					Namespace:         cluster.Namespace,
+					MachineDeployment: *md,
+				})
+				g.Expect(machines).To(BeEmpty())
+			}
+		}, nodeDrainTimeoutKCPInterval...).Should(Succeed())
+
 		By("PASSED!")
 	})
diff --git a/test/e2e/node_drain_timeout_test.go b/test/e2e/node_drain_timeout_test.go
index 408b9d44e0c5..882c9621ff5c 100644
--- a/test/e2e/node_drain_timeout_test.go
+++ b/test/e2e/node_drain_timeout_test.go
@@ -32,6 +32,7 @@ var _ = Describe("When testing node drain timeout", func() {
 			BootstrapClusterProxy: bootstrapClusterProxy,
 			ArtifactFolder:        artifactFolder,
 			SkipCleanup:           skipCleanup,
+			Flavor:                ptr.To("topology"),
 			InfrastructureProvider: ptr.To("docker"),
 		}
 	})
diff --git a/test/framework/deployment_helpers.go b/test/framework/deployment_helpers.go
index a2b0c8c141a8..e139f97b7080 100644
--- a/test/framework/deployment_helpers.go
+++ b/test/framework/deployment_helpers.go
@@ -42,16 +42,14 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/apimachinery/pkg/util/intstr"
-	utilversion "k8s.io/apimachinery/pkg/util/version"
 	"k8s.io/apimachinery/pkg/util/wait"
-	"k8s.io/apimachinery/pkg/version"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/klog/v2"
-	"k8s.io/utils/ptr"
 	toolscache "sigs.k8s.io/controller-runtime/pkg/cache"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	. "sigs.k8s.io/cluster-api/test/framework/ginkgoextensions"
 	"sigs.k8s.io/cluster-api/test/framework/internal/log"
@@ -493,16 +491,22 @@ func WaitForDNSUpgrade(ctx context.Context, input WaitForDNSUpgradeInput, interv
 type DeployUnevictablePodInput struct {
 	WorkloadClusterProxy ClusterProxy
 	ControlPlane         *controlplanev1.KubeadmControlPlane
+	MachineDeployment    *clusterv1.MachineDeployment
 	DeploymentName       string
 	Namespace            string
+	NodeSelector         map[string]string
 
 	WaitForDeploymentAvailableInterval []interface{}
 }
 
+// DeployUnevictablePod will deploy a Deployment on a ControlPlane or MachineDeployment.
+// It will deploy one Pod replica to each Machine and then deploy a PDB to ensure none of the Pods can be evicted.
 func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput) {
 	Expect(input.DeploymentName).ToNot(BeNil(), "Need a deployment name in DeployUnevictablePod")
 	Expect(input.Namespace).ToNot(BeNil(), "Need a namespace in DeployUnevictablePod")
 	Expect(input.WorkloadClusterProxy).ToNot(BeNil(), "Need a workloadClusterProxy in DeployUnevictablePod")
+	Expect((input.MachineDeployment == nil && input.ControlPlane != nil) ||
+		(input.MachineDeployment != nil && input.ControlPlane == nil)).To(BeTrue(), "Either MachineDeployment or ControlPlane must be set in DeployUnevictablePod")
 
 	EnsureNamespace(ctx, input.WorkloadClusterProxy.GetClient(), input.Namespace)
@@ -512,16 +516,17 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 			Namespace: input.Namespace,
 		},
 		Spec: appsv1.DeploymentSpec{
-			Replicas: ptr.To[int32](4),
 			Selector: &metav1.LabelSelector{
 				MatchLabels: map[string]string{
-					"app": "nonstop",
+					"app":        "nonstop",
+					"deployment": input.DeploymentName,
 				},
 			},
 			Template: corev1.PodTemplateSpec{
 				ObjectMeta: metav1.ObjectMeta{
 					Labels: map[string]string{
-						"app": "nonstop",
+						"app":        "nonstop",
+						"deployment": input.DeploymentName,
 					},
 				},
 				Spec: corev1.PodSpec{
@@ -531,6 +536,25 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 							Image: "registry.k8s.io/pause:3.10",
 						},
 					},
+					Affinity: &corev1.Affinity{
+						// Make sure only 1 Pod of this Deployment can run on the same Node.
+						PodAntiAffinity: &corev1.PodAntiAffinity{
+							RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{
+								{
+									LabelSelector: &metav1.LabelSelector{
+										MatchExpressions: []metav1.LabelSelectorRequirement{
+											{
+												Key:      "deployment",
+												Operator: "In",
+												Values:   []string{input.DeploymentName},
+											},
+										},
+									},
+									TopologyKey: "kubernetes.io/hostname",
+								},
+							},
+						},
+					},
 				},
 			},
 		},
@@ -538,31 +562,24 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 	workloadClient := input.WorkloadClusterProxy.GetClientSet()
 
 	if input.ControlPlane != nil {
-		var serverVersion *version.Info
-		Eventually(func() error {
-			var err error
-			serverVersion, err = workloadClient.ServerVersion()
-			return err
-		}, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "failed to get server version")
-
-		// Use the control-plane label for Kubernetes version >= v1.20.0.
-		if utilversion.MustParseGeneric(serverVersion.String()).AtLeast(utilversion.MustParseGeneric("v1.20.0")) {
-			workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleControlPlane: ""}
-		} else {
-			workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleOldControlPlane: ""}
-		}
-
+		workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleControlPlane: ""}
 		workloadDeployment.Spec.Template.Spec.Tolerations = []corev1.Toleration{
-			{
-				Key:    nodeRoleOldControlPlane,
-				Effect: "NoSchedule",
-			},
 			{
 				Key:    nodeRoleControlPlane,
 				Effect: "NoSchedule",
 			},
 		}
+		workloadDeployment.Spec.Replicas = input.ControlPlane.Spec.Replicas
+	}
+	if input.MachineDeployment != nil {
+		workloadDeployment.Spec.Replicas = input.MachineDeployment.Spec.Replicas
+	}
+
+	// Note: If set, the NodeSelector field overwrites the NodeSelector we set above for control plane nodes.
+	if input.NodeSelector != nil {
+		workloadDeployment.Spec.Template.Spec.NodeSelector = input.NodeSelector
 	}
+
 	AddDeploymentToWorkloadCluster(ctx, AddDeploymentToWorkloadClusterInput{
 		Namespace: input.Namespace,
 		ClientSet: workloadClient,
@@ -570,10 +587,6 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 	})
 
 	budget := &policyv1.PodDisruptionBudget{
-		TypeMeta: metav1.TypeMeta{
-			Kind:       "PodDisruptionBudget",
-			APIVersion: "policy/v1",
-		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      input.DeploymentName,
 			Namespace: input.Namespace,
@@ -581,13 +594,14 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 		Spec: policyv1.PodDisruptionBudgetSpec{
 			Selector: &metav1.LabelSelector{
 				MatchLabels: map[string]string{
-					"app": "nonstop",
+					"app":        "nonstop",
+					"deployment": input.DeploymentName,
 				},
 			},
+			// Setting MaxUnavailable to 0 means no Pods can be evicted / unavailable.
 			MaxUnavailable: &intstr.IntOrString{
 				Type:   intstr.Int,
-				IntVal: 1,
-				StrVal: "1",
+				IntVal: 0,
 			},
 		},
 	}
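
Note on the mechanism the patch relies on: a PodDisruptionBudget with MaxUnavailable set to 0 makes every eviction request fail, while the required pod anti-affinity on the "deployment" label pins exactly one Pod of the Deployment to each Node, so every Node owned by the targeted ControlPlane or MachineDeployment stays undrainable until NodeDrainTimeout expires. The standalone Go sketch below builds the same two objects that DeployUnevictablePod creates after this patch and prints them as YAML; the object name, namespace, and the use of sigs.k8s.io/yaml for printing are illustrative assumptions, not part of the patch.

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	policyv1 "k8s.io/api/policy/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
	"sigs.k8s.io/yaml"
)

func main() {
	// Illustrative values; the e2e helper derives these from DeployUnevictablePodInput.
	name, namespace := "unevictable-pod-example", "unevictable-workload"
	labels := map[string]string{"app": "nonstop", "deployment": name}

	deployment := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
		Spec: appsv1.DeploymentSpec{
			// Replicas is omitted here; DeployUnevictablePod sets it to the replica
			// count of the targeted ControlPlane or MachineDeployment.
			Selector: &metav1.LabelSelector{MatchLabels: labels},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{Labels: labels},
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{{Name: "web", Image: "registry.k8s.io/pause:3.10"}},
					Affinity: &corev1.Affinity{
						// Required anti-affinity on the "deployment" label: at most one Pod
						// of this Deployment per Node (keyed by kubernetes.io/hostname).
						PodAntiAffinity: &corev1.PodAntiAffinity{
							RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{
								LabelSelector: &metav1.LabelSelector{
									MatchExpressions: []metav1.LabelSelectorRequirement{{
										Key:      "deployment",
										Operator: metav1.LabelSelectorOpIn,
										Values:   []string{name},
									}},
								},
								TopologyKey: "kubernetes.io/hostname",
							}},
						},
					},
				},
			},
		},
	}

	// MaxUnavailable: 0 means the eviction API must reject every eviction of the
	// selected Pods, which keeps Node drains blocked until NodeDrainTimeout kicks in.
	budget := &policyv1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
		Spec: policyv1.PodDisruptionBudgetSpec{
			Selector:       &metav1.LabelSelector{MatchLabels: labels},
			MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 0},
		},
	}

	for _, obj := range []interface{}{deployment, budget} {
		out, err := yaml.Marshal(obj)
		if err != nil {
			panic(err)
		}
		fmt.Printf("---\n%s", out)
	}
}

Applying these two objects to a Node and then draining it reproduces the "Cannot evict pod as it would violate the pod's disruption budget" message that the test asserts on via the DrainingSucceeded condition.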