From 2776d2fd935f4b56019eef5302d9f3560f827eea Mon Sep 17 00:00:00 2001
From: Stefan Bueringer
Date: Tue, 27 Aug 2024 15:23:37 +0200
Subject: [PATCH] Improve Node drain e2e test coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Stefan Büringer buringerst@vmware.com
---
 Makefile                                     |   1 -
 .../machinehealthcheck_targets.go            |   4 +-
 test/e2e/config/docker.yaml                  |   2 -
 .../cluster-with-kcp.yaml                    |   9 -
 .../kustomization.yaml                       |   8 -
 .../main/cluster-template-node-drain/md.yaml |   8 -
 test/e2e/node_drain_timeout.go               | 196 ++++++++++++++----
 test/e2e/node_drain_timeout_test.go          |   1 +
 test/framework/deployment_helpers.go         |  76 ++++---
 9 files changed, 202 insertions(+), 103 deletions(-)
 delete mode 100644 test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/cluster-with-kcp.yaml
 delete mode 100644 test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/kustomization.yaml
 delete mode 100644 test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/md.yaml

diff --git a/Makefile b/Makefile
index 945462337d44..b4698bf7ae91 100644
--- a/Makefile
+++ b/Makefile
@@ -600,7 +600,6 @@ generate-e2e-templates-main: $(KUSTOMIZE)
 	echo "---" >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption/step2 --load-restrictor LoadRestrictionsNone >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-machine-pool --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-machine-pool.yaml
-	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-node-drain --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-node-drain.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-upgrades --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-upgrades.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-upgrades-runtimesdk --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-upgrades-runtimesdk.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in.yaml
diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go b/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go
index abe9bf6e3b12..5544291360dd 100644
--- a/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go
+++ b/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go
@@ -317,8 +317,8 @@ func (r *Reconciler) healthCheckTargets(targets []healthCheckTarget, logger logr
 			t.Machine,
 			corev1.EventTypeNormal,
 			EventDetectedUnhealthy,
-			"Machine %v has unhealthy node %v",
-			t.string(),
+			"Machine %s has unhealthy Node %s",
+			klog.KObj(t.Machine),
 			t.nodeName(),
 		)
 		nextCheckTimes = append(nextCheckTimes, nextCheck)
diff --git a/test/e2e/config/docker.yaml b/test/e2e/config/docker.yaml
index 242055c0d64a..fce2cb55dd18 100644
--- a/test/e2e/config/docker.yaml
+++ b/test/e2e/config/docker.yaml
@@ -347,7 +347,6 @@ providers:
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-remediation.yaml"
      - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-adoption.yaml"
      - sourcePath: "../data/infrastructure-docker/main/cluster-template-machine-pool.yaml"
-      - sourcePath: "../data/infrastructure-docker/main/cluster-template-node-drain.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-upgrades.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-upgrades-runtimesdk.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-scale-in.yaml"
@@ -408,7 +407,6 @@ variables:
   CNI: "./data/cni/kindnet/kindnet.yaml"
   KUBETEST_CONFIGURATION: "./data/kubetest/conformance.yaml"
   AUTOSCALER_WORKLOAD: "./data/autoscaler/autoscaler-to-workload-workload.yaml"
-  NODE_DRAIN_TIMEOUT: "60s"
   # Enabling the feature flags by setting the env variables.
   # Note: EXP_CLUSTER_RESOURCE_SET & EXP_MACHINE_POOL are enabled per default with CAPI v1.7.0.
   # We still have to enable them here for clusterctl upgrade tests that use older versions.
diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/cluster-with-kcp.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/cluster-with-kcp.yaml
deleted file mode 100644
index 91a5a7f4728f..000000000000
--- a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/cluster-with-kcp.yaml
+++ /dev/null
@@ -1,9 +0,0 @@
-# KubeadmControlPlane referenced by the Cluster object with
-# - the label kcp-adoption.step2, because it should be created in the second step of the kcp-adoption test.
-kind: KubeadmControlPlane
-apiVersion: controlplane.cluster.x-k8s.io/v1beta1
-metadata:
-  name: "${CLUSTER_NAME}-control-plane"
-spec:
-  machineTemplate:
-    nodeDrainTimeout: ${NODE_DRAIN_TIMEOUT}
diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/kustomization.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/kustomization.yaml
deleted file mode 100644
index a2f9bea098b4..000000000000
--- a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/kustomization.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-resources:
-- ../bases/crs.yaml
-- ../bases/md.yaml
-- ../bases/cluster-with-kcp.yaml
-
-patches:
-- path: md.yaml
-- path: cluster-with-kcp.yaml
diff --git a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/md.yaml b/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/md.yaml
deleted file mode 100644
index bc4577762f31..000000000000
--- a/test/e2e/data/infrastructure-docker/main/cluster-template-node-drain/md.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-apiVersion: cluster.x-k8s.io/v1beta1
-kind: MachineDeployment
-metadata:
-  name: "${CLUSTER_NAME}-md-0"
-spec:
-  template:
-    spec:
-      nodeDrainTimeout: "${NODE_DRAIN_TIMEOUT}"
diff --git a/test/e2e/node_drain_timeout.go b/test/e2e/node_drain_timeout.go
index 51337e860431..21b42dd80180 100644
--- a/test/e2e/node_drain_timeout.go
+++ b/test/e2e/node_drain_timeout.go
@@ -30,10 +30,10 @@ import (
 	"k8s.io/utils/ptr"
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
-	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
 	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/cluster-api/util/conditions"
 )
 
 // NodeDrainTimeoutSpecInput is the input for NodeDrainTimeoutSpec.
@@ -52,10 +52,8 @@ type NodeDrainTimeoutSpecInput struct {
 	// able to identify the default.
 	InfrastructureProvider *string
 
-	// Flavor, if specified, must refer to a template that contains
-	// a KubeadmControlPlane resource with spec.machineTemplate.nodeDrainTimeout
-	// configured and a MachineDeployment resource that has
-	// spec.template.spec.nodeDrainTimeout configured.
+	// Flavor, if specified, must refer to a template that uses a Cluster with ClusterClass.
+	// The cluster must use a KubeadmControlPlane and a MachineDeployment.
 	// If not specified, "node-drain" is used.
 	Flavor *string
 
@@ -66,13 +64,11 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeoutSpecInput) {
 	var (
-		specName           = "node-drain"
-		input              NodeDrainTimeoutSpecInput
-		namespace          *corev1.Namespace
-		cancelWatches      context.CancelFunc
-		clusterResources   *clusterctl.ApplyClusterTemplateAndWaitResult
-		machineDeployments []*clusterv1.MachineDeployment
-		controlplane       *controlplanev1.KubeadmControlPlane
+		specName         = "node-drain"
+		input            NodeDrainTimeoutSpecInput
+		namespace        *corev1.Namespace
+		cancelWatches    context.CancelFunc
+		clusterResources *clusterctl.ApplyClusterTemplateAndWaitResult
 	)
 
 	BeforeEach(func() {
@@ -97,6 +93,7 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 		if input.InfrastructureProvider != nil {
 			infrastructureProvider = *input.InfrastructureProvider
 		}
+		controlPlaneReplicas := 3
 		clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{
 			ClusterProxy: input.BootstrapClusterProxy,
@@ -118,52 +115,167 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		}, clusterResources)
 		cluster := clusterResources.Cluster
-		controlplane = clusterResources.ControlPlane
-		machineDeployments = clusterResources.MachineDeployments
+		controlplane := clusterResources.ControlPlane
+		machineDeployments := clusterResources.MachineDeployments
 		Expect(machineDeployments[0].Spec.Replicas).To(Equal(ptr.To[int32](1)))
 
-		By("Add a deployment with unevictable pods and podDisruptionBudget to the workload cluster. The deployed pods cannot be evicted in the node draining process.")
+		// This label will be added to all Machines so we can later create the unevictable Pods on the right Nodes.
+		nodeOwnerLabelKey := "owner.node.cluster.x-k8s.io"
+
+		By("Ensure Node label is set & NodeDrainTimeout is set to 0 (wait forever) on ControlPlane and MachineDeployment topologies")
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if topology.Metadata.Labels == nil {
+					topology.Metadata.Labels = map[string]string{}
+				}
+				topology.Metadata.Labels[nodeOwnerLabelKey] = "KubeadmControlPlane-" + controlplane.Name
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if topology.Metadata.Labels == nil {
+					topology.Metadata.Labels = map[string]string{}
+				}
+				for _, md := range machineDeployments {
+					if md.Labels[clusterv1.ClusterTopologyMachineDeploymentNameLabel] == topology.Name {
+						topology.Metadata.Labels[nodeOwnerLabelKey] = "MachineDeployment-" + md.Name
+					}
+				}
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
+		})
+
 		workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, cluster.Namespace, cluster.Name)
+		By("Deploy Deployment with unevictable Pods on control plane Nodes.")
+
+		cpDeploymentAndPDBName := fmt.Sprintf("%s-%s", "unevictable-pod-cp", util.RandomString(3))
 		framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
 			WorkloadClusterProxy:               workloadClusterProxy,
-			DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod", util.RandomString(3)),
+			ControlPlane:                       controlplane,
+			DeploymentName:                     cpDeploymentAndPDBName,
 			Namespace:                          namespace.Name + "-unevictable-workload",
+			NodeSelector:                       map[string]string{nodeOwnerLabelKey: "KubeadmControlPlane-" + controlplane.Name},
 			WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
 		})
-
-		By("Scale the machinedeployment down to zero. If we didn't have the NodeDrainTimeout duration, the node drain process would block this operator.")
-		// Because all the machines of a machinedeployment can be deleted at the same time, so we only prepare the interval for 1 replica.
-		nodeDrainTimeoutMachineDeploymentInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), machineDeployments[0].Spec.Template.Spec.NodeDrainTimeout, 1)
+		By("Deploy Deployment with unevictable Pods on MachineDeployment Nodes.")
+		mdDeploymentAndPDBNames := map[string]string{}
 		for _, md := range machineDeployments {
-			framework.ScaleAndWaitMachineDeployment(ctx, framework.ScaleAndWaitMachineDeploymentInput{
-				ClusterProxy:              input.BootstrapClusterProxy,
-				Cluster:                   cluster,
-				MachineDeployment:         md,
-				WaitForMachineDeployments: nodeDrainTimeoutMachineDeploymentInterval,
-				Replicas:                  0,
+			mdDeploymentAndPDBNames[md.Name] = fmt.Sprintf("%s-%s", "unevictable-pod-md", util.RandomString(3))
+			framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
+				WorkloadClusterProxy:               workloadClusterProxy,
+				MachineDeployment:                  md,
+				DeploymentName:                     mdDeploymentAndPDBNames[md.Name],
+				Namespace:                          namespace.Name + "-unevictable-workload",
+				NodeSelector:                       map[string]string{nodeOwnerLabelKey: "MachineDeployment-" + md.Name},
+				WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
 			})
 		}
 
-		By("Deploy deployment with unevictable pods on control plane nodes.")
-		framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
-			WorkloadClusterProxy:               workloadClusterProxy,
-			ControlPlane:                       controlplane,
-			DeploymentName:                     fmt.Sprintf("%s-%s", "unevictable-pod", util.RandomString(3)),
-			Namespace:                          namespace.Name + "-unevictable-workload",
-			WaitForDeploymentAvailableInterval: input.E2EConfig.GetIntervals(specName, "wait-deployment-available"),
+		By("Scale down the control plane to 1 and MachineDeployments to 0.")
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.Replicas = ptr.To[int32](1)
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.Replicas = ptr.To[int32](0)
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		})
 
-		By("Scale down the controlplane of the workload cluster and make sure that nodes running workload can be deleted even the draining process is blocked.")
-		// When we scale down the KCP, controlplane machines are by default deleted one by one, so it requires more time.
-		nodeDrainTimeoutKCPInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), controlplane.Spec.MachineTemplate.NodeDrainTimeout, controlPlaneReplicas)
-		framework.ScaleAndWaitControlPlane(ctx, framework.ScaleAndWaitControlPlaneInput{
-			ClusterProxy:        input.BootstrapClusterProxy,
-			Cluster:             cluster,
-			ControlPlane:        controlplane,
-			Replicas:            1,
-			WaitForControlPlane: nodeDrainTimeoutKCPInterval,
+		By("Verify Node drains for control plane and MachineDeployment Machines are blocked")
+		Eventually(func(g Gomega) {
+			controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
+				Lister:      input.BootstrapClusterProxy.GetClient(),
+				ClusterName: cluster.Name,
+				Namespace:   cluster.Namespace,
+			})
+			var condition *clusterv1.Condition
+			for _, machine := range controlPlaneMachines {
+				condition = conditions.Get(&machine, clusterv1.DrainingSucceededCondition)
+				if condition != nil {
+					// We only expect to find the condition on one Machine (as KCP will only try to drain one Machine at a time)
+					break
+				}
+			}
+			g.Expect(condition).ToNot(BeNil())
+			g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+			g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", cpDeploymentAndPDBName)))
+		}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+		for _, md := range machineDeployments {
+			Eventually(func(g Gomega) {
+				machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
+					Lister:            input.BootstrapClusterProxy.GetClient(),
+					ClusterName:       cluster.Name,
+					Namespace:         cluster.Namespace,
+					MachineDeployment: *md,
+				})
+				g.Expect(machines).To(HaveLen(1))
+				condition := conditions.Get(&machines[0], clusterv1.DrainingSucceededCondition)
+				g.Expect(condition).ToNot(BeNil())
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				g.Expect(condition.Message).To(ContainSubstring(fmt.Sprintf("Cannot evict pod as it would violate the pod's disruption budget. The disruption budget %s needs", mdDeploymentAndPDBNames[md.Name])))
+			}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+		}
+
+		By("Set NodeDrainTimeout to 1s to unblock Node drain")
+		// Note: This also verifies that KCP & MachineDeployments are still propagating changes to NodeDrainTimeout down to
+		// Machines that already have a deletionTimestamp.
+		drainTimeout := &metav1.Duration{Duration: time.Duration(1) * time.Second}
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.NodeDrainTimeout = drainTimeout
+			},
+			WaitForControlPlane: input.E2EConfig.GetIntervals(specName, "wait-control-plane"),
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.NodeDrainTimeout = drainTimeout
+			},
+			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		})
 
+		By("Verify Node drains were unblocked")
+		// When we scale down the KCP, controlplane machines are deleted one by one, so it requires more time
+		// MD Machine deletion is done in parallel and will be faster.
+		nodeDrainTimeoutKCPInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), drainTimeout, controlPlaneReplicas)
+		Eventually(func(g Gomega) {
+			// When all drains complete we only have 1 control plane & 0 MD replicas left.
+			controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{
+				Lister:      input.BootstrapClusterProxy.GetClient(),
+				ClusterName: cluster.Name,
+				Namespace:   cluster.Namespace,
+			})
+			g.Expect(controlPlaneMachines).To(HaveLen(1))
+
+			for _, md := range machineDeployments {
+				machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{
+					Lister:            input.BootstrapClusterProxy.GetClient(),
+					ClusterName:       cluster.Name,
+					Namespace:         cluster.Namespace,
+					MachineDeployment: *md,
+				})
+				g.Expect(machines).To(BeEmpty())
+			}
+		}, nodeDrainTimeoutKCPInterval...).Should(Succeed())
+
 		By("PASSED!")
 	})
diff --git a/test/e2e/node_drain_timeout_test.go b/test/e2e/node_drain_timeout_test.go
index 408b9d44e0c5..882c9621ff5c 100644
--- a/test/e2e/node_drain_timeout_test.go
+++ b/test/e2e/node_drain_timeout_test.go
@@ -32,6 +32,7 @@ var _ = Describe("When testing node drain timeout", func() {
 			BootstrapClusterProxy: bootstrapClusterProxy,
 			ArtifactFolder:        artifactFolder,
 			SkipCleanup:           skipCleanup,
+			Flavor:                ptr.To("topology"),
 			InfrastructureProvider: ptr.To("docker"),
 		}
 	})
diff --git a/test/framework/deployment_helpers.go b/test/framework/deployment_helpers.go
index a2b0c8c141a8..e139f97b7080 100644
--- a/test/framework/deployment_helpers.go
+++ b/test/framework/deployment_helpers.go
@@ -42,16 +42,14 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/apimachinery/pkg/util/intstr"
-	utilversion "k8s.io/apimachinery/pkg/util/version"
 	"k8s.io/apimachinery/pkg/util/wait"
-	"k8s.io/apimachinery/pkg/version"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/klog/v2"
-	"k8s.io/utils/ptr"
 	toolscache "sigs.k8s.io/controller-runtime/pkg/cache"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	. "sigs.k8s.io/cluster-api/test/framework/ginkgoextensions"
 	"sigs.k8s.io/cluster-api/test/framework/internal/log"
@@ -493,16 +491,22 @@ func WaitForDNSUpgrade(ctx context.Context, input WaitForDNSUpgradeInput, interv
 type DeployUnevictablePodInput struct {
 	WorkloadClusterProxy ClusterProxy
 	ControlPlane         *controlplanev1.KubeadmControlPlane
+	MachineDeployment    *clusterv1.MachineDeployment
 	DeploymentName       string
 	Namespace            string
+	NodeSelector         map[string]string
 
 	WaitForDeploymentAvailableInterval []interface{}
 }
 
+// DeployUnevictablePod will deploy a Deployment on a ControlPlane or MachineDeployment.
+// It will deploy one Pod replica to each Machine and then deploy a PDB to ensure none of the Pods can be evicted.
 func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput) {
 	Expect(input.DeploymentName).ToNot(BeNil(), "Need a deployment name in DeployUnevictablePod")
 	Expect(input.Namespace).ToNot(BeNil(), "Need a namespace in DeployUnevictablePod")
 	Expect(input.WorkloadClusterProxy).ToNot(BeNil(), "Need a workloadClusterProxy in DeployUnevictablePod")
+	Expect((input.MachineDeployment == nil && input.ControlPlane != nil) ||
+		(input.MachineDeployment != nil && input.ControlPlane == nil)).To(BeTrue(), "Either MachineDeployment or ControlPlane must be set in DeployUnevictablePod")
 
 	EnsureNamespace(ctx, input.WorkloadClusterProxy.GetClient(), input.Namespace)
@@ -512,16 +516,17 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 			Namespace: input.Namespace,
 		},
 		Spec: appsv1.DeploymentSpec{
-			Replicas: ptr.To[int32](4),
 			Selector: &metav1.LabelSelector{
 				MatchLabels: map[string]string{
-					"app": "nonstop",
+					"app":        "nonstop",
+					"deployment": input.DeploymentName,
 				},
 			},
 			Template: corev1.PodTemplateSpec{
 				ObjectMeta: metav1.ObjectMeta{
 					Labels: map[string]string{
-						"app": "nonstop",
+						"app":        "nonstop",
+						"deployment": input.DeploymentName,
 					},
 				},
 				Spec: corev1.PodSpec{
@@ -531,6 +536,25 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 							Image: "registry.k8s.io/pause:3.10",
 						},
 					},
+					Affinity: &corev1.Affinity{
+						// Make sure only 1 Pod of this Deployment can run on the same Node.
+						PodAntiAffinity: &corev1.PodAntiAffinity{
+							RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{
+								{
+									LabelSelector: &metav1.LabelSelector{
+										MatchExpressions: []metav1.LabelSelectorRequirement{
+											{
+												Key:      "deployment",
+												Operator: "In",
+												Values:   []string{input.DeploymentName},
+											},
+										},
+									},
+									TopologyKey: "kubernetes.io/hostname",
+								},
+							},
+						},
+					},
 				},
 			},
 		},
@@ -538,31 +562,24 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 	workloadClient := input.WorkloadClusterProxy.GetClientSet()
 
 	if input.ControlPlane != nil {
-		var serverVersion *version.Info
-		Eventually(func() error {
-			var err error
-			serverVersion, err = workloadClient.ServerVersion()
-			return err
-		}, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "failed to get server version")
-
-		// Use the control-plane label for Kubernetes version >= v1.20.0.
-		if utilversion.MustParseGeneric(serverVersion.String()).AtLeast(utilversion.MustParseGeneric("v1.20.0")) {
-			workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleControlPlane: ""}
-		} else {
-			workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleOldControlPlane: ""}
-		}
-
+		workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleControlPlane: ""}
 		workloadDeployment.Spec.Template.Spec.Tolerations = []corev1.Toleration{
-			{
-				Key:    nodeRoleOldControlPlane,
-				Effect: "NoSchedule",
-			},
 			{
 				Key:    nodeRoleControlPlane,
 				Effect: "NoSchedule",
 			},
 		}
+		workloadDeployment.Spec.Replicas = input.ControlPlane.Spec.Replicas
+	}
+	if input.MachineDeployment != nil {
+		workloadDeployment.Spec.Replicas = input.MachineDeployment.Spec.Replicas
+	}
+
+	// Note: If set, the NodeSelector field overwrites the NodeSelector we set above for control plane nodes.
+	if input.NodeSelector != nil {
+		workloadDeployment.Spec.Template.Spec.NodeSelector = input.NodeSelector
 	}
+
 	AddDeploymentToWorkloadCluster(ctx, AddDeploymentToWorkloadClusterInput{
 		Namespace: input.Namespace,
 		ClientSet: workloadClient,
@@ -570,10 +587,6 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 	})
 
 	budget := &policyv1.PodDisruptionBudget{
-		TypeMeta: metav1.TypeMeta{
-			Kind:       "PodDisruptionBudget",
-			APIVersion: "policy/v1",
-		},
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      input.DeploymentName,
 			Namespace: input.Namespace,
@@ -581,13 +594,14 @@ func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput)
 		Spec: policyv1.PodDisruptionBudgetSpec{
 			Selector: &metav1.LabelSelector{
 				MatchLabels: map[string]string{
-					"app": "nonstop",
+					"app":        "nonstop",
+					"deployment": input.DeploymentName,
 				},
 			},
+			// Setting MaxUnavailable to 0 means no Pods can be evicted / unavailable.
 			MaxUnavailable: &intstr.IntOrString{
 				Type:   intstr.Int,
-				IntVal: 1,
-				StrVal: "1",
+				IntVal: 0,
 			},
 		},
 	}
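
Note on the mechanism the patch relies on: a PodDisruptionBudget with MaxUnavailable set to 0 makes every eviction request fail, while the required pod anti-affinity on the "deployment" label pins exactly one Pod of the Deployment to each Node, so every Node owned by the targeted ControlPlane or MachineDeployment stays undrainable until NodeDrainTimeout expires. The standalone Go sketch below builds the same two objects that DeployUnevictablePod creates after this patch and prints them as YAML; the object name, namespace, and the use of sigs.k8s.io/yaml for printing are illustrative assumptions, not part of the patch.

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	policyv1 "k8s.io/api/policy/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
	"sigs.k8s.io/yaml"
)

func main() {
	// Illustrative values; the e2e helper derives these from DeployUnevictablePodInput.
	name, namespace := "unevictable-pod-example", "unevictable-workload"
	labels := map[string]string{"app": "nonstop", "deployment": name}

	deployment := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
		Spec: appsv1.DeploymentSpec{
			// Replicas is omitted here; DeployUnevictablePod sets it to the replica
			// count of the targeted ControlPlane or MachineDeployment.
			Selector: &metav1.LabelSelector{MatchLabels: labels},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{Labels: labels},
				Spec: corev1.PodSpec{
					Containers: []corev1.Container{{Name: "web", Image: "registry.k8s.io/pause:3.10"}},
					Affinity: &corev1.Affinity{
						// Required anti-affinity on the "deployment" label: at most one Pod
						// of this Deployment per Node (keyed by kubernetes.io/hostname).
						PodAntiAffinity: &corev1.PodAntiAffinity{
							RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{
								LabelSelector: &metav1.LabelSelector{
									MatchExpressions: []metav1.LabelSelectorRequirement{{
										Key:      "deployment",
										Operator: metav1.LabelSelectorOpIn,
										Values:   []string{name},
									}},
								},
								TopologyKey: "kubernetes.io/hostname",
							}},
						},
					},
				},
			},
		},
	}

	// MaxUnavailable: 0 means the eviction API must reject every eviction of the
	// selected Pods, which keeps Node drains blocked until NodeDrainTimeout kicks in.
	budget := &policyv1.PodDisruptionBudget{
		ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
		Spec: policyv1.PodDisruptionBudgetSpec{
			Selector:       &metav1.LabelSelector{MatchLabels: labels},
			MaxUnavailable: &intstr.IntOrString{Type: intstr.Int, IntVal: 0},
		},
	}

	for _, obj := range []interface{}{deployment, budget} {
		out, err := yaml.Marshal(obj)
		if err != nil {
			panic(err)
		}
		fmt.Printf("---\n%s", out)
	}
}

Applying these two objects to a Node and then draining it reproduces the "Cannot evict pod as it would violate the pod's disruption budget" message that the test asserts on via the DrainingSucceeded condition.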