Merge pull request #11127 from sbueringer/pr-improve-node-drain-e2e-test
🌱 Improve node drain e2e test
k8s-ci-robot authored Sep 9, 2024
2 parents a8ae016 + dbe5b1a commit eebff7b
Showing 9 changed files with 446 additions and 135 deletions.
1 change: 0 additions & 1 deletion Makefile
@@ -600,7 +600,6 @@ generate-e2e-templates-main: $(KUSTOMIZE)
echo "---" >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption/step2 --load-restrictor LoadRestrictionsNone >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-machine-pool --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-machine-pool.yaml
- $(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-node-drain --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-node-drain.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-upgrades --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-upgrades.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-upgrades-runtimesdk --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-upgrades-runtimesdk.yaml
$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in.yaml
@@ -317,8 +317,8 @@ func (r *Reconciler) healthCheckTargets(targets []healthCheckTarget, logger logr
t.Machine,
corev1.EventTypeNormal,
EventDetectedUnhealthy,
"Machine %v has unhealthy node %v",
t.string(),
"Machine %s has unhealthy Node %s",
klog.KObj(t.Machine),
t.nodeName(),
)
nextCheckTimes = append(nextCheckTimes, nextCheck)
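For context on this change: klog.KObj wraps an API object in a reference that renders as namespace/name, which is why the format verbs switch from %v to %s. A minimal standalone sketch (not part of this commit) of the resulting message:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	// Any object with GetName/GetNamespace satisfies klog.KMetadata; a Pod stands in
	// for a Machine here to keep the sketch dependency-light.
	machine := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "md-0-abc12"}}
	// Prints: Machine default/md-0-abc12 has unhealthy Node worker-1
	fmt.Printf("Machine %s has unhealthy Node %s\n", klog.KObj(machine), "worker-1")
}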
2 changes: 0 additions & 2 deletions test/e2e/config/docker.yaml
@@ -347,7 +347,6 @@ providers:
- sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-remediation.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-adoption.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-machine-pool.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-node-drain.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-upgrades.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-upgrades-runtimesdk.yaml"
- sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-scale-in.yaml"
@@ -408,7 +407,6 @@ variables:
CNI: "./data/cni/kindnet/kindnet.yaml"
KUBETEST_CONFIGURATION: "./data/kubetest/conformance.yaml"
AUTOSCALER_WORKLOAD: "./data/autoscaler/autoscaler-to-workload-workload.yaml"
- NODE_DRAIN_TIMEOUT: "60s"
# Enabling the feature flags by setting the env variables.
# Note: EXP_CLUSTER_RESOURCE_SET & EXP_MACHINE_POOL are enabled per default with CAPI v1.7.0.
# We still have to enable them here for clusterctl upgrade tests that use older versions.
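Removing the dedicated node-drain flavor also removes the NODE_DRAIN_TIMEOUT variable that template consumed; with a topology-based cluster the drain timeout can instead be set on the Cluster object itself. A hedged sketch of that approach, assuming a Cluster whose spec.topology is already populated (the helper name is made up for illustration):

package e2esketch

import (
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// setNodeDrainTimeout is a hypothetical helper: it sets the drain timeout through
// the Cluster topology instead of a NODE_DRAIN_TIMEOUT template variable.
func setNodeDrainTimeout(cluster *clusterv1.Cluster, d time.Duration) {
	timeout := &metav1.Duration{Duration: d}
	cluster.Spec.Topology.ControlPlane.NodeDrainTimeout = timeout
	if cluster.Spec.Topology.Workers != nil {
		for i := range cluster.Spec.Topology.Workers.MachineDeployments {
			cluster.Spec.Topology.Workers.MachineDeployments[i].NodeDrainTimeout = timeout
		}
	}
}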

This file was deleted.

This file was deleted.

This file was deleted.

339 changes: 297 additions & 42 deletions test/e2e/node_drain_timeout.go

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test/e2e/node_drain_timeout_test.go
@@ -32,6 +32,7 @@ var _ = Describe("When testing node drain timeout", func() {
BootstrapClusterProxy: bootstrapClusterProxy,
ArtifactFolder: artifactFolder,
SkipCleanup: skipCleanup,
+ Flavor: ptr.To("topology"),
InfrastructureProvider: ptr.To("docker"),
}
})
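The added Flavor selects the shared "topology" cluster template instead of the deleted node-drain one. By clusterctl convention the flavor maps to a template file name; a small illustrative helper (hypothetical, not framework code) makes the mapping explicit:

package e2esketch

import "fmt"

// templateNameForFlavor illustrates the usual clusterctl naming convention:
// an empty flavor maps to cluster-template.yaml, any other flavor to
// cluster-template-<flavor>.yaml (e.g. "topology" -> cluster-template-topology.yaml).
func templateNameForFlavor(flavor string) string {
	if flavor == "" {
		return "cluster-template.yaml"
	}
	return fmt.Sprintf("cluster-template-%s.yaml", flavor)
}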
209 changes: 146 additions & 63 deletions test/framework/deployment_helpers.go
@@ -42,16 +42,14 @@ import (
"k8s.io/apimachinery/pkg/labels"
kerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/intstr"
utilversion "k8s.io/apimachinery/pkg/util/version"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/version"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/utils/ptr"
toolscache "sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
. "sigs.k8s.io/cluster-api/test/framework/ginkgoextensions"
"sigs.k8s.io/cluster-api/test/framework/internal/log"
@@ -493,115 +491,200 @@ func WaitForDNSUpgrade(ctx context.Context, input WaitForDNSUpgradeInput, interv
type DeployUnevictablePodInput struct {
WorkloadClusterProxy ClusterProxy
ControlPlane *controlplanev1.KubeadmControlPlane
MachineDeployment *clusterv1.MachineDeployment
DeploymentName string
Namespace string
NodeSelector map[string]string

WaitForDeploymentAvailableInterval []interface{}
}

// DeployUnevictablePod will deploy a Deployment on a ControlPlane or MachineDeployment.
// It will deploy one Pod replica to each Machine and then deploy a PDB to ensure none of the Pods can be evicted.
func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput) {
Expect(input.DeploymentName).ToNot(BeNil(), "Need a deployment name in DeployUnevictablePod")
Expect(input.Namespace).ToNot(BeNil(), "Need a namespace in DeployUnevictablePod")
Expect(input.WorkloadClusterProxy).ToNot(BeNil(), "Need a workloadClusterProxy in DeployUnevictablePod")
Expect((input.MachineDeployment == nil && input.ControlPlane != nil) ||
(input.MachineDeployment != nil && input.ControlPlane == nil)).To(BeTrue(), "Either MachineDeployment or ControlPlane must be set in DeployUnevictablePod")

EnsureNamespace(ctx, input.WorkloadClusterProxy.GetClient(), input.Namespace)

workloadDeployment := &appsv1.Deployment{
workloadDeployment := generateDeployment(generateDeploymentInput{
ControlPlane: input.ControlPlane,
MachineDeployment: input.MachineDeployment,
Name: input.DeploymentName,
Namespace: input.Namespace,
NodeSelector: input.NodeSelector,
})

workloadClient := input.WorkloadClusterProxy.GetClientSet()

AddDeploymentToWorkloadCluster(ctx, AddDeploymentToWorkloadClusterInput{
Namespace: input.Namespace,
ClientSet: workloadClient,
Deployment: workloadDeployment,
})

budget := &policyv1.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Name: input.DeploymentName,
Namespace: input.Namespace,
},
Spec: policyv1.PodDisruptionBudgetSpec{
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "nonstop",
"deployment": input.DeploymentName,
},
},
// Setting MaxUnavailable to 0 means no Pods can be evicted / unavailable.
MaxUnavailable: &intstr.IntOrString{
Type: intstr.Int,
IntVal: 0,
},
},
}

AddPodDisruptionBudget(ctx, AddPodDisruptionBudgetInput{
Namespace: input.Namespace,
ClientSet: workloadClient,
Budget: budget,
})

WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{
Getter: input.WorkloadClusterProxy.GetClient(),
Deployment: workloadDeployment,
}, input.WaitForDeploymentAvailableInterval...)
}
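A hedged sketch of how a node drain test might call this helper for a worker MachineDeployment; the wrapper and its arguments are placeholders, not code from this commit:

package e2esketch

import (
	"context"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/test/framework"
)

// deployBlockingWorkload is a hypothetical wrapper showing the call shape:
// one unevictable Pod per Machine of the given MachineDeployment, protected by a PDB.
func deployBlockingWorkload(ctx context.Context, proxy framework.ClusterProxy, md *clusterv1.MachineDeployment, intervals []interface{}) {
	framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
		WorkloadClusterProxy:               proxy,
		MachineDeployment:                  md,
		DeploymentName:                     "unevictable-pod-md",
		Namespace:                          "unevictable-workload",
		WaitForDeploymentAvailableInterval: intervals,
	})
}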

type DeployEvictablePodInput struct {
WorkloadClusterProxy ClusterProxy
ControlPlane *controlplanev1.KubeadmControlPlane
MachineDeployment *clusterv1.MachineDeployment
DeploymentName string
Namespace string
NodeSelector map[string]string

ModifyDeployment func(deployment *appsv1.Deployment)

WaitForDeploymentAvailableInterval []interface{}
}

// DeployEvictablePod will deploy a Deployment on a ControlPlane or MachineDeployment.
// It will deploy one Pod replica to each Machine.
func DeployEvictablePod(ctx context.Context, input DeployEvictablePodInput) {
Expect(input.DeploymentName).ToNot(BeNil(), "Need a deployment name in DeployEvictablePod")
Expect(input.Namespace).ToNot(BeNil(), "Need a namespace in DeployEvictablePod")
Expect(input.WorkloadClusterProxy).ToNot(BeNil(), "Need a workloadClusterProxy in DeployEvictablePod")
Expect((input.MachineDeployment == nil && input.ControlPlane != nil) ||
(input.MachineDeployment != nil && input.ControlPlane == nil)).To(BeTrue(), "Either MachineDeployment or ControlPlane must be set in DeployEvictablePod")

EnsureNamespace(ctx, input.WorkloadClusterProxy.GetClient(), input.Namespace)

workloadDeployment := generateDeployment(generateDeploymentInput{
ControlPlane: input.ControlPlane,
MachineDeployment: input.MachineDeployment,
Name: input.DeploymentName,
Namespace: input.Namespace,
NodeSelector: input.NodeSelector,
})

input.ModifyDeployment(workloadDeployment)

workloadClient := input.WorkloadClusterProxy.GetClientSet()

AddDeploymentToWorkloadCluster(ctx, AddDeploymentToWorkloadClusterInput{
Namespace: input.Namespace,
ClientSet: workloadClient,
Deployment: workloadDeployment,
})

WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{
Getter: input.WorkloadClusterProxy.GetClient(),
Deployment: workloadDeployment,
}, input.WaitForDeploymentAvailableInterval...)
}
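Similarly, a hedged sketch of calling DeployEvictablePod against the control plane, including a ModifyDeployment hook applied before the Deployment is created; again the wrapper is a placeholder:

package e2esketch

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/utils/ptr"

	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
	"sigs.k8s.io/cluster-api/test/framework"
)

// deployEvictableWorkload is a hypothetical wrapper showing the call shape for
// control-plane nodes, with a ModifyDeployment hook that tweaks the Deployment
// before it is applied to the workload cluster.
func deployEvictableWorkload(ctx context.Context, proxy framework.ClusterProxy, cp *controlplanev1.KubeadmControlPlane, intervals []interface{}) {
	framework.DeployEvictablePod(ctx, framework.DeployEvictablePodInput{
		WorkloadClusterProxy: proxy,
		ControlPlane:         cp,
		DeploymentName:       "evictable-pod-cp",
		Namespace:            "evictable-workload",
		ModifyDeployment: func(deployment *appsv1.Deployment) {
			// For example, give Pods a short grace period so eviction is quick but observable.
			deployment.Spec.Template.Spec.TerminationGracePeriodSeconds = ptr.To[int64](5)
		},
		WaitForDeploymentAvailableInterval: intervals,
	})
}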

type generateDeploymentInput struct {
ControlPlane *controlplanev1.KubeadmControlPlane
MachineDeployment *clusterv1.MachineDeployment
Name string
Namespace string
NodeSelector map[string]string
}

func generateDeployment(input generateDeploymentInput) *appsv1.Deployment {
workloadDeployment := &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: input.Name,
Namespace: input.Namespace,
},
Spec: appsv1.DeploymentSpec{
Replicas: ptr.To[int32](4),
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "nonstop",
"app": "nonstop",
"deployment": input.Name,
},
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"app": "nonstop",
"app": "nonstop",
"deployment": input.Name,
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "web",
Name: "main",
Image: "registry.k8s.io/pause:3.10",
},
},
Affinity: &corev1.Affinity{
// Make sure only 1 Pod of this Deployment can run on the same Node.
PodAntiAffinity: &corev1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "deployment",
Operator: "In",
Values: []string{input.Name},
},
},
},
TopologyKey: "kubernetes.io/hostname",
},
},
},
},
},
},
},
}
workloadClient := input.WorkloadClusterProxy.GetClientSet()

if input.ControlPlane != nil {
var serverVersion *version.Info
Eventually(func() error {
var err error
serverVersion, err = workloadClient.ServerVersion()
return err
}, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "failed to get server version")

// Use the control-plane label for Kubernetes version >= v1.20.0.
if utilversion.MustParseGeneric(serverVersion.String()).AtLeast(utilversion.MustParseGeneric("v1.20.0")) {
workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleControlPlane: ""}
} else {
workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleOldControlPlane: ""}
}

workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleControlPlane: ""}
workloadDeployment.Spec.Template.Spec.Tolerations = []corev1.Toleration{
{
Key: nodeRoleOldControlPlane,
Effect: "NoSchedule",
},
{
Key: nodeRoleControlPlane,
Effect: "NoSchedule",
},
}
workloadDeployment.Spec.Replicas = input.ControlPlane.Spec.Replicas
}
AddDeploymentToWorkloadCluster(ctx, AddDeploymentToWorkloadClusterInput{
Namespace: input.Namespace,
ClientSet: workloadClient,
Deployment: workloadDeployment,
})

budget := &policyv1.PodDisruptionBudget{
TypeMeta: metav1.TypeMeta{
Kind: "PodDisruptionBudget",
APIVersion: "policy/v1",
},
ObjectMeta: metav1.ObjectMeta{
Name: input.DeploymentName,
Namespace: input.Namespace,
},
Spec: policyv1.PodDisruptionBudgetSpec{
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
"app": "nonstop",
},
},
MaxUnavailable: &intstr.IntOrString{
Type: intstr.Int,
IntVal: 1,
StrVal: "1",
},
},
if input.MachineDeployment != nil {
workloadDeployment.Spec.Replicas = input.MachineDeployment.Spec.Replicas
}

AddPodDisruptionBudget(ctx, AddPodDisruptionBudgetInput{
Namespace: input.Namespace,
ClientSet: workloadClient,
Budget: budget,
})
// Note: If set, the NodeSelector field overwrites the NodeSelector we set above for control plane nodes.
if input.NodeSelector != nil {
workloadDeployment.Spec.Template.Spec.NodeSelector = input.NodeSelector
}

WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{
Getter: input.WorkloadClusterProxy.GetClient(),
Deployment: workloadDeployment,
}, input.WaitForDeploymentAvailableInterval...)
return workloadDeployment
}
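The anti-affinity term keys on the per-Deployment "deployment" label and kubernetes.io/hostname, so at most one replica of a given Deployment can schedule per Node; with Replicas copied from the ControlPlane or MachineDeployment, each Machine ends up with exactly one Pod. A hypothetical in-package sketch of that relationship:

// exampleGenerateDeployment is a hypothetical in-package helper, shown only to
// illustrate the behavior: replicas follow the MachineDeployment, and the
// PodAntiAffinity above guarantees at most one of those Pods per Node.
func exampleGenerateDeployment() *appsv1.Deployment {
	md := &clusterv1.MachineDeployment{
		Spec: clusterv1.MachineDeploymentSpec{Replicas: ptr.To[int32](3)},
	}
	return generateDeployment(generateDeploymentInput{
		MachineDeployment: md,
		Name:              "nonstop-md",
		Namespace:         "default",
	})
}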

type AddDeploymentToWorkloadClusterInput struct {
