🌱 Improve node drain e2e test #11127

Merged
1 change: 0 additions & 1 deletion Makefile
@@ -600,7 +600,6 @@ generate-e2e-templates-main: $(KUSTOMIZE)
 	echo "---" >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption/step2 --load-restrictor LoadRestrictionsNone >> $(DOCKER_TEMPLATES)/main/cluster-template-kcp-adoption.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-machine-pool --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-machine-pool.yaml
-	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-node-drain --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-node-drain.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-upgrades --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-upgrades.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-upgrades-runtimesdk --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-upgrades-runtimesdk.yaml
 	$(KUSTOMIZE) build $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in --load-restrictor LoadRestrictionsNone > $(DOCKER_TEMPLATES)/main/cluster-template-kcp-scale-in.yaml
@@ -317,8 +317,8 @@ func (r *Reconciler) healthCheckTargets(targets []healthCheckTarget, logger logr
 			t.Machine,
 			corev1.EventTypeNormal,
 			EventDetectedUnhealthy,
-			"Machine %v has unhealthy node %v",
-			t.string(),
+			"Machine %s has unhealthy Node %s",
+			klog.KObj(t.Machine),
 			t.nodeName(),
 		)
 		nextCheckTimes = append(nextCheckTimes, nextCheck)
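The hunk above also tightens the event message: `%v` with the custom `t.string()` becomes `%s` with `klog.KObj`, which renders an object as `namespace/name`. A minimal, self-contained sketch of the resulting formatting (a Pod stands in for a Machine here, since `klog.KObj` only needs object metadata; all names are illustrative):

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

func main() {
	// klog.KObj wraps any object carrying ObjectMeta and prints it as "namespace/name".
	obj := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "md-0-abc12"}}
	fmt.Printf("Machine %s has unhealthy Node %s\n", klog.KObj(obj), "worker-node-1")
	// Prints: Machine default/md-0-abc12 has unhealthy Node worker-node-1
}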
2 changes: 0 additions & 2 deletions test/e2e/config/docker.yaml
@@ -347,7 +347,6 @@ providers:
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-remediation.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-adoption.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-machine-pool.yaml"
-      - sourcePath: "../data/infrastructure-docker/main/cluster-template-node-drain.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-upgrades.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-upgrades-runtimesdk.yaml"
       - sourcePath: "../data/infrastructure-docker/main/cluster-template-kcp-scale-in.yaml"
@@ -408,7 +407,6 @@ variables:
   CNI: "./data/cni/kindnet/kindnet.yaml"
   KUBETEST_CONFIGURATION: "./data/kubetest/conformance.yaml"
   AUTOSCALER_WORKLOAD: "./data/autoscaler/autoscaler-to-workload-workload.yaml"
-  NODE_DRAIN_TIMEOUT: "60s"
   # Enabling the feature flags by setting the env variables.
   # Note: EXP_CLUSTER_RESOURCE_SET & EXP_MACHINE_POOL are enabled per default with CAPI v1.7.0.
   # We still have to enable them here for clusterctl upgrade tests that use older versions.

This file was deleted.

This file was deleted.

This file was deleted.

339 changes: 297 additions & 42 deletions test/e2e/node_drain_timeout.go

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions test/e2e/node_drain_timeout_test.go
@@ -32,6 +32,7 @@ var _ = Describe("When testing node drain timeout", func() {
 			BootstrapClusterProxy:  bootstrapClusterProxy,
 			ArtifactFolder:         artifactFolder,
 			SkipCleanup:            skipCleanup,
+			Flavor:                 ptr.To("topology"),
 			InfrastructureProvider: ptr.To("docker"),
 		}
 	})
209 changes: 146 additions & 63 deletions test/framework/deployment_helpers.go
@@ -42,16 +42,14 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	kerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/apimachinery/pkg/util/intstr"
-	utilversion "k8s.io/apimachinery/pkg/util/version"
 	"k8s.io/apimachinery/pkg/util/wait"
-	"k8s.io/apimachinery/pkg/version"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/klog/v2"
 	"k8s.io/utils/ptr"
 	toolscache "sigs.k8s.io/controller-runtime/pkg/cache"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	. "sigs.k8s.io/cluster-api/test/framework/ginkgoextensions"
 	"sigs.k8s.io/cluster-api/test/framework/internal/log"
@@ -493,115 +491,200 @@ func WaitForDNSUpgrade(ctx context.Context, input WaitForDNSUpgradeInput, interv
 type DeployUnevictablePodInput struct {
 	WorkloadClusterProxy ClusterProxy
 	ControlPlane         *controlplanev1.KubeadmControlPlane
+	MachineDeployment    *clusterv1.MachineDeployment
 	DeploymentName       string
 	Namespace            string
+	NodeSelector         map[string]string
 
 	WaitForDeploymentAvailableInterval []interface{}
 }
 
+// DeployUnevictablePod will deploy a Deployment on a ControlPlane or MachineDeployment.
+// It will deploy one Pod replica to each Machine and then deploy a PDB to ensure none of the Pods can be evicted.
 func DeployUnevictablePod(ctx context.Context, input DeployUnevictablePodInput) {
 	Expect(input.DeploymentName).ToNot(BeNil(), "Need a deployment name in DeployUnevictablePod")
 	Expect(input.Namespace).ToNot(BeNil(), "Need a namespace in DeployUnevictablePod")
 	Expect(input.WorkloadClusterProxy).ToNot(BeNil(), "Need a workloadClusterProxy in DeployUnevictablePod")
+	Expect((input.MachineDeployment == nil && input.ControlPlane != nil) ||
+		(input.MachineDeployment != nil && input.ControlPlane == nil)).To(BeTrue(), "Either MachineDeployment or ControlPlane must be set in DeployUnevictablePod")
 
 	EnsureNamespace(ctx, input.WorkloadClusterProxy.GetClient(), input.Namespace)
 
-	workloadDeployment := &appsv1.Deployment{
+	workloadDeployment := generateDeployment(generateDeploymentInput{
+		ControlPlane:      input.ControlPlane,
+		MachineDeployment: input.MachineDeployment,
+		Name:              input.DeploymentName,
+		Namespace:         input.Namespace,
+		NodeSelector:      input.NodeSelector,
+	})
+
+	workloadClient := input.WorkloadClusterProxy.GetClientSet()
+
+	AddDeploymentToWorkloadCluster(ctx, AddDeploymentToWorkloadClusterInput{
+		Namespace:  input.Namespace,
+		ClientSet:  workloadClient,
+		Deployment: workloadDeployment,
+	})
+
+	budget := &policyv1.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      input.DeploymentName,
+			Namespace: input.Namespace,
+		},
+		Spec: policyv1.PodDisruptionBudgetSpec{
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{
+					"app":        "nonstop",
+					"deployment": input.DeploymentName,
+				},
+			},
+			// Setting MaxUnavailable to 0 means no Pods can be evicted / unavailable.
+			MaxUnavailable: &intstr.IntOrString{
+				Type:   intstr.Int,
+				IntVal: 0,
+			},
+		},
+	}
+
+	AddPodDisruptionBudget(ctx, AddPodDisruptionBudgetInput{
+		Namespace: input.Namespace,
+		ClientSet: workloadClient,
+		Budget:    budget,
+	})
+
+	WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{
+		Getter:     input.WorkloadClusterProxy.GetClient(),
+		Deployment: workloadDeployment,
+	}, input.WaitForDeploymentAvailableInterval...)
+}
+
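As a usage sketch for reviewers: the reworked helper deploys one pause Pod per Machine plus a PDB with MaxUnavailable=0, so no Pod can be evicted and any drain of those Nodes blocks. The wrapper function, names, and interval values below are hypothetical placeholders, not part of this diff:

import (
	"context"

	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
	"sigs.k8s.io/cluster-api/test/framework"
)

// deployBlockingWorkload pins one unevictable pause Pod to every control plane
// Machine; the MaxUnavailable=0 PDB then blocks any node drain on those Nodes.
func deployBlockingWorkload(ctx context.Context, proxy framework.ClusterProxy, cp *controlplanev1.KubeadmControlPlane) {
	framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
		WorkloadClusterProxy:               proxy,
		ControlPlane:                       cp, // or set MachineDeployment instead
		DeploymentName:                     "unevictable-pod-cp",   // hypothetical name
		Namespace:                          "unevictable-workload", // hypothetical namespace
		WaitForDeploymentAvailableInterval: []interface{}{"5m", "10s"},
	})
}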
+type DeployEvictablePodInput struct {
+	WorkloadClusterProxy ClusterProxy
+	ControlPlane         *controlplanev1.KubeadmControlPlane
+	MachineDeployment    *clusterv1.MachineDeployment
+	DeploymentName       string
+	Namespace            string
+	NodeSelector         map[string]string
+
+	ModifyDeployment func(deployment *appsv1.Deployment)
+
+	WaitForDeploymentAvailableInterval []interface{}
+}
+
+// DeployEvictablePod will deploy a Deployment on a ControlPlane or MachineDeployment.
+// It will deploy one Pod replica to each Machine.
+func DeployEvictablePod(ctx context.Context, input DeployEvictablePodInput) {
+	Expect(input.DeploymentName).ToNot(BeNil(), "Need a deployment name in DeployUnevictablePod")
+	Expect(input.Namespace).ToNot(BeNil(), "Need a namespace in DeployUnevictablePod")
+	Expect(input.WorkloadClusterProxy).ToNot(BeNil(), "Need a workloadClusterProxy in DeployUnevictablePod")
+	Expect((input.MachineDeployment == nil && input.ControlPlane != nil) ||
+		(input.MachineDeployment != nil && input.ControlPlane == nil)).To(BeTrue(), "Either MachineDeployment or ControlPlane must be set in DeployUnevictablePod")
+
+	EnsureNamespace(ctx, input.WorkloadClusterProxy.GetClient(), input.Namespace)
+
+	workloadDeployment := generateDeployment(generateDeploymentInput{
+		ControlPlane:      input.ControlPlane,
+		MachineDeployment: input.MachineDeployment,
+		Name:              input.DeploymentName,
+		Namespace:         input.Namespace,
+		NodeSelector:      input.NodeSelector,
+	})
+
+	input.ModifyDeployment(workloadDeployment)
+
+	workloadClient := input.WorkloadClusterProxy.GetClientSet()
+
+	AddDeploymentToWorkloadCluster(ctx, AddDeploymentToWorkloadClusterInput{
+		Namespace:  input.Namespace,
+		ClientSet:  workloadClient,
+		Deployment: workloadDeployment,
+	})
+
+	WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{
+		Getter:     input.WorkloadClusterProxy.GetClient(),
+		Deployment: workloadDeployment,
+	}, input.WaitForDeploymentAvailableInterval...)
+}
+
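And the evictable variant, which skips the PDB; note that `ModifyDeployment` is called unconditionally, so callers must provide it. Again a hedged sketch with hypothetical names:

import (
	"context"

	appsv1 "k8s.io/api/apps/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/test/framework"
)

// deployEvictableWorkload deploys one evictable pause Pod per worker Machine;
// ModifyDeployment lets the caller tweak the generated Deployment before creation.
func deployEvictableWorkload(ctx context.Context, proxy framework.ClusterProxy, md *clusterv1.MachineDeployment) {
	framework.DeployEvictablePod(ctx, framework.DeployEvictablePodInput{
		WorkloadClusterProxy: proxy,
		MachineDeployment:    md, // or set ControlPlane instead
		DeploymentName:       "evictable-pod-md",   // hypothetical name
		Namespace:            "evictable-workload", // hypothetical namespace
		ModifyDeployment: func(d *appsv1.Deployment) {
			// Illustrative mutation only: label the Deployment.
			d.Labels = map[string]string{"purpose": "drain-test"}
		},
		WaitForDeploymentAvailableInterval: []interface{}{"5m", "10s"},
	})
}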
+type generateDeploymentInput struct {
+	ControlPlane      *controlplanev1.KubeadmControlPlane
+	MachineDeployment *clusterv1.MachineDeployment
+	Name              string
+	Namespace         string
+	NodeSelector      map[string]string
+}
+
+func generateDeployment(input generateDeploymentInput) *appsv1.Deployment {
+	workloadDeployment := &appsv1.Deployment{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      input.DeploymentName,
+			Name:      input.Name,
 			Namespace: input.Namespace,
 		},
 		Spec: appsv1.DeploymentSpec{
-			Replicas: ptr.To[int32](4),
 			Selector: &metav1.LabelSelector{
 				MatchLabels: map[string]string{
-					"app": "nonstop",
+					"app":        "nonstop",
+					"deployment": input.Name,
 				},
 			},
 			Template: corev1.PodTemplateSpec{
 				ObjectMeta: metav1.ObjectMeta{
 					Labels: map[string]string{
-						"app": "nonstop",
+						"app":        "nonstop",
+						"deployment": input.Name,
 					},
 				},
 				Spec: corev1.PodSpec{
 					Containers: []corev1.Container{
 						{
-							Name:  "web",
+							Name:  "main",
 							Image: "registry.k8s.io/pause:3.10",
 						},
 					},
+					Affinity: &corev1.Affinity{
+						// Make sure only 1 Pod of this Deployment can run on the same Node.
+						PodAntiAffinity: &corev1.PodAntiAffinity{
+							RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{
+								{
+									LabelSelector: &metav1.LabelSelector{
+										MatchExpressions: []metav1.LabelSelectorRequirement{
+											{
+												Key:      "deployment",
+												Operator: "In",
+												Values:   []string{input.Name},
+											},
+										},
+									},
+									TopologyKey: "kubernetes.io/hostname",
+								},
+							},
+						},
+					},
 				},
 			},
 		},
 	}
-	workloadClient := input.WorkloadClusterProxy.GetClientSet()
 
 	if input.ControlPlane != nil {
-		var serverVersion *version.Info
-		Eventually(func() error {
-			var err error
-			serverVersion, err = workloadClient.ServerVersion()
-			return err
-		}, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "failed to get server version")
-
-		// Use the control-plane label for Kubernetes version >= v1.20.0.
-		if utilversion.MustParseGeneric(serverVersion.String()).AtLeast(utilversion.MustParseGeneric("v1.20.0")) {
-			workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleControlPlane: ""}
-		} else {
-			workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleOldControlPlane: ""}
-		}
-
+		workloadDeployment.Spec.Template.Spec.NodeSelector = map[string]string{nodeRoleControlPlane: ""}
 		workloadDeployment.Spec.Template.Spec.Tolerations = []corev1.Toleration{
 			{
-				Key:    nodeRoleOldControlPlane,
-				Effect: "NoSchedule",
-			},
-			{
 				Key:    nodeRoleControlPlane,
 				Effect: "NoSchedule",
 			},
 		}
+		workloadDeployment.Spec.Replicas = input.ControlPlane.Spec.Replicas
 	}
-	AddDeploymentToWorkloadCluster(ctx, AddDeploymentToWorkloadClusterInput{
-		Namespace:  input.Namespace,
-		ClientSet:  workloadClient,
-		Deployment: workloadDeployment,
-	})
-
-	budget := &policyv1.PodDisruptionBudget{
-		TypeMeta: metav1.TypeMeta{
-			Kind:       "PodDisruptionBudget",
-			APIVersion: "policy/v1",
-		},
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      input.DeploymentName,
-			Namespace: input.Namespace,
-		},
-		Spec: policyv1.PodDisruptionBudgetSpec{
-			Selector: &metav1.LabelSelector{
-				MatchLabels: map[string]string{
-					"app": "nonstop",
-				},
-			},
-			MaxUnavailable: &intstr.IntOrString{
-				Type:   intstr.Int,
-				IntVal: 1,
-				StrVal: "1",
-			},
-		},
-	}
+	if input.MachineDeployment != nil {
+		workloadDeployment.Spec.Replicas = input.MachineDeployment.Spec.Replicas
+	}
 
-	AddPodDisruptionBudget(ctx, AddPodDisruptionBudgetInput{
-		Namespace: input.Namespace,
-		ClientSet: workloadClient,
-		Budget:    budget,
-	})
+	// Note: If set, the NodeSelector field overwrites the NodeSelector we set above for control plane nodes.
+	if input.NodeSelector != nil {
+		workloadDeployment.Spec.Template.Spec.NodeSelector = input.NodeSelector
+	}
 
-	WaitForDeploymentsAvailable(ctx, WaitForDeploymentsAvailableInput{
-		Getter:     input.WorkloadClusterProxy.GetClient(),
-		Deployment: workloadDeployment,
-	}, input.WaitForDeploymentAvailableInterval...)
+	return workloadDeployment
 }
 
 type AddDeploymentToWorkloadClusterInput struct {
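A closing note on why the MaxUnavailable=0 PDB makes the Pods unevictable: node drain goes through the eviction subresource, and the API server rejects any eviction that would violate a PodDisruptionBudget with 429 TooManyRequests, so the Machine controller keeps retrying until the drain timeout applies. A hypothetical sketch of that behavior (client, namespace, and Pod name are illustrative, not from this diff):

import (
	"context"

	policyv1 "k8s.io/api/policy/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// evictionBlockedByPDB returns true while the PDB still protects the Pod.
func evictionBlockedByPDB(ctx context.Context, c kubernetes.Interface) bool {
	err := c.PolicyV1().Evictions("unevictable-workload").Evict(ctx, &policyv1.Eviction{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "unevictable-pod-cp-abc12", // hypothetical Pod of the Deployment above
			Namespace: "unevictable-workload",
		},
	})
	// A PDB violation surfaces as 429 TooManyRequests, not as success.
	return apierrors.IsTooManyRequests(err)
}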