diff --git a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go
index e2f5d574725b..3b6f837a9a09 100644
--- a/controlplane/kubeadm/internal/controllers/status.go
+++ b/controlplane/kubeadm/internal/controllers/status.go
@@ -865,6 +865,9 @@ func aggregateStaleMachines(machines collections.Machines) string {
 				if strings.Contains(deletingCondition.Message, "failed to evict Pod") {
 					delayReasons.Insert("Pod eviction errors")
 				}
+				if strings.Contains(deletingCondition.Message, "waiting for completion") {
+					delayReasons.Insert("Pods not completed yet")
+				}
 			}
 		}
 	}
@@ -889,7 +892,7 @@ func aggregateStaleMachines(machines collections.Machines) string {
 	message += "in deletion since more than 15m"
 	if len(delayReasons) > 0 {
 		reasonList := []string{}
-		for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors"} {
+		for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors", "Pods not completed yet"} {
 			if delayReasons.Has(r) {
 				reasonList = append(reasonList, r)
 			}
diff --git a/controlplane/kubeadm/internal/controllers/status_test.go b/controlplane/kubeadm/internal/controllers/status_test.go
index 17295d65b6f5..8d2829bd8cc4 100644
--- a/controlplane/kubeadm/internal/controllers/status_test.go
+++ b/controlplane/kubeadm/internal/controllers/status_test.go
@@ -467,7 +467,28 @@ func Test_setScalingDownCondition(t *testing.T) {
 				Status: controlplanev1.KubeadmControlPlaneStatus{Replicas: 3},
 			},
 			Machines: collections.FromMachines(
-				&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1", DeletionTimestamp: ptr.To(metav1.Time{Time: time.Now().Add(-1 * time.Hour)})}},
+				&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1", DeletionTimestamp: ptr.To(metav1.Time{Time: time.Now().Add(-1 * time.Hour)})},
+					Status: clusterv1.MachineStatus{
+						V1Beta2: &clusterv1.MachineV1Beta2Status{
+							Conditions: []metav1.Condition{
+								{
+									Type: clusterv1.MachineDeletingV1Beta2Condition,
+									Status: metav1.ConditionTrue,
+									Reason: clusterv1.MachineDeletingDrainingNodeV1Beta2Reason,
+									Message: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
+* Pods pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1: deletionTimestamp set, but still not removed from the Node
+* Pod pod-5-to-trigger-eviction-pdb-violated-1: cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently
+* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1
+* Pod pod-9-wait-completed: waiting for completion
+After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`,
+								},
+							},
+						},
+						Deletion: &clusterv1.MachineDeletionStatus{
+							NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-6 * time.Minute)},
+						},
+					},
+				},
 				&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}},
 				&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}},
 			),
@@ -477,7 +498,7 @@ func Test_setScalingDownCondition(t *testing.T) {
 				Status: metav1.ConditionTrue,
 				Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
 				Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
-					"* Machine m1 is in deletion since more than 15m",
+					"* Machine m1 is in deletion since more than 15m, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors, Pods not completed yet",
 			},
 		},
 		{
diff --git a/internal/controllers/machine/machine_controller_status.go b/internal/controllers/machine/machine_controller_status.go
index bf03e3cb339c..38e09e957c16 100644
--- a/internal/controllers/machine/machine_controller_status.go
+++ b/internal/controllers/machine/machine_controller_status.go
@@ -710,6 +710,9 @@ func calculateDeletingConditionForSummary(machine *clusterv1.Machine) v1beta2con
 				if strings.Contains(deletingCondition.Message, "failed to evict Pod") {
 					delayReasons = append(delayReasons, "Pod eviction errors")
 				}
+				if strings.Contains(deletingCondition.Message, "waiting for completion") {
+					delayReasons = append(delayReasons, "Pods not completed yet")
+				}
 				if len(delayReasons) > 0 {
 					msg += fmt.Sprintf(", delay likely due to %s", strings.Join(delayReasons, ", "))
 				}
diff --git a/internal/controllers/machine/machine_controller_status_test.go b/internal/controllers/machine/machine_controller_status_test.go
index 275947d7880b..ef62eb3debc8 100644
--- a/internal/controllers/machine/machine_controller_status_test.go
+++ b/internal/controllers/machine/machine_controller_status_test.go
@@ -1715,6 +1715,7 @@ func TestCalculateDeletingConditionForSummary(t *testing.T) {
 * Pods pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1: deletionTimestamp set, but still not removed from the Node
 * Pod pod-5-to-trigger-eviction-pdb-violated-1: cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently
 * Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1
+* Pod pod-9-wait-completed: waiting for completion
 After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`,
 							},
 						},
@@ -1733,7 +1734,7 @@ After above Pods have been removed from the Node, the following Pods will be evi
 				Type: clusterv1.MachineDeletingV1Beta2Condition,
 				Status: metav1.ConditionTrue,
 				Reason: clusterv1.MachineDeletingV1Beta2Reason,
-				Message: "Machine deletion in progress since more than 15m, stage: DrainingNode, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors",
+				Message: "Machine deletion in progress since more than 15m, stage: DrainingNode, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors, Pods not completed yet",
 			},
 		},
 	},
diff --git a/internal/controllers/machinedeployment/machinedeployment_status.go b/internal/controllers/machinedeployment/machinedeployment_status.go
index 1387bd6d4171..558b01c6d5b9 100644
--- a/internal/controllers/machinedeployment/machinedeployment_status.go
+++ b/internal/controllers/machinedeployment/machinedeployment_status.go
@@ -581,6 +581,9 @@ func aggregateStaleMachines(machines collections.Machines) string {
 				if strings.Contains(deletingCondition.Message, "failed to evict Pod") {
 					delayReasons.Insert("Pod eviction errors")
 				}
+				if strings.Contains(deletingCondition.Message, "waiting for completion") {
+					delayReasons.Insert("Pods not completed yet")
+				}
 			}
 		}
 	}
@@ -605,7 +608,7 @@ func aggregateStaleMachines(machines collections.Machines) string {
 	message += "in deletion since more than 15m"
 	if len(delayReasons) > 0 {
 		reasonList := []string{}
-		for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors"} {
+		for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors", "Pods not completed yet"} {
 			if delayReasons.Has(r) {
 				reasonList = append(reasonList, r)
 			}
diff --git a/internal/controllers/machinedeployment/machinedeployment_status_test.go b/internal/controllers/machinedeployment/machinedeployment_status_test.go
index bc8fda802722..ba4b9198698c 100644
--- a/internal/controllers/machinedeployment/machinedeployment_status_test.go
+++ b/internal/controllers/machinedeployment/machinedeployment_status_test.go
@@ -648,7 +648,28 @@ func Test_setScalingDownCondition(t *testing.T) {
 			},
 			machines: []*clusterv1.Machine{
 				fakeMachine("m1"),
-				fakeMachine("stale-machine-1", withStaleDeletion()),
+				fakeMachine("stale-machine-1", withStaleDeletion(), func(m *clusterv1.Machine) {
+					m.Status = clusterv1.MachineStatus{
+						V1Beta2: &clusterv1.MachineV1Beta2Status{
+							Conditions: []metav1.Condition{
+								{
+									Type: clusterv1.MachineDeletingV1Beta2Condition,
+									Status: metav1.ConditionTrue,
+									Reason: clusterv1.MachineDeletingDrainingNodeV1Beta2Reason,
+									Message: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
+* Pods pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1: deletionTimestamp set, but still not removed from the Node
+* Pod pod-5-to-trigger-eviction-pdb-violated-1: cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently
+* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1
+* Pod pod-9-wait-completed: waiting for completion
+After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`,
+								},
+							},
+						},
+						Deletion: &clusterv1.MachineDeletionStatus{
+							NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-6 * time.Minute)},
+						},
+					}
+				}),
 			},
 			getAndAdoptMachineSetsForDeploymentSucceeded: true,
 			expectCondition: metav1.Condition{
@@ -656,7 +677,7 @@ func Test_setScalingDownCondition(t *testing.T) {
 				Status: metav1.ConditionTrue,
 				Reason: clusterv1.MachineDeploymentScalingDownV1Beta2Reason,
 				Message: "Scaling down from 2 to 1 replicas\n" +
-					"* Machine stale-machine-1 is in deletion since more than 15m",
+					"* Machine stale-machine-1 is in deletion since more than 15m, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors, Pods not completed yet",
 			},
 		},
 		{
diff --git a/internal/controllers/machineset/machineset_controller_status.go b/internal/controllers/machineset/machineset_controller_status.go
index c85186dbfac2..c127b2b6f25d 100644
--- a/internal/controllers/machineset/machineset_controller_status.go
+++ b/internal/controllers/machineset/machineset_controller_status.go
@@ -457,6 +457,9 @@ func aggregateStaleMachines(machines []*clusterv1.Machine) string {
 				if strings.Contains(deletingCondition.Message, "failed to evict Pod") {
 					delayReasons.Insert("Pod eviction errors")
 				}
+				if strings.Contains(deletingCondition.Message, "waiting for completion") {
+					delayReasons.Insert("Pods not completed yet")
+				}
 			}
 		}
 	}
@@ -481,7 +484,7 @@ func aggregateStaleMachines(machines []*clusterv1.Machine) string {
 	message += "in deletion since more than 15m"
 	if len(delayReasons) > 0 {
 		reasonList := []string{}
-		for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors"} {
+		for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors", "Pods not completed yet"} {
 			if delayReasons.Has(r) {
 				reasonList = append(reasonList, r)
 			}
diff --git a/internal/controllers/machineset/machineset_controller_status_test.go b/internal/controllers/machineset/machineset_controller_status_test.go
index 0e139d4906f2..4e6465ad5850 100644
--- a/internal/controllers/machineset/machineset_controller_status_test.go
+++ b/internal/controllers/machineset/machineset_controller_status_test.go
@@ -420,7 +420,28 @@ func Test_setScalingDownCondition(t *testing.T) {
 			name: "scaling down with 1 stale machine",
 			ms: machineSet1Replica,
 			machines: []*clusterv1.Machine{
-				fakeMachine("stale-machine-1", withStaleDeletionTimestamp()),
+				fakeMachine("stale-machine-1", withStaleDeletionTimestamp(), func(m *clusterv1.Machine) {
+					m.Status = clusterv1.MachineStatus{
+						V1Beta2: &clusterv1.MachineV1Beta2Status{
+							Conditions: []metav1.Condition{
+								{
+									Type: clusterv1.MachineDeletingV1Beta2Condition,
+									Status: metav1.ConditionTrue,
+									Reason: clusterv1.MachineDeletingDrainingNodeV1Beta2Reason,
+									Message: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
+* Pods pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1: deletionTimestamp set, but still not removed from the Node
+* Pod pod-5-to-trigger-eviction-pdb-violated-1: cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently
+* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1
+* Pod pod-9-wait-completed: waiting for completion
+After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`,
+								},
+							},
+						},
+						Deletion: &clusterv1.MachineDeletionStatus{
+							NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-6 * time.Minute)},
+						},
+					}
+				}),
 				fakeMachine("machine-2"),
 			},
 			getAndAdoptMachinesForMachineSetSucceeded: true,
@@ -429,7 +450,7 @@ func Test_setScalingDownCondition(t *testing.T) {
 				Status: metav1.ConditionTrue,
 				Reason: clusterv1.MachineSetScalingDownV1Beta2Reason,
 				Message: "Scaling down from 2 to 1 replicas\n" +
-					"* Machine stale-machine-1 is in deletion since more than 15m",
+					"* Machine stale-machine-1 is in deletion since more than 15m, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors, Pods not completed yet",
 			},
 		},
 		{
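
Note on the pattern above: the KCP, Machine, MachineDeployment, and MachineSet controllers all derive the delay reasons the same way, by substring-matching the Machine's Deleting condition message and then emitting the matched reasons in a fixed order. Below is a minimal, self-contained Go sketch of that pattern, not the Cluster API source: aggregateDelayReasons is an illustrative name, a plain map stands in for sets.Set[string], and only the two substring checks visible in the hunks above ("failed to evict Pod" and the new "waiting for completion") are reproduced; the real functions also derive the PodDisruptionBudgets and Pods-not-terminating reasons from other parts of the drain message quoted in the test fixtures.

package main

import (
	"fmt"
	"strings"
)

// aggregateDelayReasons sketches the aggregation pattern touched by this diff.
// It is illustrative only and not part of the Cluster API codebase.
func aggregateDelayReasons(deletingMessage string) string {
	delayReasons := map[string]bool{}

	// Substring checks visible in the hunks above. The real controllers also
	// detect PodDisruptionBudget violations and Pods that are not terminating
	// from other substrings of the drain message.
	if strings.Contains(deletingMessage, "failed to evict Pod") {
		delayReasons["Pod eviction errors"] = true
	}
	// New in this diff: Pods the drain is waiting on to complete
	// (pod-9-wait-completed in the updated test fixtures).
	if strings.Contains(deletingMessage, "waiting for completion") {
		delayReasons["Pods not completed yet"] = true
	}

	message := "in deletion since more than 15m"
	if len(delayReasons) == 0 {
		return message
	}

	// Emit reasons in the same fixed order the diff uses so the resulting
	// condition message stays stable across reconciles.
	reasonList := []string{}
	for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors", "Pods not completed yet"} {
		if delayReasons[r] {
			reasonList = append(reasonList, r)
		}
	}
	return fmt.Sprintf("%s, delay likely due to %s", message, strings.Join(reasonList, ", "))
}

func main() {
	msg := `Drain not completed yet (started at 2024-10-09T16:13:59Z):
* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1
* Pod pod-9-wait-completed: waiting for completion`
	fmt.Println(aggregateDelayReasons(msg))
	// Prints: in deletion since more than 15m, delay likely due to Pod eviction errors, Pods not completed yet
}

Ranging over the fixed slice rather than over the set is what keeps the reason order deterministic across reconciles, which is why the new reason is appended both to the substring checks and to that ordered slice in every controller.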