Skip to content

Commit

Permalink
Delete a subcluster's pods after it is shut down (#968)
Browse files Browse the repository at this point in the history
After a subcluster is "shut down" (shutdown set to true), its pods will
stay down and cannot get restarted by the operator. We go a step further
and delete the pods by scaling the statefulset to zero. This is useful
for realdb as we do not want to keep down pods around and the EKS
instances will also be shut down.
When shutdown is set back to false, the statefulset will be scaled back up to
its original size (the subcluster size in the spec).
  • Loading branch information
roypaulin authored Oct 29, 2024
1 parent f4bda62 commit 984df06
Show file tree
Hide file tree
Showing 10 changed files with 255 additions and 10 deletions.
15 changes: 15 additions & 0 deletions api/v1/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,21 @@ func (s *Subcluster) IsZombie(vdb *VerticaDB) bool {
return !foundInSandbox && !foundInSandboxStatus
}

// GetStsSize reports how many replicas the subcluster's statefulset should
// have. The answer is zero only when shutdown is requested in the spec AND
// the subcluster's entry in the vdb status also reports shutdown; in every
// other case (including a missing status entry) it is the subcluster's size.
func (s *Subcluster) GetStsSize(vdb *VerticaDB) int32 {
	if s.Shutdown {
		// The status map is only consulted once the spec asks for shutdown.
		status := vdb.GenSubclusterStatusMap()[s.Name]
		if status != nil && status.Shutdown {
			return 0
		}
	}
	return s.Size
}

// FindSubclusterForServiceName will find any subclusters that match the given service name
func (v *VerticaDB) FindSubclusterForServiceName(svcName string) (scs []*Subcluster, totalSize int32) {
totalSize = int32(0)
Expand Down
3 changes: 2 additions & 1 deletion pkg/builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -1257,6 +1257,7 @@ func getStorageClassName(vdb *vapi.VerticaDB) *string {

// BuildStsSpec builds manifest for a subclusters statefulset
func BuildStsSpec(nm types.NamespacedName, vdb *vapi.VerticaDB, sc *vapi.Subcluster) *appsv1.StatefulSet {
scSize := sc.GetStsSize(vdb)
return &appsv1.StatefulSet{
ObjectMeta: metav1.ObjectMeta{
Name: nm.Name,
Expand All @@ -1269,7 +1270,7 @@ func BuildStsSpec(nm types.NamespacedName, vdb *vapi.VerticaDB, sc *vapi.Subclus
MatchLabels: MakeStsSelectorLabels(vdb, sc),
},
ServiceName: names.GenHlSvcName(vdb).Name,
Replicas: &sc.Size,
Replicas: &scSize,
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: MakeLabelsForPodObject(vdb, sc),
Expand Down
2 changes: 2 additions & 0 deletions pkg/controllers/sandbox/sandbox_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ func (r *SandboxConfigMapReconciler) constructActors(vdb *v1.VerticaDB, log logr
// Update the vdb status including subclusters[].shutdown, after a stopdb
// or a restart
vdbcontroller.MakeStatusReconcilerWithShutdown(r.Client, r.Scheme, log, vdb, pfacts),
// Scale down the subclusters' statefulsets to zero after the subclusters are shut down
MakeScaleStafulsetReconciler(r, vdb, pfacts),
}
}

Expand Down
83 changes: 83 additions & 0 deletions pkg/controllers/sandbox/scalestatefulset_reconciler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
(c) Copyright [2021-2024] Open Text.
Licensed under the Apache License, Version 2.0 (the "License");
You may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package sandbox

import (
"context"
"fmt"

"github.com/go-logr/logr"
v1 "github.com/vertica/vertica-kubernetes/api/v1"
"github.com/vertica/vertica-kubernetes/pkg/controllers"
"github.com/vertica/vertica-kubernetes/pkg/iter"
vmeta "github.com/vertica/vertica-kubernetes/pkg/meta"
"github.com/vertica/vertica-kubernetes/pkg/names"
"github.com/vertica/vertica-kubernetes/pkg/podfacts"
"k8s.io/client-go/util/retry"
ctrl "sigs.k8s.io/controller-runtime"
)

// ScaleStafulsetReconciler will make sure that the sandbox's subclusters that are
// shut down have their pods removed.
type ScaleStafulsetReconciler struct {
	// VRec is the sandbox reconciler; Reconcile uses its client to list,
	// fetch and update statefulsets.
	VRec *SandboxConfigMapReconciler
	// Vdb is the VerticaDB whose subclusters decide each statefulset's size.
	Vdb *v1.VerticaDB
	// Log is not populated by MakeScaleStafulsetReconciler, so it holds the
	// zero-value logger unless a caller sets it explicitly.
	Log logr.Logger
	// PFacts supplies the sandbox name used to select the statefulsets.
	PFacts *podfacts.PodFacts
}

// MakeScaleStafulsetReconciler builds the actor that resizes the statefulsets
// of a sandbox. Only VRec, Vdb and PFacts are filled in; the Log field is left
// at its zero value.
func MakeScaleStafulsetReconciler(r *SandboxConfigMapReconciler,
	vdb *v1.VerticaDB, pfacts *podfacts.PodFacts) controllers.ReconcileActor {
	actor := new(ScaleStafulsetReconciler)
	actor.VRec = r
	actor.Vdb = vdb
	actor.PFacts = pfacts
	return actor
}

// Reconcile walks every statefulset found for the sandbox named in PFacts and
// makes its replica count match what GetStsSize reports for the owning
// subcluster (zero once the subcluster is shut down, the subcluster size
// otherwise). Statefulsets that already have the desired size are left alone.
//
// It returns an empty result on success. An error is returned when the
// statefulsets cannot be listed, when a statefulset cannot be fetched or
// updated, or when a statefulset's subcluster label does not match any
// subcluster in the vdb. Processing stops at the first error.
func (s *ScaleStafulsetReconciler) Reconcile(ctx context.Context, _ *ctrl.Request) (ctrl.Result, error) {
	scMap := s.Vdb.GenSubclusterMap()
	finder := iter.MakeSubclusterFinder(s.VRec.GetClient(), s.Vdb)
	stss, err := finder.FindStatefulSets(ctx, iter.FindInVdb, s.PFacts.SandboxName)
	if err != nil {
		return ctrl.Result{}, err
	}
	for inx := range stss.Items {
		sts := &stss.Items[inx]
		updateErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
			// Fetch the object again on every attempt so that a retry after a
			// conflict works on a fresh copy rather than the stale one.
			nm := names.GenNamespacedName(s.Vdb, sts.Name)
			if getErr := s.VRec.GetClient().Get(ctx, nm, sts); getErr != nil {
				return getErr
			}
			scName := sts.Labels[vmeta.SubclusterNameLabel]
			sc := scMap[scName]
			if sc == nil {
				return fmt.Errorf("subcluster %s not found in vdb", scName)
			}
			newSize := sc.GetStsSize(s.Vdb)
			// Replicas is an optional pointer; only dereference it when set.
			// A nil value is treated as "needs an explicit size".
			if cur := sts.Spec.Replicas; cur != nil && *cur == newSize {
				return nil
			}
			sts.Spec.Replicas = &newSize
			return s.VRec.GetClient().Update(ctx, sts)
		})
		if updateErr != nil {
			return ctrl.Result{}, updateErr
		}
	}
	return ctrl.Result{}, nil
}
70 changes: 70 additions & 0 deletions pkg/controllers/sandbox/scalestatefulset_reconciler_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
(c) Copyright [2021-2024] Open Text.
Licensed under the Apache License, Version 2.0 (the "License");
You may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package sandbox

import (
"context"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
v1 "github.com/vertica/vertica-kubernetes/api/v1"
"github.com/vertica/vertica-kubernetes/pkg/cmds"
"github.com/vertica/vertica-kubernetes/pkg/names"
"github.com/vertica/vertica-kubernetes/pkg/podfacts"
"github.com/vertica/vertica-kubernetes/pkg/test"
appsv1 "k8s.io/api/apps/v1"
ctrl "sigs.k8s.io/controller-runtime"
)

// Exercises ScaleStafulsetReconciler against a sandboxed subcluster whose
// status already reports shutdown.
var _ = Describe("scalestatefulset_reconciler", func() {
	ctx := context.Background()

	It("should scale the sts to zero if subcluster is shut down", func() {
		const sc1 = "sc1"
		vdb := v1.MakeVDB()
		// Spec starts with shutdown off so the statefulset is created at full size.
		vdb.Spec.Subclusters = []v1.Subcluster{{Name: sc1, Size: 3, Shutdown: false}}
		vdb.Spec.Sandboxes = []v1.Sandbox{{
			Name:        sc1,
			Subclusters: []v1.SubclusterName{{Name: sc1}},
		}}
		vdb.Status.Subclusters = []v1.SubclusterStatus{{Name: sc1, Shutdown: true}}
		vdb.Status.Sandboxes = []v1.SandboxStatus{{
			Name:        sc1,
			Subclusters: []string{sc1},
		}}
		test.CreatePods(ctx, k8sClient, vdb, test.AllPodsRunning)
		defer test.DeletePods(ctx, k8sClient, vdb)

		subcluster := &vdb.Spec.Subclusters[0]

		// Before reconciling, the statefulset has the subcluster's size.
		before := &appsv1.StatefulSet{}
		Expect(k8sClient.Get(ctx, names.GenStsName(vdb, subcluster), before)).Should(Succeed())
		Expect(*before.Spec.Replicas).Should(Equal(subcluster.Size))

		// Request shutdown in the spec and run the reconciler for the sandbox.
		subcluster.Shutdown = true
		runner := &cmds.FakePodRunner{}
		pfacts := podfacts.MakePodFacts(sbRec, runner, logger, TestPassword)
		pfacts.SandboxName = sc1
		actor := MakeScaleStafulsetReconciler(sbRec, vdb, &pfacts)
		res, err := actor.Reconcile(ctx, &ctrl.Request{})
		Expect(err).Should(Succeed())
		Expect(res).Should(Equal(ctrl.Result{}))

		// Afterwards the statefulset must be scaled to zero.
		after := &appsv1.StatefulSet{}
		Expect(k8sClient.Get(ctx, names.GenStsName(vdb, subcluster), after)).Should(Succeed())
		Expect(*after.Spec.Replicas).Should(Equal(int32(0)))
	})
})
22 changes: 19 additions & 3 deletions pkg/controllers/vdb/status_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,12 @@ func (s *StatusReconciler) updateStatusFields(ctx context.Context) error {
continue
}

if err := s.calculateSubclusterStatus(ctx, subclusters[i], &vdbChg.Status.Subclusters[i]); err != nil {
return fmt.Errorf("failed to calculate subcluster status %s %w", subclusters[i].Name, err)
}
if !s.SkipShutdown {
s.updateShutdownStatus(subclusters[i], &vdbChg.Status.Subclusters[i])
}
if err := s.calculateSubclusterStatus(ctx, subclusters[i], &vdbChg.Status.Subclusters[i]); err != nil {
return fmt.Errorf("failed to calculate subcluster status %s %w", subclusters[i].Name, err)
}
}
s.calculateClusterStatus(&vdbChg.Status)
return nil
Expand Down Expand Up @@ -196,6 +196,14 @@ func (s *StatusReconciler) calculateSubclusterStatus(ctx context.Context, sc *va
if !ok {
continue
}
stsSize := sc.GetStsSize(s.Vdb)
if stsSize == 0 && stsSize != sc.Size {
s.setSubclusterStatusWhenShutdown(podIndex, curStat)
// At this point the subcluster pods have been deleted
// but we do not want to lose info like vnodename or subclusteroid
// so we jump to the next subcluster.
continue
}
curStat.Detail[podIndex].UpNode = pf.GetUpNode()
curStat.Detail[podIndex].Installed = pf.GetIsInstalled()
curStat.Detail[podIndex].AddedToDB = pf.GetDBExists()
Expand All @@ -220,6 +228,14 @@ func (s *StatusReconciler) calculateSubclusterStatus(ctx context.Context, sc *va
return nil
}

// setSubclusterStatusWhenShutdown clears the UpNode, Installed and AddedToDB
// flags of the detail entry at podIndex. Every other field of that entry is
// left untouched.
func (s *StatusReconciler) setSubclusterStatusWhenShutdown(podIndex int32, curStat *vapi.SubclusterStatus) {
	detail := &curStat.Detail[podIndex]
	detail.UpNode, detail.Installed, detail.AddedToDB = false, false, false
}

// updateShutdownStatus copies the subcluster's shutdown flag from the spec
// entry sc into the status entry curStat.
func (s *StatusReconciler) updateShutdownStatus(sc *vapi.Subcluster, curStat *vapi.SubclusterStatus) {
	curStat.Shutdown = sc.Shutdown
}
Expand Down
18 changes: 16 additions & 2 deletions tests/e2e-leg-10/sandbox-shutdown/50-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@ involvedObject:
kind: VerticaDB
name: v-sandbox-shutdown
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: v-sandbox-shutdown-sec1
status:
replicas: 0
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: v-sandbox-shutdown-sec3
status:
replicas: 0
---
apiVersion: vertica.com/v1
kind: VerticaDB
metadata:
Expand All @@ -41,14 +55,14 @@ status:
subclusters:
- addedToDBCount: 3
name: pri1
- addedToDBCount: 3
- addedToDBCount: 0
upNodeCount: 0
shutdown: true
name: sec1
- addedToDBCount: 1
upNodeCount: 1
name: sec2
- addedToDBCount: 1
- addedToDBCount: 0
upNodeCount: 0
shutdown: true
name: sec3
18 changes: 16 additions & 2 deletions tests/e2e-leg-10/sandbox-shutdown/55-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: apps/v1
kind: StatefulSet
metadata:
name: v-sandbox-shutdown-sec1
status:
replicas: 0
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: v-sandbox-shutdown-sec3
status:
replicas: 0
---
apiVersion: vertica.com/v1
kind: VerticaDB
metadata:
Expand All @@ -31,14 +45,14 @@ status:
subclusters:
- addedToDBCount: 3
name: pri1
- addedToDBCount: 3
- addedToDBCount: 0
upNodeCount: 0
shutdown: true
name: sec1
- addedToDBCount: 1
upNodeCount: 0
name: sec2
- addedToDBCount: 1
- addedToDBCount: 0
upNodeCount: 0
shutdown: true
name: sec3
18 changes: 16 additions & 2 deletions tests/e2e-leg-10/sandbox-shutdown/60-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: apps/v1
kind: StatefulSet
metadata:
name: v-sandbox-shutdown-sec1
status:
replicas: 0
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: v-sandbox-shutdown-sec3
status:
replicas: 0
---
apiVersion: vertica.com/v1
kind: VerticaDB
metadata:
Expand All @@ -19,14 +33,14 @@ status:
subclusters:
- addedToDBCount: 3
name: pri1
- addedToDBCount: 3
- addedToDBCount: 0
upNodeCount: 0
shutdown: true
name: sec1
- addedToDBCount: 1
upNodeCount: 1
name: sec2
- addedToDBCount: 1
- addedToDBCount: 0
upNodeCount: 0
shutdown: true
name: sec3
16 changes: 16 additions & 0 deletions tests/e2e-leg-10/sandbox-shutdown/65-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: apps/v1
kind: StatefulSet
metadata:
name: v-sandbox-shutdown-sec1
status:
replicas: 3
readyReplicas: 3
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: v-sandbox-shutdown-sec3
status:
replicas: 1
readyReplicas: 1
---
apiVersion: vertica.com/v1
kind: VerticaDB
metadata:
Expand Down

0 comments on commit 984df06

Please sign in to comment.