Skip to content

Commit f307fdc

Browse files
committed
test: automate scale test execution
Signed-off-by: Alex Castilio dos Santos <alexsantos@microsoft.com>
1 parent ca5f362 commit f307fdc

File tree

9 files changed

+286
-44
lines changed

9 files changed

+286
-44
lines changed
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
name: Daily Scale Test
2+
3+
on:
4+
push:
5+
branches:
6+
- alexcastilio/scale-test-workflow
7+
schedule:
8+
- cron: "0 0 * * *"
9+
10+
permissions:
11+
contents: read
12+
id-token: write
13+
14+
jobs:
15+
call-scale-test:
16+
uses: ./.github/workflows/scale-test.yaml
17+
with:
18+
num_deployments: 20
19+
num_replicas: 1000
20+
# TODO: Fix values
21+
num_netpol: 0
22+
num_nodes: 1000
23+
cleanup: false
24+
secrets: inherit

.github/workflows/scale-test.yaml

+12-14
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ on:
1515
description: "Image Namespace (if not set, default namespace will be used)"
1616
type: string
1717
image_tag:
18-
description: "Image Tag (if not set, default for this commit will be used)"
18+
description: "Image Tag (if not set, latest commit from 'main' will be used)"
1919
type: string
2020
num_deployments:
2121
description: "Number of Traffic Deployments"
@@ -36,25 +36,21 @@ on:
3636

3737
workflow_call:
3838
inputs:
39-
resource_group:
40-
description: "Azure Resource Group"
41-
required: true
42-
type: string
43-
cluster_name:
44-
description: "AKS Cluster Name"
45-
required: true
46-
type: string
4739
num_deployments:
4840
description: "Number of Traffic Deployments"
49-
default: 1000
41+
default: 100
5042
type: number
5143
num_replicas:
5244
description: "Number of Traffic Replicas per Deployment"
53-
default: 40
45+
default: 10
5446
type: number
5547
num_netpol:
5648
description: "Number of Network Policies"
57-
default: 1000
49+
default: 100
50+
type: number
51+
num_nodes:
52+
description: "Number of nodes per pool"
53+
default: 100
5854
type: number
5955
cleanup:
6056
description: "Clean up environment after test"
@@ -100,8 +96,10 @@ jobs:
10096
IMAGE_NAMESPACE: ${{ inputs.image_namespace == '' && github.repository || inputs.image_namespace }}
10197
TAG: ${{ inputs.image_tag }}
10298
AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
99+
NODES: ${{ inputs.num_nodes }}
100+
CREATE_INFRA: ${{ github.event_name != 'workflow_dispatch' }}
103101
shell: bash
104102
run: |
105103
set -euo pipefail
106-
[[ $TAG == "" ]] && TAG=$(make version)
107-
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=false -delete-infra=false
104+
[[ $TAG == "" ]] && TAG=$(curl -s https://api.github.com/repos/microsoft/retina/commits | jq -r '.[0].sha' | cut -c1-7)
105+
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=$(echo $CREATE_INFRA) -delete-infra=$(echo $CREATE_INFRA)

test/e2e/common/common.go

+51-2
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ package common
66

77
import (
88
"flag"
9-
"os"
109
"os/user"
1110
"path/filepath"
1211
"strconv"
1312
"testing"
1413
"time"
1514

15+
"github.com/microsoft/retina/test/e2e/framework/params"
1616
"github.com/stretchr/testify/require"
1717
)
1818

@@ -31,6 +31,13 @@ var (
3131
Architectures = []string{"amd64", "arm64"}
3232
CreateInfra = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing")
3333
DeleteInfra = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing")
34+
ScaleTestInfra = ScaleTestInfraHandler{
35+
location: params.Location,
36+
subscriptionID: params.SubscriptionID,
37+
resourceGroup: params.ResourceGroup,
38+
clusterName: params.ClusterName,
39+
nodes: params.Nodes,
40+
}
3441

3542
// kubeconfig: path to kubeconfig file, if not provided,
3643
// a new k8s cluster will be created
@@ -49,8 +56,50 @@ var (
4956
}
5057
)
5158

59+
// ScaleTestInfraHandler holds the Azure settings used by the scale test and
// supplies a fallback for most settings that were left empty.
type ScaleTestInfraHandler struct {
	location       string
	subscriptionID string
	resourceGroup  string
	clusterName    string
	nodes          string
}

// GetSubscriptionID returns the configured subscription ID unchanged; it has
// no fallback, so the result is "" when none was configured.
func (s ScaleTestInfraHandler) GetSubscriptionID() string {
	return s.subscriptionID
}

// GetLocation returns the configured location, or "westus2" when none is set.
func (s ScaleTestInfraHandler) GetLocation() string {
	if s.location != "" {
		return s.location
	}
	return "westus2"
}

// GetResourceGroup returns the configured resource group. When none is set,
// the cluster name (including its own fallback) is used as the group name.
func (s ScaleTestInfraHandler) GetResourceGroup() string {
	if s.resourceGroup != "" {
		return s.resourceGroup
	}
	return s.GetClusterName()
}

// GetNodes returns the configured number of nodes per pool as a string, or
// "100" when none is set. The value is not validated as a number here.
func (s ScaleTestInfraHandler) GetNodes() string {
	if s.nodes != "" {
		return s.nodes
	}
	return "100"
}

// GetClusterName returns the configured cluster name, or "retina-scale-test"
// when none is set.
func (s ScaleTestInfraHandler) GetClusterName() string {
	if s.clusterName != "" {
		return s.clusterName
	}
	return "retina-scale-test"
}
100+
52101
func ClusterNameForE2ETest(t *testing.T) string {
53-
clusterName := os.Getenv("CLUSTER_NAME")
102+
clusterName := params.ClusterName
54103
if clusterName == "" {
55104
curuser, err := user.Current()
56105
require.NoError(t, err)

test/e2e/framework/azure/create-cluster.go

+43-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package azure
33
import (
44
"context"
55
"fmt"
6+
"log"
67
"time"
78

89
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
@@ -23,6 +24,24 @@ type CreateCluster struct {
2324
ResourceGroupName string
2425
Location string
2526
ClusterName string
27+
podCidr string
28+
vmSize string
29+
networkPluginMode string
30+
Nodes int32
31+
}
32+
33+
func (c *CreateCluster) SetPodCidr(podCidr string) *CreateCluster {
34+
c.podCidr = podCidr
35+
return c
36+
}
37+
38+
func (c *CreateCluster) SetVMSize(vmSize string) *CreateCluster {
39+
c.vmSize = vmSize
40+
return c
41+
}
42+
func (c *CreateCluster) SetNetworkPluginMode(networkPluginMode string) *CreateCluster {
43+
c.networkPluginMode = networkPluginMode
44+
return c
2645
}
2746

2847
func (c *CreateCluster) Run() error {
@@ -36,15 +55,38 @@ func (c *CreateCluster) Run() error {
3655
if err != nil {
3756
return fmt.Errorf("failed to create client: %w", err)
3857
}
58+
if c.Nodes == 0 {
59+
c.Nodes = MaxNumberOfNodes
60+
}
61+
62+
template := GetStarterClusterTemplate(c.Location)
63+
64+
if c.Nodes > 0 {
65+
template.Properties.AgentPoolProfiles[0].Count = to.Ptr(c.Nodes)
66+
}
67+
68+
if c.podCidr != "" {
69+
template.Properties.NetworkProfile.PodCidr = to.Ptr(c.podCidr)
70+
}
71+
72+
if c.vmSize != "" {
73+
template.Properties.AgentPoolProfiles[0].VMSize = to.Ptr(c.vmSize)
74+
}
75+
76+
if c.networkPluginMode != "" {
77+
template.Properties.NetworkProfile.NetworkPluginMode = to.Ptr(armcontainerservice.NetworkPluginMode(c.networkPluginMode))
78+
}
3979

40-
poller, err := clientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, GetStarterClusterTemplate(c.Location), nil)
80+
log.Printf("creating cluster %s in location %s...", c.ClusterName, c.Location)
81+
poller, err := clientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, template, nil)
4182
if err != nil {
4283
return fmt.Errorf("failed to finish the create cluster request: %w", err)
4384
}
4485
_, err = poller.PollUntilDone(ctx, nil)
4586
if err != nil {
4687
return fmt.Errorf("failed to pull the create cluster result: %w", err)
4788
}
89+
log.Printf("cluster created %s in location %s...", c.ClusterName, c.Location)
4890

4991
return nil
5092
}

test/e2e/framework/kubernetes/create-kapinger-deployment.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment {
132132
Containers: []v1.Container{
133133
{
134134
Name: "kapinger",
135-
Image: "acnpublic.azurecr.io/kapinger:20241014.7",
135+
Image: "acnpublic.azurecr.io/kapinger:v0.0.23-9-g23ef222",
136136
Resources: v1.ResourceRequirements{
137137
Requests: v1.ResourceList{
138138
"memory": resource.MustParse("20Mi"),
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package kubernetes
2+
3+
import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"strings"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)
15+
16+
// patchStringValue is one operation of a JSON Patch document whose value is a
// string. It marshals to {"op": ..., "path": ..., "value": ...}.
type patchStringValue struct {
	Op    string `json:"op"`    // operation name, e.g. "add"
	Path  string `json:"path"`  // JSON Pointer to the target location
	Value string `json:"value"` // string value to apply at Path
}
21+
22+
// LabelNodes is a test step that applies a set of labels to every node of the
// cluster reachable through a kubeconfig file.
type LabelNodes struct {
	// KubeConfigFilePath is the kubeconfig used to build the Kubernetes client.
	KubeConfigFilePath string
	// Labels maps each label key to the value set on every node.
	Labels map[string]string
}
26+
27+
func (l *LabelNodes) Prevalidate() error {
28+
return nil
29+
}
30+
31+
func (l *LabelNodes) Run() error {
32+
config, err := clientcmd.BuildConfigFromFlags("", l.KubeConfigFilePath)
33+
if err != nil {
34+
return fmt.Errorf("error building kubeconfig: %w", err)
35+
}
36+
37+
clientset, err := kubernetes.NewForConfig(config)
38+
if err != nil {
39+
return fmt.Errorf("error creating Kubernetes client: %w", err)
40+
}
41+
42+
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
43+
defer cancel()
44+
45+
nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
46+
if err != nil {
47+
return fmt.Errorf("failed to get nodes: %w", err)
48+
}
49+
50+
patch := []patchStringValue{}
51+
for k, v := range l.Labels {
52+
patch = append(patch, patchStringValue{
53+
Op: "add",
54+
Path: "/metadata/labels/" + k,
55+
Value: v,
56+
})
57+
}
58+
b, err := json.Marshal(patch)
59+
if err != nil {
60+
return fmt.Errorf("failed to marshal patch: %w", err)
61+
}
62+
63+
for i := range nodes.Items {
64+
log.Println("Labeling node", nodes.Items[i].Name)
65+
_, err = clientset.CoreV1().Nodes().Patch(ctx, nodes.Items[i].Name, types.JSONPatchType, b, metav1.PatchOptions{})
66+
if err != nil {
67+
return fmt.Errorf("failed to patch pod: %w", err)
68+
}
69+
}
70+
71+
return nil
72+
}
73+
74+
func (l *LabelNodes) Stop() error {
75+
return nil
76+
}

test/e2e/framework/params/params.go

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package params
2+
3+
import (
4+
"os"
5+
)
6+
7+
// Test settings, each read from an environment variable once at package
// initialization. A variable that is not set yields the empty string; no
// defaults or validation are applied here.
var (
	// Azure placement and identity.
	Location       = os.Getenv("LOCATION")
	SubscriptionID = os.Getenv("AZURE_SUBSCRIPTION_ID")
	ResourceGroup  = os.Getenv("AZURE_RESOURCE_GROUP")
	ClusterName    = os.Getenv("CLUSTER_NAME")

	// Scale test sizing, kept as raw strings.
	Nodes              = os.Getenv("NODES")
	NumDeployments     = os.Getenv("NUM_DEPLOYMENTS")
	NumReplicas        = os.Getenv("NUM_REPLICAS")
	NumNetworkPolicies = os.Getenv("NUM_NET_POL")

	// CleanUp is the raw value of CLEANUP.
	CleanUp = os.Getenv("CLEANUP")
)

test/e2e/jobs/scale.go

+47
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"time"
66

77
"github.com/microsoft/retina/test/e2e/common"
8+
"github.com/microsoft/retina/test/e2e/framework/azure"
9+
"github.com/microsoft/retina/test/e2e/framework/generic"
810
"github.com/microsoft/retina/test/e2e/framework/kubernetes"
911
"github.com/microsoft/retina/test/e2e/framework/scaletest"
1012
"github.com/microsoft/retina/test/e2e/framework/types"
@@ -45,6 +47,51 @@ func DefaultScaleTestOptions() scaletest.Options {
4547
}
4648
}
4749

50+
func GetScaleTestInfra(subID, rg, clusterName, location, kubeConfigFilePath string, nodes int32, createInfra bool) *types.Job {
51+
job := types.NewJob("Get scale test infrastructure")
52+
53+
if createInfra {
54+
job.AddStep(&azure.CreateResourceGroup{
55+
SubscriptionID: subID,
56+
ResourceGroupName: rg,
57+
Location: location,
58+
}, nil)
59+
60+
job.AddStep((&azure.CreateCluster{
61+
ClusterName: clusterName,
62+
Nodes: nodes,
63+
}).
64+
SetPodCidr("100.64.0.0/10").
65+
SetVMSize("Standard_D4_v3").
66+
SetNetworkPluginMode("overlay"), nil)
67+
68+
job.AddStep(&azure.GetAKSKubeConfig{
69+
KubeConfigFilePath: kubeConfigFilePath,
70+
}, nil)
71+
72+
} else {
73+
job.AddStep(&azure.GetAKSKubeConfig{
74+
KubeConfigFilePath: kubeConfigFilePath,
75+
ClusterName: clusterName,
76+
SubscriptionID: subID,
77+
ResourceGroupName: rg,
78+
Location: location,
79+
}, nil)
80+
}
81+
82+
job.AddStep(&kubernetes.LabelNodes{
83+
Labels: map[string]string{"scale-test": "true"},
84+
}, nil)
85+
86+
job.AddStep(&generic.LoadFlags{
87+
TagEnv: generic.DefaultTagEnv,
88+
ImageNamespaceEnv: generic.DefaultImageNamespace,
89+
ImageRegistryEnv: generic.DefaultImageRegistry,
90+
}, nil)
91+
92+
return job
93+
}
94+
4895
func ScaleTest(opt *scaletest.Options) *types.Job {
4996
job := types.NewJob("Scale Test")
5097

0 commit comments

Comments
 (0)