Skip to content

Commit aacf09b

Browse files
committed
test: automate scale test execution
Signed-off-by: Alex Castilio dos Santos <alexsantos@microsoft.com>
1 parent ca5f362 commit aacf09b

File tree

10 files changed

+284
-44
lines changed

10 files changed

+284
-44
lines changed
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Runs the reusable scale-test workflow with a fixed set of inputs.
name: Daily Scale Test

on:
  # NOTE(review): this push trigger targets a personal feature branch and looks
  # like a development aid — confirm whether it should remain.
  push:
    branches:
      - alexcastilio/scale-test-workflow
  schedule:
    # Once a day at 00:00 (GitHub Actions evaluates cron schedules in UTC).
    - cron: "0 0 * * *"

permissions:
  contents: read
  # NOTE(review): presumably required for OIDC login in the called workflow — confirm.
  id-token: write

jobs:
  call-scale-test:
    uses: ./.github/workflows/scale-test.yaml
    # Values passed to the inputs declared by the called workflow.
    with:
      num_deployments: 30
      num_replicas: 10
      # TODO: Fix values
      num_netpol: 0
      num_nodes: 30
      cleanup: false
    secrets: inherit

.github/workflows/scale-test.yaml

+12-14
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ on:
1515
description: "Image Namespace (if not set, default namespace will be used)"
1616
type: string
1717
image_tag:
18-
description: "Image Tag (if not set, default for this commit will be used)"
18+
description: "Image Tag (if not set, latest commit from 'main' will be used)"
1919
type: string
2020
num_deployments:
2121
description: "Number of Traffic Deployments"
@@ -36,25 +36,21 @@ on:
3636

3737
workflow_call:
3838
inputs:
39-
resource_group:
40-
description: "Azure Resource Group"
41-
required: true
42-
type: string
43-
cluster_name:
44-
description: "AKS Cluster Name"
45-
required: true
46-
type: string
4739
num_deployments:
4840
description: "Number of Traffic Deployments"
49-
default: 1000
41+
default: 100
5042
type: number
5143
num_replicas:
5244
description: "Number of Traffic Replicas per Deployment"
53-
default: 40
45+
default: 10
5446
type: number
5547
num_netpol:
5648
description: "Number of Network Policies"
57-
default: 1000
49+
default: 100
50+
type: number
51+
num_nodes:
52+
description: "Number of nodes per pool"
53+
default: 100
5854
type: number
5955
cleanup:
6056
description: "Clean up environment after test"
@@ -100,8 +96,10 @@ jobs:
10096
IMAGE_NAMESPACE: ${{ inputs.image_namespace == '' && github.repository || inputs.image_namespace }}
10197
TAG: ${{ inputs.image_tag }}
10298
AZURE_APP_INSIGHTS_KEY: ${{ secrets.AZURE_APP_INSIGHTS_KEY }}
99+
NODES_PER_POOL: ${{ inputs.num_nodes }}
100+
CREATE_INFRA: ${{ github.event_name != 'workflow_dispatch' }}
103101
shell: bash
104102
run: |
105103
set -euo pipefail
106-
[[ $TAG == "" ]] && TAG=$(make version)
107-
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=false -delete-infra=false
104+
[[ $TAG == "" ]] && TAG=$(curl -s https://api.github.com/repos/microsoft/retina/commits | jq -r '.[0].sha' | cut -c1-7)
105+
go test -v ./test/e2e/. -timeout 300m -tags=scale -count=1 -args -create-infra=$(echo $CREATE_INFRA) -delete-infra=$(echo $CREATE_INFRA)

test/e2e/common/common.go

+51-2
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ package common
66

77
import (
88
"flag"
9-
"os"
109
"os/user"
1110
"path/filepath"
1211
"strconv"
1312
"testing"
1413
"time"
1514

15+
"github.com/microsoft/retina/test/e2e/framework/params"
1616
"github.com/stretchr/testify/require"
1717
)
1818

@@ -31,6 +31,13 @@ var (
3131
Architectures = []string{"amd64", "arm64"}
3232
CreateInfra = flag.Bool("create-infra", true, "create a Resource group, vNET and AKS cluster for testing")
3333
DeleteInfra = flag.Bool("delete-infra", true, "delete a Resource group, vNET and AKS cluster for testing")
34+
ScaleTestInfra = ScaleTestInfraHandler{
35+
location: params.Location,
36+
subscriptionID: params.SubscriptionID,
37+
resourceGroup: params.ResourceGroup,
38+
clusterName: params.ClusterName,
39+
nodesPerPool: params.NodesPerPool,
40+
}
3441

3542
// kubeconfig: path to kubeconfig file, if not provided,
3643
// a new k8s cluster will be created
@@ -49,8 +56,50 @@ var (
4956
}
5057
)
5158

59+
// ScaleTestInfraHandler carries the infrastructure settings used by the scale
// test. Every field is a raw string; the getters substitute a default when a
// field is empty.
type ScaleTestInfraHandler struct {
	location       string
	subscriptionID string
	resourceGroup  string
	clusterName    string
	nodesPerPool   string
}

// GetSubscriptionID returns the configured subscription ID as-is; there is no
// default, so an unset value yields the empty string.
func (s ScaleTestInfraHandler) GetSubscriptionID() string {
	return s.subscriptionID
}

// GetLocation returns the configured location, or "westus2" when none is set.
func (s ScaleTestInfraHandler) GetLocation() string {
	if s.location != "" {
		return s.location
	}
	return "westus2"
}

// GetResourceGroup returns the configured resource group. When none is set the
// cluster name (see GetClusterName) doubles as the resource group name.
func (s ScaleTestInfraHandler) GetResourceGroup() string {
	if s.resourceGroup == "" {
		return s.GetClusterName()
	}
	return s.resourceGroup
}

// GetNodesPerPool returns the configured node count per pool as a string, or
// "100" when none is set.
func (s ScaleTestInfraHandler) GetNodesPerPool() string {
	if s.nodesPerPool != "" {
		return s.nodesPerPool
	}
	return "100"
}

// GetClusterName returns the configured cluster name, or "retina-scale-test"
// when none is set.
func (s ScaleTestInfraHandler) GetClusterName() string {
	if s.clusterName == "" {
		return "retina-scale-test"
	}
	return s.clusterName
}
100+
52101
func ClusterNameForE2ETest(t *testing.T) string {
53-
clusterName := os.Getenv("CLUSTER_NAME")
102+
clusterName := params.ClusterName
54103
if clusterName == "" {
55104
curuser, err := user.Current()
56105
require.NoError(t, err)

test/e2e/framework/azure/create-cluster.go

+40-1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,24 @@ type CreateCluster struct {
2323
ResourceGroupName string
2424
Location string
2525
ClusterName string
26+
podCidr string
27+
vmSize string
28+
networkPluginMode string
29+
Nodes int32
30+
}
31+
32+
// SetPodCidr records the pod CIDR to request for the cluster and returns the
// receiver so that setters can be chained.
func (c *CreateCluster) SetPodCidr(podCidr string) *CreateCluster {
	c.podCidr = podCidr
	return c
}
36+
37+
// SetVMSize records the VM size to request for the cluster and returns the
// receiver so that setters can be chained.
func (c *CreateCluster) SetVMSize(vmSize string) *CreateCluster {
	c.vmSize = vmSize
	return c
}
41+
// SetNetworkPluginMode records the network plugin mode to request for the
// cluster and returns the receiver so that setters can be chained.
func (c *CreateCluster) SetNetworkPluginMode(networkPluginMode string) *CreateCluster {
	c.networkPluginMode = networkPluginMode
	return c
}
2745

2846
func (c *CreateCluster) Run() error {
@@ -36,8 +54,29 @@ func (c *CreateCluster) Run() error {
3654
if err != nil {
3755
return fmt.Errorf("failed to create client: %w", err)
3856
}
57+
if c.Nodes == 0 {
58+
c.Nodes = MaxNumberOfNodes
59+
}
60+
61+
template := GetStarterClusterTemplate(c.Location)
62+
63+
if c.Nodes > 0 {
64+
template.Properties.AgentPoolProfiles[0].Count = to.Ptr(c.Nodes)
65+
}
66+
67+
if c.podCidr != "" {
68+
template.Properties.NetworkProfile.PodCidr = to.Ptr(c.podCidr)
69+
}
70+
71+
if c.vmSize != "" {
72+
template.Properties.AgentPoolProfiles[0].VMSize = to.Ptr(c.vmSize)
73+
}
74+
75+
if c.networkPluginMode != "" {
76+
template.Properties.NetworkProfile.NetworkPluginMode = to.Ptr(armcontainerservice.NetworkPluginMode(c.networkPluginMode))
77+
}
3978

40-
poller, err := clientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, GetStarterClusterTemplate(c.Location), nil)
79+
poller, err := clientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, template, nil)
4180
if err != nil {
4281
return fmt.Errorf("failed to finish the create cluster request: %w", err)
4382
}

test/e2e/framework/azure/enable-ama.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ az aks update --enable-azure-monitor-metrics \
9595
return fmt.Errorf("failed to write cluster JSON to file for AMA: %w", err)
9696
}
9797

98-
poller, err := aksClientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, GetStarterClusterTemplate(c.Location), nil)
98+
poller, err := aksClientFactory.NewManagedClustersClient().BeginCreateOrUpdate(ctx, c.ResourceGroupName, c.ClusterName, GetStarterClusterTemplate(c.Location, MaxNumberOfNodes), nil)
9999
if err != nil {
100100
return fmt.Errorf("failed to finish the update cluster request for AMA: %w", err)
101101
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package kubernetes
2+
3+
import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"strings"
	"time"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)
15+
16+
// patchStringValue is a single JSON Patch operation whose value is a string.
// It is serialized with the keys "op", "path" and "value".
type patchStringValue struct {
	Op    string `json:"op"`
	Path  string `json:"path"`
	Value string `json:"value"`
}
21+
22+
// LabelNodes is a step that applies a set of labels to every node of a cluster.
type LabelNodes struct {
	// KubeConfigFilePath is the kubeconfig used to build the Kubernetes client.
	KubeConfigFilePath string
	// Labels maps label keys to the values to set on each node.
	Labels map[string]string
}
26+
27+
// Prevalidate performs no checks and always returns nil.
func (l *LabelNodes) Prevalidate() error {
	return nil
}
30+
31+
func (l *LabelNodes) Run() error {
32+
config, err := clientcmd.BuildConfigFromFlags("", l.KubeConfigFilePath)
33+
if err != nil {
34+
return fmt.Errorf("error building kubeconfig: %w", err)
35+
}
36+
37+
clientset, err := kubernetes.NewForConfig(config)
38+
if err != nil {
39+
return fmt.Errorf("error creating Kubernetes client: %w", err)
40+
}
41+
42+
ctx, cancel := context.WithTimeout(context.Background(), defaultTimeoutSeconds*time.Second)
43+
defer cancel()
44+
45+
nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
46+
if err != nil {
47+
return fmt.Errorf("failed to get nodes: %w", err)
48+
}
49+
50+
patch := []patchStringValue{}
51+
for k, v := range l.Labels {
52+
patch = append(patch, patchStringValue{
53+
Op: "add",
54+
Path: "/metadata/labels/" + k,
55+
Value: v,
56+
})
57+
}
58+
b, err := json.Marshal(patch)
59+
if err != nil {
60+
return fmt.Errorf("failed to marshal patch: %w", err)
61+
}
62+
63+
for i := range nodes.Items {
64+
log.Println("Labeling node", nodes.Items[i].Name)
65+
_, err = clientset.CoreV1().Nodes().Patch(ctx, nodes.Items[i].Name, types.JSONPatchType, b, metav1.PatchOptions{})
66+
if err != nil {
67+
return fmt.Errorf("failed to patch pod: %w", err)
68+
}
69+
}
70+
71+
return nil
72+
}
73+
74+
// Stop has nothing to tear down and always returns nil.
func (l *LabelNodes) Stop() error {
	return nil
}

test/e2e/framework/params/params.go

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package params
2+
3+
import (
4+
"os"
5+
)
6+
7+
// Test parameters, each read from the process environment when the package is
// initialized. Values are kept as the raw strings returned by os.Getenv, so an
// unset variable yields "".
var (
	Location           = os.Getenv("LOCATION")
	SubscriptionID     = os.Getenv("AZURE_SUBSCRIPTION_ID")
	ResourceGroup      = os.Getenv("AZURE_RESOURCE_GROUP")
	ClusterName        = os.Getenv("CLUSTER_NAME")
	NodesPerPool       = os.Getenv("NODES_PER_POOL")
	NumDeployments     = os.Getenv("NUM_DEPLOYMENTS")
	NumReplicas        = os.Getenv("NUM_REPLICAS")
	NumNetworkPolicies = os.Getenv("NUM_NET_POL")
	CleanUp            = os.Getenv("CLEANUP")
)

test/e2e/jobs/jobs.go

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ func CreateTestInfra(subID, rg, clusterName, location, kubeConfigFilePath string
3939
PodCidr: "10.128.0.0/9",
4040
DNSServiceIP: "192.168.0.10",
4141
ServiceCidr: "192.168.0.0/28",
42+
NodesPerPool: 1,
4243
}, nil)
4344

4445
job.AddStep(&azure.GetAKSKubeConfig{

test/e2e/jobs/scale.go

+47
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"time"
66

77
"github.com/microsoft/retina/test/e2e/common"
8+
"github.com/microsoft/retina/test/e2e/framework/azure"
9+
"github.com/microsoft/retina/test/e2e/framework/generic"
810
"github.com/microsoft/retina/test/e2e/framework/kubernetes"
911
"github.com/microsoft/retina/test/e2e/framework/scaletest"
1012
"github.com/microsoft/retina/test/e2e/framework/types"
@@ -45,6 +47,51 @@ func DefaultScaleTestOptions() scaletest.Options {
4547
}
4648
}
4749

50+
func GetScaleTestInfra(subID, rg, clusterName, location, kubeConfigFilePath string, nodesPerPool int32, createInfra bool) *types.Job {
51+
job := types.NewJob("Get scale test infrastructure")
52+
53+
if createInfra {
54+
job.AddStep(&azure.CreateResourceGroup{
55+
SubscriptionID: subID,
56+
ResourceGroupName: rg,
57+
Location: location,
58+
}, nil)
59+
60+
job.AddStep((&azure.CreateCluster{
61+
ClusterName: clusterName,
62+
NodesPerPool: nodesPerPool,
63+
}).
64+
SetPodCidr("100.64.0.0/10").
65+
SetVMSize("Standard_D4_v3").
66+
SetNetworkPluginMode("overlay"), nil)
67+
68+
job.AddStep(&azure.GetAKSKubeConfig{
69+
KubeConfigFilePath: kubeConfigFilePath,
70+
}, nil)
71+
72+
} else {
73+
job.AddStep(&azure.GetAKSKubeConfig{
74+
KubeConfigFilePath: kubeConfigFilePath,
75+
ClusterName: clusterName,
76+
SubscriptionID: subID,
77+
ResourceGroupName: rg,
78+
Location: location,
79+
}, nil)
80+
}
81+
82+
job.AddStep(&kubernetes.LabelNodes{
83+
Labels: map[string]string{"scale-test": "true"},
84+
}, nil)
85+
86+
job.AddStep(&generic.LoadFlags{
87+
TagEnv: generic.DefaultTagEnv,
88+
ImageNamespaceEnv: generic.DefaultImageNamespace,
89+
ImageRegistryEnv: generic.DefaultImageRegistry,
90+
}, nil)
91+
92+
return job
93+
}
94+
4895
func ScaleTest(opt *scaletest.Options) *types.Job {
4996
job := types.NewJob("Scale Test")
5097

0 commit comments

Comments
 (0)