Commit c84a974

Kubectl ko diagnose perf release 1.11 (#2967)
* kubectl ko diagnose perf add to install.sh
* diagnose subnet and kubectl ko perf refactor
1 parent 6325c83 commit c84a974

7 files changed: +291 -7 lines

cmd/daemon/cniserver.go  (+17)
@@ -111,6 +111,23 @@ func CmdMain() {
 			}
 		}
 	}
+
+	if config.EnableVerboseConnCheck {
+		go func() {
+			connListenaddr := fmt.Sprintf("%s:%d", addr, config.TCPConnCheckPort)
+			if err := util.TCPConnectivityListen(connListenaddr); err != nil {
+				util.LogFatalAndExit(err, "failed to start TCP listen on addr %s", addr)
+			}
+		}()
+
+		go func() {
+			connListenaddr := fmt.Sprintf("%s:%d", addr, config.UDPConnCheckPort)
+			if err := util.UDPConnectivityListen(connListenaddr); err != nil {
+				util.LogFatalAndExit(err, "failed to start UDP listen on addr %s", addr)
+			}
+		}()
+	}
+
 	// conform to Gosec G114
 	// https://github.com/securego/gosec#available-rules
 	server := &http.Server{
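
With this change the CNI daemon, when started with --enable-verbose-conn-check, runs bare TCP and UDP listeners alongside its HTTP server, giving the pinger fixed ports to dial. A manual probe from another host might look like this (the node IP is illustrative; 8100/8101 are the flag defaults from pkg/daemon/config.go below):

    # TCP: a zero-I/O connect scan succeeds if the listener is up
    nc -vz 192.0.2.10 8100
    # UDP: fire a datagram; whether a reply comes back depends on how
    # util.UDPConnectivityListen is implemented, which this diff does not show
    echo ping | nc -u -w1 192.0.2.10 8101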

cmd/pinger/pinger.go  (+16)
@@ -36,6 +36,22 @@ func CmdMain() {
 			}
 			util.LogFatalAndExit(server.ListenAndServe(), "failed to listen and serve on %s", server.Addr)
 		}()
+
+		if config.EnableVerboseConnCheck {
+			go func() {
+				addr := fmt.Sprintf("0.0.0.0:%d", config.TCPConnCheckPort)
+				if err := util.TCPConnectivityListen(addr); err != nil {
+					util.LogFatalAndExit(err, "failed to start TCP listen on addr %s", addr)
+				}
+			}()
+
+			go func() {
+				addr := fmt.Sprintf("0.0.0.0:%d", config.UDPConnCheckPort)
+				if err := util.UDPConnectivityListen(addr); err != nil {
+					util.LogFatalAndExit(err, "failed to start UDP listen on addr %s", addr)
+				}
+			}()
+		}
 	}
 	e := pinger.NewExporter(config)
 	pinger.StartPinger(config, e)
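
Unlike the CNI daemon, which binds its listeners to its configured address, the pinger binds 0.0.0.0, so the check ports answer on whatever pod or host IP it is given; that is what lets the conn-check DaemonSet below serve as a per-subnet target. Started as a server it would be invoked roughly like this (flags and defaults from pkg/pinger/config.go):

    /kube-ovn/kube-ovn-pinger --enable-verbose-conn-check=true \
        --tcp-conn-check-port=8100 --udp-conn-check-port=8101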

dist/images/kubectl-ko  (+119 -6)
@@ -10,6 +10,8 @@ REGISTRY="kubeovn"
 OVN_NORTHD_POD=
 PERF_TIMES=5
 PERF_LABEL="PerfTest"
+CONN_CHECK_LABEL="conn-check"
+CONN_CHECK_SERVER="conn-check-server"

 showHelp(){
   echo "kubectl ko {subcommand} [option...]"
@@ -25,7 +27,7 @@ showHelp(){
   echo "  trace ... trace ovn microflow of specific packet"
   echo "    trace {namespace/podname} {target ip address} [target mac address] {icmp|tcp|udp} [target tcp/udp port] trace ICMP/TCP/UDP"
   echo "    trace {namespace/podname} {target ip address} [target mac address] arp {request|reply} trace ARP request/reply"
-  echo "  diagnose {all|node} [nodename] diagnose connectivity of all nodes or a specific node"
+  echo "  diagnose {all|node|subnet} [nodename|subnetName] diagnose connectivity of all nodes, a specific node, or the conn-check pods on a specific subnet"
   echo "  env-check check the environment configuration"
   echo "  tuning {install-fastpath|local-install-fastpath|remove-fastpath|install-stt|local-install-stt|remove-stt} {centos7|centos8} [kernel-devel-version] deploy kernel optimisation components to the system"
   echo "  reload restart all kube-ovn components"
@@ -475,6 +477,67 @@ checkLeader(){
   echo "ovn-$component leader check ok"
 }

+applyConnServerDaemonset(){
+  subnetName=$1
+
+  if [ $(kubectl get subnet $subnetName | wc -l) -eq 0 ]; then
+    echo "no subnet $subnetName exists!"
+    exit 1
+  fi
+
+  imageID=$(kubectl get ds -n $KUBE_OVN_NS kube-ovn-pinger -o jsonpath={.spec.template.spec.containers[0].image})
+  tmpFileName="conn-server.yaml"
+  cat <<EOF > $tmpFileName
+kind: DaemonSet
+apiVersion: apps/v1
+metadata:
+  name: $subnetName-$CONN_CHECK_SERVER
+  namespace: $KUBE_OVN_NS
+spec:
+  selector:
+    matchLabels:
+      app: $CONN_CHECK_LABEL
+  template:
+    metadata:
+      annotations:
+        ovn.kubernetes.io/logical_switch: $subnetName
+      labels:
+        app: $CONN_CHECK_LABEL
+    spec:
+      serviceAccountName: ovn
+      containers:
+        - name: $subnetName-$CONN_CHECK_SERVER
+          imagePullPolicy: IfNotPresent
+          image: $imageID
+          command:
+            - /kube-ovn/kube-ovn-pinger
+          args:
+            - --enable-verbose-conn-check=true
+          env:
+            - name: POD_NAME
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.name
+EOF
+  kubectl apply -f $tmpFileName
+  rm $tmpFileName
+
+  isfailed=true
+  for i in {0..59}
+  do
+    if kubectl wait pod --for=condition=Ready -l app=$CONN_CHECK_LABEL -n $KUBE_OVN_NS ; then
+      isfailed=false
+      break
+    fi
+    sleep 1
+  done
+
+  if $isfailed; then
+    echo "Error: ds $subnetName-$CONN_CHECK_SERVER pods not ready"
+    return
+  fi
+}
+
 diagnose(){
   kubectl get crd vpcs.kubeovn.io
   kubectl get crd vpc-nat-gateways.kubeovn.io
@@ -562,9 +625,27 @@ diagnose(){
     echo "### finish diagnose node $nodeName"
     echo ""
     ;;
+  subnet)
+    subnetName="$2"
+    applyConnServerDaemonset $subnetName
+
+    if [ $(kubectl get ds kube-ovn-cni -n $KUBE_OVN_NS -oyaml | grep enable-verbose-conn-check | wc -l) -eq 0 ]; then
+      echo "Warning: kube-ovn-cni is not running with --enable-verbose-conn-check, so node tcp/udp connectivity checks will fail"
+    fi
+
+    pingers=$(kubectl -n $KUBE_OVN_NS get po --no-headers -o custom-columns=NAME:.metadata.name -l app=kube-ovn-pinger)
+    for pinger in $pingers
+    do
+      echo "#### pinger diagnose results:"
+      kubectl exec -n $KUBE_OVN_NS "$pinger" -- /kube-ovn/kube-ovn-pinger --mode=job --ds-name=$subnetName-$CONN_CHECK_SERVER --ds-namespace=$KUBE_OVN_NS --enable-verbose-conn-check=true
+      echo ""
+    done
+
+    kubectl delete ds $subnetName-$CONN_CHECK_SERVER -n $KUBE_OVN_NS
+    ;;
   *)
     echo "type $type not supported"
-    echo "kubectl ko diagnose {all|node} [nodename]"
+    echo "kubectl ko diagnose {all|node|subnet} [nodename|subnetName]"
     ;;
   esac
 }
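
Taken together, diagnose subnet deploys a conn-check DaemonSet onto the target subnet, has every kube-ovn-pinger pod run a one-shot job against it, and cleans the DaemonSet up afterwards. A usage sketch (the subnet name is illustrative):

    # check TCP/UDP/ping connectivity to conn-check pods on subnet ovn-default
    kubectl ko diagnose subnet ovn-default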
@@ -1121,6 +1202,9 @@ perf(){
   echo "Start doing pod multicast network performance"
   multicastPerfTest

+  echo "Start doing host multicast network performance"
+  multicastHostPerfTest
+
   echo "Start doing leader recover time test"
   checkLeaderRecover

@@ -1149,6 +1233,34 @@ unicastPerfTest() {
   rm temp_perf_result.log
 }

+getAddressNic() {
+  podName=$1
+  ipAddress=$2
+
+  interface=$(kubectl exec $podName -n $KUBE_OVN_NS -- ip -o addr show | awk '{split($4, a, "/"); print $2, a[1]}' | awk -v ip="$ipAddress" '$0 ~ ip {print $1}')
+  echo "$interface"
+}
+
+multicastHostPerfTest() {
+  clientNode=$(kubectl get pod test-host-client -n $KUBE_OVN_NS -o jsonpath={.spec.nodeName})
+  serverNode=$(kubectl get pod test-host-server -n $KUBE_OVN_NS -o jsonpath={.spec.nodeName})
+
+  clientHostIP=$(kubectl get pod test-host-client -n $KUBE_OVN_NS -o jsonpath={.status.hostIP})
+  serverHostIP=$(kubectl get pod test-host-server -n $KUBE_OVN_NS -o jsonpath={.status.hostIP})
+
+  clientNic=$(getAddressNic test-host-client $clientHostIP)
+  serverNic=$(getAddressNic test-host-server $serverHostIP)
+
+  clientovsPod=$(kubectl get pod -owide -A | grep ovs-ovn | grep $clientNode | awk '{print $2}')
+  kubectl exec $clientovsPod -n kube-system -- ip maddr add 01:00:5e:00:00:64 dev $clientNic
+  serverovsPod=$(kubectl get pod -owide -A | grep ovs-ovn | grep $serverNode | awk '{print $2}')
+  kubectl exec $serverovsPod -n kube-system -- ip maddr add 01:00:5e:00:00:64 dev $serverNic
+  genMulticastPerfResult test-host-server test-host-client
+
+  kubectl exec $clientovsPod -n kube-system -- ip maddr del 01:00:5e:00:00:64 dev $clientNic
+  kubectl exec $serverovsPod -n kube-system -- ip maddr del 01:00:5e:00:00:64 dev $serverNic
+}
+
 multicastPerfTest() {
   clientNode=$(kubectl get pod test-client -n $KUBE_OVN_NS -o jsonpath={.spec.nodeName})
   serverNode=$(kubectl get pod test-server -n $KUBE_OVN_NS -o jsonpath={.spec.nodeName})
@@ -1158,13 +1270,14 @@ multicastPerfTest() {
   kubectl exec $clientovsPod -n kube-system -- ip netns exec $clientNs ip maddr add 01:00:5e:00:00:64 dev eth0
   serverovsPod=$(kubectl get pod -owide -A | grep ovs-ovn | grep $serverNode | awk '{print $2}')
   kubectl exec $serverovsPod -n kube-system -- ip netns exec $serverNs ip maddr add 01:00:5e:00:00:64 dev eth0
-  genMulticastPerfResult test-server
+  genMulticastPerfResult test-server test-client
   kubectl exec $clientovsPod -n kube-system -- ip netns exec $clientNs ip maddr del 01:00:5e:00:00:64 dev eth0
   kubectl exec $serverovsPod -n kube-system -- ip netns exec $serverNs ip maddr del 01:00:5e:00:00:64 dev eth0
 }

 genMulticastPerfResult() {
   serverName=$1
+  clientName=$2

   start_server_cmd="iperf -s -B 224.0.0.100 -i 1 -u"
   kubectl exec $serverName -n $KUBE_OVN_NS -- $start_server_cmd > $serverName.log &
@@ -1173,10 +1286,10 @@ genMulticastPerfResult() {
   printf "%-15s %-15s %-15s %-15s\n" "Size" "UDP Latency" "UDP Lost Rate" "UDP Bandwidth"
   for size in "64" "128" "512" "1k" "4k"
   do
-    kubectl exec test-client -n $KUBE_OVN_NS -- iperf -c 224.0.0.100 -u -T 32 -t $PERF_TIMES -i 1 -b 1000G -l $size > /dev/null
+    kubectl exec $clientName -n $KUBE_OVN_NS -- iperf -c 224.0.0.100 -u -T 32 -t $PERF_TIMES -i 1 -b 1000G -l $size > /dev/null
     udpBw=$(cat $serverName.log | grep -oP '\d+\.?\d* [KMG]bits/sec' | tail -n 1)
     udpLostRate=$(cat $serverName.log | grep -oP '\(\d+(\.\d+)?%\)' | tail -n 1)
-    kubectl exec test-client -n $KUBE_OVN_NS -- iperf -c 224.0.0.100 -u -T 32 -t $PERF_TIMES -i 1 -l $size > /dev/null
+    kubectl exec $clientName -n $KUBE_OVN_NS -- iperf -c 224.0.0.100 -u -T 32 -t $PERF_TIMES -i 1 -l $size > /dev/null
     udpLat=$(cat $serverName.log | grep -oP '\d+\.?\d* ms' | tail -n 1)
     printf "%-15s %-15s %-15s %-15s\n" "$size" "$udpLat" "$udpLostRate" "$udpBw"
   done
@@ -1218,7 +1331,7 @@ getPodRecoverTime(){
   while [ $availableNum != $replicas ]
   do
     availableNum=$(kubectl get deployment -n kube-system | grep ovn-central | awk {'print $4'})
-    usleep 0.001
+    sleep 0.001
   done

   end_time=$(date +%s.%N)
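
With these fixes, kubectl ko perf measures host-network multicast as well as pod multicast (the earlier code hard-coded test-client as the iperf sender, so genMulticastPerfResult could not be reused for the host pods), and leader-recovery timing no longer depends on the non-portable usleep. Running the suite is unchanged:

    # runs unicast, multicast, host multicast and leader-recovery tests
    kubectl ko perf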

pkg/daemon/config.go  (+10)
@@ -58,6 +58,9 @@ type Configuration struct {
 	ExternalGatewaySwitch     string // provider network underlay vlan subnet
 	EnableMetrics             bool
 	EnableArpDetectIPConflict bool
+	EnableVerboseConnCheck    bool
+	TCPConnCheckPort          int
+	UDPConnCheckPort          int
 }

 // ParseFlags will parse cmd args then init kubeClient and configuration
@@ -90,6 +93,10 @@ func ParseFlags() *Configuration {
 		argExternalGatewaySwitch = pflag.String("external-gateway-switch", "external", "The name of the external gateway switch which is an ovs bridge to provide external network, default: external")
 		argEnableMetrics = pflag.Bool("enable-metrics", true, "Whether to support metrics query")
 		argEnableArpDetectIPConflict = pflag.Bool("enable-arp-detect-ip-conflict", true, "Whether to support arp detect ip conflict in vlan network")
+
+		argEnableVerboseConnCheck = pflag.Bool("enable-verbose-conn-check", false, "enable TCP/UDP connectivity check listen ports")
+		argTCPConnectivityCheckPort = pflag.Int("tcp-conn-check-port", 8100, "TCP connectivity check port")
+		argUDPConnectivityCheckPort = pflag.Int("udp-conn-check-port", 8101, "UDP connectivity check port")
 	)

 	// mute info log for ipset lib
@@ -139,6 +146,9 @@ func ParseFlags() *Configuration {
 		ExternalGatewaySwitch:     *argExternalGatewaySwitch,
 		EnableMetrics:             *argEnableMetrics,
 		EnableArpDetectIPConflict: *argEnableArpDetectIPConflict,
+		EnableVerboseConnCheck:    *argEnableVerboseConnCheck,
+		TCPConnCheckPort:          *argTCPConnectivityCheckPort,
+		UDPConnCheckPort:          *argUDPConnectivityCheckPort,
 	}
 	return config
 }
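
These flags default to off, so kube-ovn-cni must be started with --enable-verbose-conn-check for the node-side checks to work; the kubectl-ko warning above guards exactly this case. A quick way to verify the running configuration (the same grep the plugin uses):

    kubectl get ds kube-ovn-cni -n kube-system -o yaml | grep enable-verbose-conn-check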

pkg/pinger/config.go  (+13 -1)
@@ -50,11 +50,19 @@ type Configuration struct {
 	ServiceVswitchdFilePidPath      string
 	ServiceOvnControllerFileLogPath string
 	ServiceOvnControllerFilePidPath string
+	EnableVerboseConnCheck          bool
+	TCPConnCheckPort                int
+	UDPConnCheckPort                int
 }

 func ParseFlags() (*Configuration, error) {
 	var (
-		argPort = pflag.Int("port", 8080, "metrics port")
+		argPort = pflag.Int("port", 8080, "metrics port")
+
+		argEnableVerboseConnCheck = pflag.Bool("enable-verbose-conn-check", false, "enable TCP/UDP connectivity check")
+		argTCPConnectivityCheckPort = pflag.Int("tcp-conn-check-port", 8100, "TCP connectivity check port")
+		argUDPConnectivityCheckPort = pflag.Int("udp-conn-check-port", 8101, "UDP connectivity check port")
+
 		argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.")
 		argDaemonSetNameSpace = pflag.String("ds-namespace", "kube-system", "kube-ovn-pinger daemonset namespace")
 		argDaemonSetName = pflag.String("ds-name", "kube-ovn-pinger", "kube-ovn-pinger daemonset name")
@@ -119,6 +127,10 @@ func ParseFlags() (*Configuration, error) {
 		NetworkMode:   *argNetworkMode,
 		EnableMetrics: *argEnableMetrics,

+		EnableVerboseConnCheck: *argEnableVerboseConnCheck,
+		TCPConnCheckPort:       *argTCPConnectivityCheckPort,
+		UDPConnCheckPort:       *argUDPConnectivityCheckPort,
+
 		// OVS Monitor
 		PollTimeout:  *argPollTimeout,
 		PollInterval: *argPollInterval,

pkg/pinger/ping.go  (+31)
@@ -92,6 +92,21 @@ func pingNodes(config *Configuration) error {
 	for _, addr := range no.Status.Addresses {
 		if addr.Type == v1.NodeInternalIP && util.ContainsString(config.PodProtocols, util.CheckProtocol(addr.Address)) {
 			func(nodeIP, nodeName string) {
+				if config.EnableVerboseConnCheck {
+					if err := util.TCPConnectivityCheck(fmt.Sprintf("%s:%d", nodeIP, config.TCPConnCheckPort)); err != nil {
+						klog.Infof("TCP connectivity to node %s %s failed", nodeName, nodeIP)
+						pingErr = err
+					} else {
+						klog.Infof("TCP connectivity to node %s %s success", nodeName, nodeIP)
+					}
+					if err := util.UDPConnectivityCheck(fmt.Sprintf("%s:%d", nodeIP, config.UDPConnCheckPort)); err != nil {
+						klog.Infof("UDP connectivity to node %s %s failed", nodeName, nodeIP)
+						pingErr = err
+					} else {
+						klog.Infof("UDP connectivity to node %s %s success", nodeName, nodeIP)
+					}
+				}
+
 				pinger, err := goping.NewPinger(nodeIP)
 				if err != nil {
 					klog.Errorf("failed to init pinger, %v", err)
@@ -143,6 +158,22 @@ func pingPods(config *Configuration) error {
 	for _, podIP := range pod.Status.PodIPs {
 		if util.ContainsString(config.PodProtocols, util.CheckProtocol(podIP.IP)) {
 			func(podIp, podName, nodeIP, nodeName string) {
+				if config.EnableVerboseConnCheck {
+					if err := util.TCPConnectivityCheck(fmt.Sprintf("%s:%d", podIp, config.TCPConnCheckPort)); err != nil {
+						klog.Infof("TCP connectivity to pod %s %s failed", podName, podIp)
+						pingErr = err
+					} else {
+						klog.Infof("TCP connectivity to pod %s %s success", podName, podIp)
+					}
+
+					if err := util.UDPConnectivityCheck(fmt.Sprintf("%s:%d", podIp, config.UDPConnCheckPort)); err != nil {
+						klog.Infof("UDP connectivity to pod %s %s failed", podName, podIp)
+						pingErr = err
+					} else {
+						klog.Infof("UDP connectivity to pod %s %s success", podName, podIp)
+					}
+				}
+
 				pinger, err := goping.NewPinger(podIp)
 				if err != nil {
 					klog.Errorf("failed to init pinger, %v", err)
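
On the client side these checks run before the ICMP probe, so a single --mode=job invocation reports TCP, UDP, and ping results together. This is the call kubectl-ko issues from each pinger pod during diagnose subnet (the subnet name is illustrative):

    /kube-ovn/kube-ovn-pinger --mode=job --ds-name=ovn-default-conn-check-server \
        --ds-namespace=kube-system --enable-verbose-conn-check=true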
