Skip to content

Commit 19683fa

Browse files
committed
fix gateway node check for centralized ecmp subnets (#4847)
Signed-off-by: zhangzujian <zhangzujian.7@gmail.com>
1 parent cdc639d commit 19683fa

File tree

11 files changed

+126
-34
lines changed

11 files changed

+126
-34
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ dist/images/test-server
55
dist/images/kube-ovn
66
dist/images/kube-ovn-cmd
77
dist/images/kube-ovn-daemon
8+
dist/images/kube-ovn-controller
89
dist/images/kube-ovn-pinger
910
dist/images/kube-ovn-webhook
1011
dist/windows/kube-ovn.exe

Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ build-go:
115115
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -o $(CURDIR)/dist/images/kube-ovn -v ./cmd/cni
116116
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-cmd -v ./cmd
117117
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-daemon -v ./cmd/daemon
118-
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-pinger -v ./cmd/pinger
118+
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-controller -v ./cmd/controller
119119
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -o $(CURDIR)/dist/images/test-server -v ./test/server
120120

121121
.PHONY: build-go-windows
@@ -129,7 +129,7 @@ build-go-arm:
129129
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -o $(CURDIR)/dist/images/kube-ovn -v ./cmd/cni
130130
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-cmd -v ./cmd
131131
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-daemon -v ./cmd/daemon
132-
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-pinger -v ./cmd/pinger
132+
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-controller -v ./cmd/controller
133133

134134
.PHONY: build-kube-ovn
135135
build-kube-ovn: build-debug build-go

charts/kube-ovn/templates/controller-deploy.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ spec:
145145
capabilities:
146146
add:
147147
- NET_BIND_SERVICE
148+
- NET_RAW
148149
env:
149150
- name: ENABLE_SSL
150151
value: "{{ .Values.networking.ENABLE_SSL }}"

cmd/cmdmain.go

-5
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111

1212
"k8s.io/klog/v2"
1313

14-
"github.com/kubeovn/kube-ovn/cmd/controller"
1514
"github.com/kubeovn/kube-ovn/cmd/health_check"
1615
"github.com/kubeovn/kube-ovn/cmd/ovn_ic_controller"
1716
"github.com/kubeovn/kube-ovn/cmd/ovn_leader_checker"
@@ -22,7 +21,6 @@ import (
2221
)
2322

2423
const (
25-
CmdController = "kube-ovn-controller"
2624
CmdMonitor = "kube-ovn-monitor"
2725
CmdSpeaker = "kube-ovn-speaker"
2826
CmdWebhook = "kube-ovn-webhook"
@@ -91,9 +89,6 @@ func dumpProfile() {
9189
func main() {
9290
cmd := filepath.Base(os.Args[0])
9391
switch cmd {
94-
case CmdController:
95-
dumpProfile()
96-
controller.CmdMain()
9792
case CmdMonitor:
9893
dumpProfile()
9994
ovn_monitor.CmdMain()

cmd/controller/cmdmain.go

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"os/signal"
7+
"path/filepath"
8+
"runtime/pprof"
9+
"syscall"
10+
"time"
11+
12+
"k8s.io/klog/v2"
13+
14+
"github.com/kubeovn/kube-ovn/cmd/pinger"
15+
"github.com/kubeovn/kube-ovn/pkg/util"
16+
)
17+
18+
const (
19+
CmdController = "kube-ovn-controller"
20+
CmdPinger = "kube-ovn-pinger"
21+
)
22+
23+
const timeFormat = "2006-01-02_15:04:05"
24+
25+
// dumpProfile installs signal handlers that write pprof profiles on demand.
// It registers for SIGUSR1 and SIGUSR2, starts one goroutine per signal and
// then returns immediately:
//   - SIGUSR1: records a CPU profile for 30 seconds into
//     <os.TempDir()>/cpu-profile-<timestamp>.pprof
//   - SIGUSR2: writes a heap profile into
//     <os.TempDir()>/mem-profile-<timestamp>.pprof
//
// The timestamp in the file name is formatted with timeFormat.
//
// NOTE(review): every error path inside the goroutines returns, which ends
// that goroutine; the corresponding signal is then no longer serviced by it.
// Confirm this is intended rather than continuing with the next signal.
func dumpProfile() {
26+
ch1 := make(chan os.Signal, 1)
27+
ch2 := make(chan os.Signal, 1)
28+
signal.Notify(ch1, syscall.SIGUSR1)
29+
signal.Notify(ch2, syscall.SIGUSR2)
30+
// CPU profile handler: one profile per SIGUSR1.
go func() {
31+
for {
32+
// Block until the next SIGUSR1 arrives.
<-ch1
33+
name := fmt.Sprintf("cpu-profile-%s.pprof", time.Now().Format(timeFormat))
34+
path := filepath.Join(os.TempDir(), name)
35+
f, err := os.Create(path) // #nosec G303,G304
36+
if err != nil {
37+
klog.Errorf("failed to create cpu profile file: %v", err)
38+
return
39+
}
40+
if err = pprof.StartCPUProfile(f); err != nil {
41+
klog.Errorf("failed to start cpu profile: %v", err)
42+
// Profiling did not start, so release the file before giving up.
if err = f.Close(); err != nil {
43+
klog.Errorf("failed to close file %q: %v", path, err)
44+
}
45+
return
46+
}
47+
// Let the CPU profiler run for a fixed 30-second window.
time.Sleep(30 * time.Second)
48+
pprof.StopCPUProfile()
49+
if err = f.Close(); err != nil {
50+
klog.Errorf("failed to close file %q: %v", path, err)
51+
return
52+
}
53+
}
54+
}()
55+
// Heap profile handler: one profile per SIGUSR2.
go func() {
56+
for {
57+
// Block until the next SIGUSR2 arrives.
<-ch2
58+
name := fmt.Sprintf("mem-profile-%s.pprof", time.Now().Format(timeFormat))
59+
path := filepath.Join(os.TempDir(), name)
60+
f, err := os.Create(path) // #nosec G303,G304
61+
if err != nil {
62+
klog.Errorf("failed to create memory profile file: %v", err)
63+
return
64+
}
65+
if err = pprof.WriteHeapProfile(f); err != nil {
66+
klog.Errorf("failed to write memory profile file: %v", err)
67+
// Writing failed, so release the file before giving up.
if err = f.Close(); err != nil {
68+
klog.Errorf("failed to close file %q: %v", path, err)
69+
}
70+
return
71+
}
72+
if err = f.Close(); err != nil {
73+
klog.Errorf("failed to close file %q: %v", path, err)
74+
return
75+
}
76+
}
77+
}()
78+
}
79+
80+
// main dispatches on the base name of the executable (os.Args[0]), so the
// same binary can run as either kube-ovn-controller or kube-ovn-pinger.
// Both modes call dumpProfile before handing over to their entry point; any
// other name is passed to util.LogFatalAndExit as an unknown command.
func main() {
81+
cmd := filepath.Base(os.Args[0])
82+
switch cmd {
83+
case CmdController:
84+
dumpProfile()
85+
// CmdMain is not defined in this file; presumably the controller entry
// point elsewhere in this package — verify.
CmdMain()
86+
case CmdPinger:
87+
dumpProfile()
88+
pinger.CmdMain()
89+
default:
90+
util.LogFatalAndExit(nil, "%s is an unknown command", cmd)
91+
}
92+
}

cmd/controller/controller.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package controller
1+
package main
22

33
import (
44
"context"

cmd/pinger/pinger.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package main
1+
package pinger
22

33
import (
44
_ "net/http/pprof" // #nosec
@@ -14,7 +14,7 @@ import (
1414
"github.com/kubeovn/kube-ovn/versions"
1515
)
1616

17-
func main() {
17+
func CmdMain() {
1818
defer klog.Flush()
1919

2020
klog.Info(versions.String())

dist/images/Dockerfile

+4-4
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,16 @@ COPY 01-kube-ovn.conflist /kube-ovn/01-kube-ovn.conflist
1010
COPY kube-ovn /kube-ovn/kube-ovn
1111
COPY kube-ovn-cmd /kube-ovn/kube-ovn-cmd
1212
COPY kube-ovn-daemon /kube-ovn/kube-ovn-daemon
13-
COPY kube-ovn-pinger /kube-ovn/kube-ovn-pinger
14-
RUN ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-controller && \
15-
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-monitor && \
13+
COPY kube-ovn-controller /kube-ovn/kube-ovn-controller
14+
RUN ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-monitor && \
1615
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-speaker && \
1716
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-webhook && \
1817
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-healthcheck && \
1918
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-leader-checker && \
2019
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-ic-controller && \
20+
ln -s /kube-ovn/kube-ovn-controller /kube-ovn/kube-ovn-pinger && \
2121
setcap CAP_NET_BIND_SERVICE+eip /kube-ovn/kube-ovn-cmd && \
22-
setcap CAP_NET_RAW,CAP_NET_BIND_SERVICE+eip /kube-ovn/kube-ovn-pinger && \
22+
setcap CAP_NET_RAW,CAP_NET_BIND_SERVICE+eip /kube-ovn/kube-ovn-controller && \
2323
setcap CAP_NET_ADMIN,CAP_NET_RAW,CAP_NET_BIND_SERVICE,CAP_SYS_ADMIN+eip /kube-ovn/kube-ovn-daemon
2424

2525
FROM kubeovn/kube-ovn-base:$BASE_TAG

dist/images/install.sh

+1
Original file line numberDiff line numberDiff line change
@@ -4335,6 +4335,7 @@ spec:
43354335
capabilities:
43364336
add:
43374337
- NET_BIND_SERVICE
4338+
- NET_RAW
43384339
env:
43394340
- name: ENABLE_SSL
43404341
value: "$ENABLE_SSL"

pkg/controller/controller.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1182,7 +1182,7 @@ func (c *Controller) startWorkers(ctx context.Context) {
11821182

11831183
go wait.Until(c.resyncProviderNetworkStatus, 30*time.Second, ctx.Done())
11841184
go wait.Until(c.exportSubnetMetrics, 30*time.Second, ctx.Done())
1185-
go wait.Until(c.CheckGatewayReady, 5*time.Second, ctx.Done())
1185+
go wait.Until(c.checkSubnetGateway, 5*time.Second, ctx.Done())
11861186

11871187
go wait.Until(runWorker("add ovn eip", c.addOvnEipQueue, c.handleAddOvnEip), time.Second, ctx.Done())
11881188
go wait.Until(runWorker("update ovn eip", c.updateOvnEipQueue, c.handleUpdateOvnEip), time.Second, ctx.Done())

pkg/controller/node.go

+21-19
Original file line numberDiff line numberDiff line change
@@ -540,27 +540,27 @@ func (c *Controller) handleUpdateNode(key string) error {
540540
return nil
541541
}
542542

543-
func (c *Controller) CheckGatewayReady() {
544-
if err := c.checkGatewayReady(); err != nil {
545-
klog.Errorf("failed to check gateway ready %v", err)
543+
func (c *Controller) checkSubnetGateway() {
544+
if err := c.checkSubnetGatewayNode(); err != nil {
545+
klog.Errorf("failed to check subnet gateway node: %v", err)
546546
}
547547
}
548548

549-
func (c *Controller) checkGatewayReady() error {
550-
klog.V(3).Infoln("start to check gateway status")
549+
func (c *Controller) checkSubnetGatewayNode() error {
550+
klog.V(3).Infoln("start to check subnet gateway node")
551551
subnetList, err := c.subnetsLister.List(labels.Everything())
552552
if err != nil {
553-
klog.Errorf("failed to list subnets %v", err)
553+
klog.Errorf("failed to list subnets: %v", err)
554554
return err
555555
}
556556
nodes, err := c.nodesLister.List(labels.Everything())
557557
if err != nil {
558-
klog.Errorf("failed to list nodes, %v", err)
558+
klog.Errorf("failed to list nodes: %v", err)
559559
return err
560560
}
561561

562562
for _, subnet := range subnetList {
563-
if (subnet.Spec.Vlan != "" && !subnet.Spec.LogicalGateway) ||
563+
if (subnet.Spec.Vlan != "" && (subnet.Spec.U2OInterconnection || !subnet.Spec.LogicalGateway)) ||
564564
subnet.Spec.GatewayNode == "" ||
565565
subnet.Spec.GatewayType != kubeovnv1.GWCentralizedType ||
566566
!subnet.Spec.EnableEcmp {
@@ -598,24 +598,26 @@ func (c *Controller) checkGatewayReady() error {
598598
pinger.Timeout = time.Duration(count) * time.Second
599599
pinger.Interval = 1 * time.Second
600600

601-
success := false
602-
601+
var pingSucceeded bool
603602
pinger.OnRecv = func(_ *goping.Packet) {
604-
success = true
603+
pingSucceeded = true
605604
pinger.Stop()
606605
}
607606
if err = pinger.Run(); err != nil {
608607
klog.Errorf("failed to run pinger for destination %s: %v", ip, err)
609608
return err
610609
}
611610

612-
if !nodeReady(node) {
613-
success = false
614-
}
615-
616-
if !success {
611+
nodeIsReady := nodeReady(node)
612+
if !pingSucceeded || !nodeIsReady {
617613
if exist {
618-
klog.Warningf("failed to ping ovn0 %s or node %s is not ready, delete ecmp policy route for node", ip, node.Name)
614+
if !pingSucceeded {
615+
klog.Warningf("failed to ping ovn0 ip %s on node %s", ip, node.Name)
616+
}
617+
if !nodeIsReady {
618+
klog.Warningf("node %s is not ready", node.Name)
619+
}
620+
klog.Warningf("delete ecmp policy route for node %s ip %s", node.Name, ip)
619621
nextHops.Remove(ip)
620622
delete(nameIPMap, node.Name)
621623
klog.Infof("update policy route for centralized subnet %s, nextHops %s", subnet.Name, nextHops)
@@ -625,7 +627,7 @@ func (c *Controller) checkGatewayReady() error {
625627
}
626628
}
627629
} else {
628-
klog.V(3).Infof("succeed to ping gw %s", ip)
630+
klog.V(3).Infof("succeeded to ping ovn0 ip %s on node %s", ip, node.Name)
629631
if !exist {
630632
nextHops.Add(ip)
631633
if nameIPMap == nil {
@@ -640,7 +642,7 @@ func (c *Controller) checkGatewayReady() error {
640642
}
641643
}
642644
} else if exist {
643-
klog.Infof("subnet %s gatewayNode does not contains node %v, delete policy route for node ip %s", subnet.Name, node.Name, ip)
645+
klog.Infof("subnet %s gateway nodes does not contain node %s, delete policy route for node ip %s", subnet.Name, node.Name, ip)
644646
nextHops.Remove(ip)
645647
delete(nameIPMap, node.Name)
646648
klog.Infof("update policy route for centralized subnet %s, nextHops %s", subnet.Name, nextHops)

0 commit comments

Comments
 (0)