Skip to content

Commit a299180

Browse files
committed
fix gateway node check for centralized ecmp subnets
Signed-off-by: zhangzujian <zhangzujian.7@gmail.com>
1 parent 4875f23 commit a299180

File tree

11 files changed

+126
-34
lines changed

11 files changed

+126
-34
lines changed

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ dist/images/test-server
55
dist/images/kube-ovn
66
dist/images/kube-ovn-cmd
77
dist/images/kube-ovn-daemon
8+
dist/images/kube-ovn-controller
89
dist/images/kube-ovn-pinger
910
dist/images/kube-ovn-webhook
1011
dist/windows/kube-ovn.exe

Makefile

+2-2
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ build-go:
117117
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -o $(CURDIR)/dist/images/kube-ovn -v ./cmd/cni
118118
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-cmd -v ./cmd
119119
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-daemon -v ./cmd/daemon
120-
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-pinger -v ./cmd/pinger
120+
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-controller -v ./cmd/controller
121121
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build $(GO_BUILD_FLAGS) -o $(CURDIR)/dist/images/test-server -v ./test/server
122122

123123
.PHONY: build-go-windows
@@ -131,7 +131,7 @@ build-go-arm:
131131
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -o $(CURDIR)/dist/images/kube-ovn -v ./cmd/cni
132132
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-cmd -v ./cmd
133133
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-daemon -v ./cmd/daemon
134-
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-pinger -v ./cmd/pinger
134+
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build $(GO_BUILD_FLAGS) -buildmode=pie -o $(CURDIR)/dist/images/kube-ovn-controller -v ./cmd/controller
135135

136136
.PHONY: build-kube-ovn
137137
build-kube-ovn: build-debug build-go

charts/kube-ovn/templates/controller-deploy.yaml

+1
Original file line number | Diff line number | Diff line change
@@ -147,6 +147,7 @@ spec:
147147
capabilities:
148148
add:
149149
- NET_BIND_SERVICE
150+
- NET_RAW
150151
env:
151152
- name: ENABLE_SSL
152153
value: "{{ .Values.networking.ENABLE_SSL }}"

cmd/cmdmain.go

-5
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ import (
1111

1212
"k8s.io/klog/v2"
1313

14-
"github.com/kubeovn/kube-ovn/cmd/controller"
1514
"github.com/kubeovn/kube-ovn/cmd/health_check"
1615
"github.com/kubeovn/kube-ovn/cmd/ovn_ic_controller"
1716
"github.com/kubeovn/kube-ovn/cmd/ovn_leader_checker"
@@ -22,7 +21,6 @@ import (
2221
)
2322

2423
const (
25-
CmdController = "kube-ovn-controller"
2624
CmdMonitor = "kube-ovn-monitor"
2725
CmdSpeaker = "kube-ovn-speaker"
2826
CmdWebhook = "kube-ovn-webhook"
@@ -91,9 +89,6 @@ func dumpProfile() {
9189
func main() {
9290
cmd := filepath.Base(os.Args[0])
9391
switch cmd {
94-
case CmdController:
95-
dumpProfile()
96-
controller.CmdMain()
9792
case CmdMonitor:
9893
dumpProfile()
9994
ovn_monitor.CmdMain()

cmd/controller/cmdmain.go

+92
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,92 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"os/signal"
7+
"path/filepath"
8+
"runtime/pprof"
9+
"syscall"
10+
"time"
11+
12+
"k8s.io/klog/v2"
13+
14+
"github.com/kubeovn/kube-ovn/cmd/pinger"
15+
"github.com/kubeovn/kube-ovn/pkg/util"
16+
)
17+
18+
const (
19+
// CmdController is the executable base name for which main runs CmdMain.
CmdController = "kube-ovn-controller"
20+
// CmdPinger is the executable base name for which main runs pinger.CmdMain.
CmdPinger = "kube-ovn-pinger"
21+
)
22+
23+
// timeFormat is the time layout used to timestamp the profile file names
// created by dumpProfile.
const timeFormat = "2006-01-02_15:04:05"
24+
25+
func dumpProfile() {
26+
ch1 := make(chan os.Signal, 1)
27+
ch2 := make(chan os.Signal, 1)
28+
signal.Notify(ch1, syscall.SIGUSR1)
29+
signal.Notify(ch2, syscall.SIGUSR2)
30+
go func() {
31+
for {
32+
<-ch1
33+
name := fmt.Sprintf("cpu-profile-%s.pprof", time.Now().Format(timeFormat))
34+
path := filepath.Join(os.TempDir(), name)
35+
f, err := os.Create(path) // #nosec G303,G304
36+
if err != nil {
37+
klog.Errorf("failed to create cpu profile file: %v", err)
38+
return
39+
}
40+
if err = pprof.StartCPUProfile(f); err != nil {
41+
klog.Errorf("failed to start cpu profile: %v", err)
42+
if err = f.Close(); err != nil {
43+
klog.Errorf("failed to close file %q: %v", path, err)
44+
}
45+
return
46+
}
47+
time.Sleep(30 * time.Second)
48+
pprof.StopCPUProfile()
49+
if err = f.Close(); err != nil {
50+
klog.Errorf("failed to close file %q: %v", path, err)
51+
return
52+
}
53+
}
54+
}()
55+
go func() {
56+
for {
57+
<-ch2
58+
name := fmt.Sprintf("mem-profile-%s.pprof", time.Now().Format(timeFormat))
59+
path := filepath.Join(os.TempDir(), name)
60+
f, err := os.Create(path) // #nosec G303,G304
61+
if err != nil {
62+
klog.Errorf("failed to create memory profile file: %v", err)
63+
return
64+
}
65+
if err = pprof.WriteHeapProfile(f); err != nil {
66+
klog.Errorf("failed to write memory profile file: %v", err)
67+
if err = f.Close(); err != nil {
68+
klog.Errorf("failed to close file %q: %v", path, err)
69+
}
70+
return
71+
}
72+
if err = f.Close(); err != nil {
73+
klog.Errorf("failed to close file %q: %v", path, err)
74+
return
75+
}
76+
}
77+
}()
78+
}
79+
80+
func main() {
81+
cmd := filepath.Base(os.Args[0])
82+
switch cmd {
83+
case CmdController:
84+
dumpProfile()
85+
CmdMain()
86+
case CmdPinger:
87+
dumpProfile()
88+
pinger.CmdMain()
89+
default:
90+
util.LogFatalAndExit(nil, "%s is an unknown command", cmd)
91+
}
92+
}

cmd/controller/controller.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
package controller
1+
package main
22

33
import (
44
"context"

cmd/pinger/pinger.go

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
package main
1+
package pinger
22

33
import (
44
_ "net/http/pprof" // #nosec
@@ -14,7 +14,7 @@ import (
1414
"github.com/kubeovn/kube-ovn/versions"
1515
)
1616

17-
func main() {
17+
func CmdMain() {
1818
defer klog.Flush()
1919

2020
klog.Info(versions.String())

dist/images/Dockerfile

+4-4
Original file line number | Diff line number | Diff line change
@@ -10,16 +10,16 @@ COPY 01-kube-ovn.conflist /kube-ovn/01-kube-ovn.conflist
1010
COPY kube-ovn /kube-ovn/kube-ovn
1111
COPY kube-ovn-cmd /kube-ovn/kube-ovn-cmd
1212
COPY kube-ovn-daemon /kube-ovn/kube-ovn-daemon
13-
COPY kube-ovn-pinger /kube-ovn/kube-ovn-pinger
14-
RUN ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-controller && \
15-
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-monitor && \
13+
COPY kube-ovn-controller /kube-ovn/kube-ovn-controller
14+
RUN ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-monitor && \
1615
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-speaker && \
1716
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-webhook && \
1817
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-healthcheck && \
1918
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-leader-checker && \
2019
ln -s /kube-ovn/kube-ovn-cmd /kube-ovn/kube-ovn-ic-controller && \
20+
ln -s /kube-ovn/kube-ovn-controller /kube-ovn/kube-ovn-pinger && \
2121
setcap CAP_NET_BIND_SERVICE+eip /kube-ovn/kube-ovn-cmd && \
22-
setcap CAP_NET_RAW,CAP_NET_BIND_SERVICE+eip /kube-ovn/kube-ovn-pinger && \
22+
setcap CAP_NET_RAW,CAP_NET_BIND_SERVICE+eip /kube-ovn/kube-ovn-controller && \
2323
setcap CAP_NET_ADMIN,CAP_NET_RAW,CAP_NET_BIND_SERVICE,CAP_SYS_ADMIN+eip /kube-ovn/kube-ovn-daemon
2424

2525
FROM kubeovn/kube-ovn-base:$BASE_TAG

dist/images/install.sh

+1
Original file line number | Diff line number | Diff line change
@@ -4732,6 +4732,7 @@ spec:
47324732
capabilities:
47334733
add:
47344734
- NET_BIND_SERVICE
4735+
- NET_RAW
47354736
env:
47364737
- name: ENABLE_SSL
47374738
value: "$ENABLE_SSL"

pkg/controller/controller.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -1263,7 +1263,7 @@ func (c *Controller) startWorkers(ctx context.Context) {
12631263

12641264
go wait.Until(c.resyncProviderNetworkStatus, 30*time.Second, ctx.Done())
12651265
go wait.Until(c.exportSubnetMetrics, 30*time.Second, ctx.Done())
1266-
go wait.Until(c.CheckGatewayReady, 5*time.Second, ctx.Done())
1266+
go wait.Until(c.checkSubnetGateway, 5*time.Second, ctx.Done())
12671267

12681268
go wait.Until(runWorker("add ovn eip", c.addOvnEipQueue, c.handleAddOvnEip), time.Second, ctx.Done())
12691269
go wait.Until(runWorker("update ovn eip", c.updateOvnEipQueue, c.handleUpdateOvnEip), time.Second, ctx.Done())

pkg/controller/node.go

+21-19
Original file line number | Diff line number | Diff line change
@@ -544,27 +544,27 @@ func (c *Controller) handleUpdateNode(key string) error {
544544
return nil
545545
}
546546

547-
func (c *Controller) CheckGatewayReady() {
548-
if err := c.checkGatewayReady(); err != nil {
549-
klog.Errorf("failed to check gateway ready %v", err)
547+
func (c *Controller) checkSubnetGateway() {
548+
if err := c.checkSubnetGatewayNode(); err != nil {
549+
klog.Errorf("failed to check subnet gateway node: %v", err)
550550
}
551551
}
552552

553-
func (c *Controller) checkGatewayReady() error {
554-
klog.V(3).Infoln("start to check gateway status")
553+
func (c *Controller) checkSubnetGatewayNode() error {
554+
klog.V(3).Infoln("start to check subnet gateway node")
555555
subnetList, err := c.subnetsLister.List(labels.Everything())
556556
if err != nil {
557-
klog.Errorf("failed to list subnets %v", err)
557+
klog.Errorf("failed to list subnets: %v", err)
558558
return err
559559
}
560560
nodes, err := c.nodesLister.List(labels.Everything())
561561
if err != nil {
562-
klog.Errorf("failed to list nodes, %v", err)
562+
klog.Errorf("failed to list nodes: %v", err)
563563
return err
564564
}
565565

566566
for _, subnet := range subnetList {
567-
if (subnet.Spec.Vlan != "" && !subnet.Spec.LogicalGateway) ||
567+
if (subnet.Spec.Vlan != "" && (subnet.Spec.U2OInterconnection || !subnet.Spec.LogicalGateway)) ||
568568
subnet.Spec.GatewayNode == "" ||
569569
subnet.Spec.GatewayType != kubeovnv1.GWCentralizedType ||
570570
!subnet.Spec.EnableEcmp {
@@ -602,24 +602,26 @@ func (c *Controller) checkGatewayReady() error {
602602
pinger.Timeout = time.Duration(count) * time.Second
603603
pinger.Interval = 1 * time.Second
604604

605-
success := false
606-
605+
var pingSucceeded bool
607606
pinger.OnRecv = func(_ *goping.Packet) {
608-
success = true
607+
pingSucceeded = true
609608
pinger.Stop()
610609
}
611610
if err = pinger.Run(); err != nil {
612611
klog.Errorf("failed to run pinger for destination %s: %v", ip, err)
613612
return err
614613
}
615614

616-
if !nodeReady(node) {
617-
success = false
618-
}
619-
620-
if !success {
615+
nodeIsReady := nodeReady(node)
616+
if !pingSucceeded || !nodeIsReady {
621617
if exist {
622-
klog.Warningf("failed to ping ovn0 %s or node %s is not ready, delete ecmp policy route for node", ip, node.Name)
618+
if !pingSucceeded {
619+
klog.Warningf("failed to ping ovn0 ip %s on node %s", ip, node.Name)
620+
}
621+
if !nodeIsReady {
622+
klog.Warningf("node %s is not ready", node.Name)
623+
}
624+
klog.Warningf("delete ecmp policy route for node %s ip %s", node.Name, ip)
623625
nextHops.Remove(ip)
624626
delete(nameIPMap, node.Name)
625627
klog.Infof("update policy route for centralized subnet %s, nextHops %s", subnet.Name, nextHops)
@@ -629,7 +631,7 @@ func (c *Controller) checkGatewayReady() error {
629631
}
630632
}
631633
} else {
632-
klog.V(3).Infof("succeed to ping gw %s", ip)
634+
klog.V(3).Infof("succeeded to ping ovn0 ip %s on node %s", ip, node.Name)
633635
if !exist {
634636
nextHops.Add(ip)
635637
if nameIPMap == nil {
@@ -644,7 +646,7 @@ func (c *Controller) checkGatewayReady() error {
644646
}
645647
}
646648
} else if exist {
647-
klog.Infof("subnet %s gatewayNode does not contains node %v, delete policy route for node ip %s", subnet.Name, node.Name, ip)
649+
klog.Infof("subnet %s gateway nodes does not contain node %s, delete policy route for node ip %s", subnet.Name, node.Name, ip)
648650
nextHops.Remove(ip)
649651
delete(nameIPMap, node.Name)
650652
klog.Infof("update policy route for centralized subnet %s, nextHops %s", subnet.Name, nextHops)

0 commit comments

Comments
 (0)