Skip to content

Commit cc9f566

Browse files
committed
cni-server: set node NetworkUnavailable condition after join subnet gateway check (kubeovn#4915)
Signed-off-by: zhangzujian <zhangzujian.7@gmail.com>
1 parent 4cf6035 commit cc9f566

File tree

6 files changed

+92
-16
lines changed

6 files changed

+92
-16
lines changed

dist/images/install.sh

+1
Original file line numberDiff line numberDiff line change
@@ -2341,6 +2341,7 @@ rules:
23412341
- pods/exec
23422342
- namespaces
23432343
- nodes
2344+
- nodes/status
23442345
- configmaps
23452346
verbs:
23462347
- create

kubeovn-helm/templates/ovn-CR.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ rules:
4949
- pods/exec
5050
- namespaces
5151
- nodes
52+
- nodes/status
5253
- configmaps
5354
verbs:
5455
- create

pkg/daemon/init.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ func InitNodeGateway(config *Configuration) error {
8888
klog.Errorf("failed to get ip %s with mask %s, %v", ip, cidr, err)
8989
return err
9090
}
91-
return configureNodeNic(portName, ipAddr, gw, cidr, mac, config.MTU)
91+
return configureNodeNic(config.KubeClient, config.NodeName, portName, ipAddr, gw, cidr, mac, config.MTU)
9292
}
9393

9494
func InitMirror(config *Configuration) error {

pkg/daemon/ovs_linux.go

+42-10
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,20 @@ import (
99
"path"
1010
"path/filepath"
1111
"regexp"
12+
"strconv"
1213
"strings"
1314
"syscall"
1415
"time"
1516

16-
"strconv"
17-
1817
"github.com/Mellanox/sriovnet"
1918
sriovutilfs "github.com/Mellanox/sriovnet/pkg/utils/filesystem"
2019
"github.com/containernetworking/plugins/pkg/ns"
2120
"github.com/containernetworking/plugins/pkg/utils/sysctl"
2221
"github.com/vishvananda/netlink"
2322
"golang.org/x/sys/unix"
23+
corev1 "k8s.io/api/core/v1"
2424
"k8s.io/apimachinery/pkg/types"
25+
"k8s.io/client-go/kubernetes"
2526
"k8s.io/klog/v2"
2627

2728
kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1"
@@ -419,7 +420,6 @@ func configureContainerNic(nicName, ifName string, ipAddr, gateway string, isDef
419420
}
420421

421422
return ns.WithNetNSPath(netns.Path(), func(_ ns.NetNS) error {
422-
423423
if nicType != util.InternalType {
424424
if err = netlink.LinkSetName(containerLink, ifName); err != nil {
425425
klog.Error(err)
@@ -610,7 +610,7 @@ func waitNetworkReady(nic, ipAddr, gateway string, underlayGateway, verbose bool
610610
return nil
611611
}
612612

613-
func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error {
613+
func configureNodeNic(cs kubernetes.Interface, nodeName, portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error {
614614
ipStr := util.GetIpWithoutMask(ip)
615615
raw, err := ovs.Exec(ovs.MayExist, "add-port", "br-int", util.NodeNic, "--",
616616
"set", "interface", util.NodeNic, "type=internal", "--",
@@ -687,13 +687,21 @@ func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAdd
687687
}
688688

689689
// ping ovn0 gw to activate the flow
690-
klog.Infof("wait ovn0 gw ready")
691-
if err := waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry); err != nil {
690+
klog.Info("wait ovn0 gw ready")
691+
status := corev1.ConditionFalse
692+
reason := "JoinSubnetGatewayReachable"
693+
message := fmt.Sprintf("ping check to gateway ip %s succeeded", gw)
694+
if err = waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry); err != nil {
692695
klog.Errorf("failed to init ovn0 check: %v", err)
693-
return err
696+
status = corev1.ConditionTrue
697+
reason = "JoinSubnetGatewayUnreachable"
698+
message = fmt.Sprintf("ping check to gateway ip %s failed", gw)
699+
}
700+
if err := util.SetNodeNetworkUnavailableCondition(cs, nodeName, status, reason, message); err != nil {
701+
klog.Errorf("failed to set node network unavailable condition: %v", err)
694702
}
695703

696-
return nil
704+
return err
697705
}
698706

699707
// If OVS restart, the ovn0 port will down and prevent host to pod network,
@@ -715,7 +723,31 @@ func (c *Controller) loopOvn0Check() {
715723
}
716724
ip := node.Annotations[util.IpAddressAnnotation]
717725
gw := node.Annotations[util.GatewayAnnotation]
718-
if err := waitNetworkReady(util.NodeNic, ip, gw, false, false, gatewayCheckMaxRetry); err != nil {
726+
status := corev1.ConditionFalse
727+
reason := "JoinSubnetGatewayReachable"
728+
message := fmt.Sprintf("ping check to gateway ip %s succeeded", gw)
729+
if err = waitNetworkReady(util.NodeNic, ip, gw, false, false, 5); err != nil {
730+
klog.Errorf("failed to init ovn0 check: %v", err)
731+
status = corev1.ConditionTrue
732+
reason = "JoinSubnetGatewayUnreachable"
733+
message = fmt.Sprintf("ping check to gateway ip %s failed", gw)
734+
}
735+
736+
var alreadySet bool
737+
for _, condition := range node.Status.Conditions {
738+
if condition.Type == corev1.NodeNetworkUnavailable && condition.Status == corev1.ConditionTrue &&
739+
condition.Reason == reason && condition.Message == message {
740+
alreadySet = true
741+
break
742+
}
743+
}
744+
if !alreadySet {
745+
if err := util.SetNodeNetworkUnavailableCondition(c.config.KubeClient, c.config.NodeName, status, reason, message); err != nil {
746+
klog.Errorf("failed to set node network unavailable condition: %v", err)
747+
}
748+
}
749+
750+
if err != nil {
719751
util.LogFatalAndExit(err, "failed to ping ovn0 gateway %s", gw)
720752
}
721753
}
@@ -1162,7 +1194,7 @@ func setupVethPair(containerID, ifName string, mtu int) (string, string, error)
11621194
// Setup sriov interface in the pod
11631195
// https://github.com/ovn-org/ovn-kubernetes/commit/6c96467d0d3e58cab05641293d1c1b75e5914795
11641196
func setupSriovInterface(containerID, deviceID, vfDriver, ifName string, mtu int, mac string) (string, string, error) {
1165-
var isVfioPciDriver = false
1197+
isVfioPciDriver := false
11661198
if vfDriver == "vfio-pci" {
11671199
matches, err := filepath.Glob(filepath.Join(util.VfioSysDir, "*"))
11681200
if err != nil {

pkg/daemon/ovs_windows.go

+15-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ import (
99

1010
"github.com/Microsoft/hcsshim"
1111
"github.com/containernetworking/plugins/pkg/hns"
12+
corev1 "k8s.io/api/core/v1"
1213
"k8s.io/apimachinery/pkg/types"
14+
"k8s.io/client-go/kubernetes"
1315
"k8s.io/klog/v2"
1416

1517
kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1"
@@ -258,7 +260,7 @@ func waitNetworkReady(nic, ipAddr, gateway string, underlayGateway, verbose bool
258260
return nil
259261
}
260262

261-
func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error {
263+
func configureNodeNic(cs kubernetes.Interface, nodeName, portName, ip, gw, joinCIDR string, macAddr net.HardwareAddr, mtu int) error {
262264
ipStr := util.GetIpWithoutMask(ip)
263265
raw, err := ovs.Exec(ovs.MayExist, "add-port", "br-int", util.NodeNic, "--",
264266
"set", "interface", util.NodeNic, "type=internal", "--",
@@ -321,12 +323,20 @@ func configureNodeNic(portName, ip, gw, joinCIDR string, macAddr net.HardwareAdd
321323
}
322324

323325
// ping ovn0 gw to activate the flow
324-
klog.Infof("wait ovn0 gw ready")
325-
if err := waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry); err != nil {
326+
klog.Info("wait ovn0 gw ready")
327+
status := corev1.ConditionFalse
328+
reason := "JoinSubnetGatewayReachable"
329+
message := fmt.Sprintf("ping check to gateway ip %s succeeded", gw)
330+
if err = waitNetworkReady(util.NodeNic, ip, gw, false, true, gatewayCheckMaxRetry); err != nil {
326331
klog.Errorf("failed to init ovn0 check: %v", err)
327-
return err
332+
status = corev1.ConditionTrue
333+
reason = "JoinSubnetGatewayUnreachable"
334+
message = fmt.Sprintf("ping check to gateway ip %s failed", gw)
328335
}
329-
return nil
336+
if err := util.SetNodeNetworkUnavailableCondition(cs, nodeName, status, reason, message); err != nil {
337+
klog.Errorf("failed to set node network unavailable condition: %v", err)
338+
}
339+
return err
330340
}
331341

332342
// If OVS restart, the ovn0 port will down and prevent host to pod network,

pkg/util/k8s.go

+32
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
package util
22

33
import (
4+
"context"
5+
"encoding/json"
46
"fmt"
57
"net"
68
"net/url"
79
"strings"
810
"time"
911

1012
v1 "k8s.io/api/core/v1"
13+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
14+
"k8s.io/client-go/kubernetes"
1115
"k8s.io/klog/v2"
1216
)
1317

@@ -62,3 +66,31 @@ func ServiceClusterIPs(svc v1.Service) []string {
6266
}
6367
return ips
6468
}
69+
70+
func SetNodeNetworkUnavailableCondition(cs kubernetes.Interface, nodeName string, status v1.ConditionStatus, reason, message string) error {
71+
now := metav1.NewTime(time.Now())
72+
patch := map[string]map[string][]v1.NodeCondition{
73+
"status": {
74+
"conditions": []v1.NodeCondition{{
75+
Type: v1.NodeNetworkUnavailable,
76+
Status: status,
77+
Reason: reason,
78+
Message: message,
79+
LastTransitionTime: now,
80+
LastHeartbeatTime: now,
81+
}},
82+
},
83+
}
84+
data, err := json.Marshal(patch)
85+
if err != nil {
86+
klog.Errorf("failed to marshal patch data: %v", err)
87+
return err
88+
}
89+
90+
if _, err = cs.CoreV1().Nodes().PatchStatus(context.Background(), nodeName, data); err != nil {
91+
klog.Errorf("failed to patch node %s: %v", nodeName, err)
92+
return err
93+
}
94+
95+
return nil
96+
}

0 commit comments

Comments
 (0)