Skip to content

Commit 377d56d

Browse files
committed
Enable set probe (#3145)
* enable set ovs probe

Signed-off-by: bobz965 <zhangbingbing2_yewu@cmss.chinamobile.com>
1 parent a7af897 commit 377d56d

File tree

8 files changed

+84
-39
lines changed

8 files changed

+84
-39
lines changed

charts/templates/central-deploy.yaml

+6-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ spec:
4444
- name: ovn-central
4545
image: {{ .Values.global.registry.address }}/{{ .Values.global.images.kubeovn.repository }}:{{ .Values.global.images.kubeovn.tag }}
4646
imagePullPolicy: {{ .Values.image.pullPolicy }}
47-
command: ["/kube-ovn/start-db.sh"]
47+
command:
48+
- /kube-ovn/start-db.sh
4849
securityContext:
4950
capabilities:
5051
add: ["SYS_NICE"]
@@ -71,6 +72,10 @@ spec:
7172
fieldPath: status.podIPs
7273
- name: ENABLE_BIND_LOCAL_IP
7374
value: "{{- .Values.func.ENABLE_BIND_LOCAL_IP }}"
75+
- name: PROBE_INTERVAL
76+
value: "{{ .Values.networking.PROBE_INTERVAL }}"
77+
- name: OVN_LEADER_PROBE_INTERVAL
78+
value: "{{ .Values.networking.OVN_LEADER_PROBE_INTERVAL }}"
7479
resources:
7580
requests:
7681
cpu: {{ index .Values "ovn-central" "requests" "cpu" }}

charts/templates/ovsovn-ds.yaml

+6-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ spec:
4444
{{- if .Values.DPDK }}
4545
command: ["/kube-ovn/start-ovs-dpdk.sh"]
4646
{{- else }}
47-
command: ["/kube-ovn/start-ovs.sh"]
47+
command:
48+
- /kube-ovn/start-ovs.sh
4849
{{- end }}
4950
securityContext:
5051
runAsUser: 0
@@ -74,6 +75,10 @@ spec:
7475
fieldPath: spec.nodeName
7576
- name: OVN_DB_IPS
7677
value: "{{ .Values.MASTER_NODES }}"
78+
- name: OVN_REMOTE_PROBE_INTERVAL
79+
value: "{{ .Values.networking.OVN_REMOTE_PROBE_INTERVAL }}"
80+
- name: OVN_REMOTE_OPENFLOW_INTERVAL
81+
value: "{{ .Values.networking.OVN_REMOTE_OPENFLOW_INTERVAL }}"
7782
volumeMounts:
7883
- mountPath: /var/run/netns
7984
name: host-ns

charts/values.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ networking:
4747
ENABLE_ECMP: false
4848
ENABLE_METRICS: true
4949
NODE_LOCAL_DNS_IP: ""
50+
PROBE_INTERVAL: 180000
51+
OVN_LEADER_PROBE_INTERVAL: 5
52+
OVN_REMOTE_PROBE_INTERVAL: 10000
53+
OVN_REMOTE_OPENFLOW_INTERVAL: 180
5054

5155
func:
5256
ENABLE_LB: true

dist/images/install.sh

+17-9
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,7 @@ POD_NIC_TYPE="veth-pair" # veth-pair or internal-port
8080
POD_DEFAULT_FIP_TYPE="" # iptables, pod can set iptables fip automatically by enable fip annotation
8181

8282
# VLAN config only takes effect when NETWORK_TYPE is vlan
83-
PROVIDER_NAME="provider"
8483
VLAN_INTERFACE_NAME=""
85-
VLAN_NAME="ovn-vlan"
8684
VLAN_ID="100"
8785

8886
if [ "$ENABLE_VLAN" = "true" ]; then
@@ -134,7 +132,7 @@ then
134132
--with-dpdk=*)
135133
DPDK=true
136134
DPDK_VERSION="${1#*=}"
137-
if [[ ! "${DPDK_SUPPORTED_VERSIONS[@]}" = "${DPDK_VERSION}" ]] || [[ -z "${DPDK_VERSION}" ]]; then
135+
if [[ ! "${DPDK_SUPPORTED_VERSIONS[*]}" = "${DPDK_VERSION}" ]] || [[ -z "${DPDK_VERSION}" ]]; then
138136
echo "Unsupported DPDK version: ${DPDK_VERSION}"
139137
echo "Supported DPDK versions: ${DPDK_SUPPORTED_VERSIONS[*]}"
140138
exit 1
@@ -201,17 +199,17 @@ fi
201199
echo "[Step 1/6] Label kube-ovn-master node and label datapath type"
202200
count=$(kubectl get no -l$LABEL --no-headers | wc -l)
203201
node_label="$LABEL"
204-
if [ $count -eq 0 ]; then
202+
if [ "${count}" -eq 0 ]; then
205203
count=$(kubectl get no -l$DEPRECATED_LABEL --no-headers | wc -l)
206204
node_label="$DEPRECATED_LABEL"
207-
if [ $count -eq 0 ]; then
205+
if [ "${count}" -eq 0 ]; then
208206
echo "ERROR: No node with label $LABEL or $DEPRECATED_LABEL found"
209207
exit 1
210208
fi
211209
fi
212210
kubectl label no -l$node_label kube-ovn/role=master --overwrite
213211

214-
if [ "$DPDK" = "true" -o "$HYBRID_DPDK" = "true" ]; then
212+
if [ "$DPDK" = "true" ] || [ "$HYBRID_DPDK" = "true" ]; then
215213
kubectl label no -lovn.kubernetes.io/ovs_dp_type!=userspace ovn.kubernetes.io/ovs_dp_type=kernel --overwrite
216214
fi
217215

@@ -3204,7 +3202,8 @@ spec:
32043202
- name: ovn-central
32053203
image: "$REGISTRY/kube-ovn:$VERSION"
32063204
imagePullPolicy: $IMAGE_PULL_POLICY
3207-
command: ["/kube-ovn/start-db.sh"]
3205+
command:
3206+
- /kube-ovn/start-db.sh
32083207
securityContext:
32093208
capabilities:
32103209
add: ["SYS_NICE"]
@@ -3233,6 +3232,10 @@ spec:
32333232
value: "$ENABLE_BIND_LOCAL_IP"
32343233
- name: DEBUG_WRAPPER
32353234
value: "$DEBUG_WRAPPER"
3235+
- name: PROBE_INTERVAL
3236+
value: "180000"
3237+
- name: OVN_LEADER_PROBE_INTERVAL
3238+
value: "5"
32363239
resources:
32373240
requests:
32383241
cpu: 300m
@@ -3516,7 +3519,8 @@ spec:
35163519
- name: openvswitch
35173520
image: "$REGISTRY/kube-ovn:$VERSION"
35183521
imagePullPolicy: $IMAGE_PULL_POLICY
3519-
command: ["/kube-ovn/start-ovs.sh"]
3522+
command:
3523+
- /kube-ovn/start-ovs.sh
35203524
securityContext:
35213525
runAsUser: 0
35223526
privileged: true
@@ -3547,6 +3551,10 @@ spec:
35473551
value: $addresses
35483552
- name: DEBUG_WRAPPER
35493553
value: "$DEBUG_WRAPPER"
3554+
- name: OVN_REMOTE_PROBE_INTERVAL
3555+
value: "10000"
3556+
- name: OVN_REMOTE_OPENFLOW_INTERVAL
3557+
value: "180"
35503558
volumeMounts:
35513559
- mountPath: /var/run/netns
35523560
name: host-ns
@@ -4519,7 +4527,7 @@ if ! sh -c "echo \":$PATH:\" | grep -q \":/usr/local/bin:\""; then
45194527
fi
45204528

45214529
echo "[Step 6/6] Run network diagnose"
4522-
kubectl cp kube-system/$(kubectl -n kube-system get pods -o wide | grep cni | awk '{print $1}' | awk 'NR==1{print}'):/kube-ovn/kubectl-ko /usr/local/bin/kubectl-ko
4530+
kubectl cp kube-system/"$(kubectl -n kube-system get pods -o wide | grep cni | awk '{print $1}' | awk 'NR==1{print}')":/kube-ovn/kubectl-ko /usr/local/bin/kubectl-ko
45234531
chmod +x /usr/local/bin/kubectl-ko
45244532
kubectl ko diagnose all
45254533

dist/images/start-db.sh

+13-8
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ set -eo pipefail
44
DEBUG_WRAPPER=${DEBUG_WRAPPER:-}
55
DEBUG_OPT="--ovn-northd-wrapper=$DEBUG_WRAPPER --ovsdb-nb-wrapper=$DEBUG_WRAPPER --ovsdb-sb-wrapper=$DEBUG_WRAPPER"
66

7+
echo "PROBE_INTERVAL is set to $PROBE_INTERVAL"
8+
echo "OVN_LEADER_PROBE_INTERVAL is set to $OVN_LEADER_PROBE_INTERVAL"
9+
710
# https://bugs.launchpad.net/neutron/+bug/1776778
811
if grep -q "3.10.0-862" /proc/version
912
then
@@ -224,11 +227,12 @@ if [[ "$ENABLE_SSL" == "false" ]]; then
224227
if [[ -z "$NODE_IPS" ]]; then
225228
/usr/share/ovn/scripts/ovn-ctl restart_northd
226229
ovn-nbctl --no-leader-only set-connection ptcp:"${NB_PORT}":["${DB_ADDR}"]
227-
ovn-nbctl --no-leader-only set Connection . inactivity_probe=180000
230+
ovn-nbctl --no-leader-only set Connection . inactivity_probe=${PROBE_INTERVAL}
231+
ovn-nbctl --no-leader-only set NB_Global . options:northd_probe_interval=${PROBE_INTERVAL}
228232
ovn-nbctl --no-leader-only set NB_Global . options:use_logical_dp_groups=true
229233

230234
ovn-sbctl --no-leader-only set-connection ptcp:"${SB_PORT}":["${DB_ADDR}"]
231-
ovn-sbctl --no-leader-only set Connection . inactivity_probe=180000
235+
ovn-sbctl --no-leader-only set Connection . inactivity_probe=${PROBE_INTERVAL}
232236
else
233237
if [[ ! "$NODE_IPS" =~ "$DB_CLUSTER_ADDR" ]]; then
234238
echo "ERROR! host ip $DB_CLUSTER_ADDR not in env NODE_IPS $NODE_IPS"
@@ -272,7 +276,9 @@ if [[ "$ENABLE_SSL" == "false" ]]; then
272276
/etc/ovn/ovnsb_local_config.db
273277
/usr/share/ovn/scripts/ovn-ctl $ovn_ctl_args \
274278
--ovn-manage-ovsdb=no start_northd
275-
ovn-nbctl --no-leader-only set NB_Global . options:northd_probe_interval=180000
279+
ovn-nbctl --no-leader-only set NB_Global . options:inactivity_probe=${PROBE_INTERVAL}
280+
ovn-sbctl --no-leader-only set SB_Global . options:inactivity_probe=${PROBE_INTERVAL}
281+
ovn-nbctl --no-leader-only set NB_Global . options:northd_probe_interval=${PROBE_INTERVAL}
276282
ovn-nbctl --no-leader-only set NB_Global . options:use_logical_dp_groups=true
277283
else
278284
# known leader always first
@@ -352,11 +358,11 @@ else
352358
--ovn-northd-ssl-ca-cert=/var/run/tls/cacert \
353359
restart_northd
354360
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set-connection pssl:"${NB_PORT}":["${DB_ADDR}"]
355-
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set Connection . inactivity_probe=180000
361+
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set Connection . inactivity_probe=${PROBE_INTERVAL}
356362
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set NB_Global . options:use_logical_dp_groups=true
357363

358364
ovn-sbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set-connection pssl:"${SB_PORT}":["${DB_ADDR}"]
359-
ovn-sbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set Connection . inactivity_probe=180000
365+
ovn-sbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set Connection . inactivity_probe=${PROBE_INTERVAL}
360366
else
361367
if [[ ! "$NODE_IPS" =~ "$DB_CLUSTER_ADDR" ]]; then
362368
echo "ERROR! host ip $DB_CLUSTER_ADDR not in env NODE_IPS $NODE_IPS"
@@ -408,7 +414,7 @@ else
408414
/etc/ovn/ovnsb_local_config.db
409415
/usr/share/ovn/scripts/ovn-ctl $ovn_ctl_args \
410416
--ovn-manage-ovsdb=no start_northd
411-
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set NB_Global . options:northd_probe_interval=180000
417+
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set NB_Global . options:northd_probe_interval=${PROBE_INTERVAL}
412418
ovn-nbctl --no-leader-only -p /var/run/tls/key -c /var/run/tls/cert -C /var/run/tls/cacert set NB_Global . options:use_logical_dp_groups=true
413419
else
414420
# get leader if cluster exists
@@ -486,5 +492,4 @@ ovs-appctl -t /var/run/ovn/ovnnb_db.ctl ovsdb-server/memory-trim-on-compaction o
486492
ovs-appctl -t /var/run/ovn/ovnsb_db.ctl ovsdb-server/memory-trim-on-compaction on
487493

488494
chmod 600 /etc/ovn/*
489-
/kube-ovn/kube-ovn-leader-checker
490-
495+
/kube-ovn/kube-ovn-leader-checker --probeInterval=${OVN_LEADER_PROBE_INTERVAL}

dist/images/start-ovs.sh

+19-16
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#!/bin/bash
22
set -euo pipefail
33

4+
echo "OVN_REMOTE_PROBE_INTERVAL is set to $OVN_REMOTE_PROBE_INTERVAL"
5+
echo "OVN_REMOTE_OPENFLOW_INTERVAL is set to $OVN_REMOTE_OPENFLOW_INTERVAL"
6+
47
HW_OFFLOAD=${HW_OFFLOAD:-false}
58
ENABLE_SSL=${ENABLE_SSL:-false}
69
OVN_DB_IPS=${OVN_DB_IPS:-}
@@ -36,21 +39,21 @@ cat /proc/cmdline"
3639
fi
3740

3841
function cgroup_match {
39-
hash1=$(md5sum /proc/$1/cgroup | awk '{print $1}')
40-
hash2=$(md5sum /proc/$2/cgroup | awk '{print $1}')
42+
hash1=$(md5sum /proc/"$1"/cgroup | awk '{print $1}')
43+
hash2=$(md5sum /proc/"$2"/cgroup | awk '{print $1}')
4144
test -n "$hash1" -a "x$hash1" = "x$hash2"
4245
}
4346

4447
function quit {
45-
gen_name=$(kubectl -n $POD_NAMESPACE get pod $POD_NAME -o jsonpath='{.metadata.generateName}')
46-
revision_hash=$(kubectl -n $POD_NAMESPACE get pod $POD_NAME -o jsonpath='{.metadata.labels.controller-revision-hash}')
47-
revision=$(kubectl -n $POD_NAMESPACE get controllerrevision $gen_name$revision_hash -o jsonpath='{.revision}')
48+
gen_name=$(kubectl -n "${POD_NAMESPACE}" get pod "${POD_NAME}" -o jsonpath='{.metadata.generateName}')
49+
revision_hash=$(kubectl -n "${POD_NAMESPACE}" get pod "${POD_NAME}" -o jsonpath='{.metadata.labels.controller-revision-hash}')
50+
revision=$(kubectl -n "${POD_NAMESPACE}" get controllerrevision "${gen_name}${revision_hash}" -o jsonpath='{.revision}')
4851
ds_name=${gen_name%-}
4952
latest_revision=$(kubectl -n kube-system get controllerrevision --no-headers | awk '$2 == "daemonset.apps/'$ds_name'" {print $3}' | sort -nr | head -n1)
5053
if [ "x$latest_revision" = "x$revision" ]; then
5154
# stop ovn-controller/ovs only when the processes are in the same cgroup
5255
pid=$(/usr/share/ovn/scripts/ovn-ctl status_controller | awk '{print $NF}')
53-
if cgroup_match $pid self; then
56+
if cgroup_match "${pid}" self; then
5457
/usr/share/ovn/scripts/grace_stop_ovn_controller
5558
/usr/share/openvswitch/scripts/ovs-ctl stop
5659
fi
@@ -64,12 +67,12 @@ trap quit EXIT
6467
iptables -V
6568

6669
# Start ovsdb
67-
/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd --system-id=random --ovsdb-server-wrapper=$DEBUG_WRAPPER
70+
/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovs-vswitchd --system-id=random --ovsdb-server-wrapper="${DEBUG_WRAPPER}"
6871
# Restrict the number of pthreads ovs-vswitchd creates to reduce the
6972
# amount of RSS it uses on hosts with many cores
7073
# https://bugzilla.redhat.com/show_bug.cgi?id=1571379
7174
# https://bugzilla.redhat.com/show_bug.cgi?id=1572797
72-
if [[ `nproc` -gt 12 ]]; then
75+
if [[ $(nproc) -gt 12 ]]; then
7376
ovs-vsctl --no-wait set Open_vSwitch . other_config:n-revalidator-threads=4
7477
ovs-vsctl --no-wait set Open_vSwitch . other_config:n-handler-threads=10
7578
fi
@@ -87,28 +90,28 @@ ovs-appctl -t "$ovsdb_server_ctl" vlog/set reconnect:file:err
8790

8891
function handle_underlay_bridges() {
8992
bridges=($(ovs-vsctl --no-heading --columns=name find bridge external-ids:vendor=kube-ovn))
90-
for br in ${bridges[@]}; do
91-
if ! ip link show $br >/dev/null; then
93+
for br in "${bridges[@]}"; do
94+
if ! ip link show "$br" >/dev/null; then
9295
# the bridge does not exist, leave it to be handled by kube-ovn-cni
9396
echo "deleting ovs bridge $br"
94-
ovs-vsctl --no-wait del-br $br
97+
ovs-vsctl --no-wait del-br "$br"
9598
fi
9699
done
97100

98101
bridges=($(ovs-vsctl --no-heading --columns=name find bridge external-ids:vendor=kube-ovn external-ids:exchange-link-name=true))
99-
for br in ${bridges[@]}; do
102+
for br in "${bridges[@]}"; do
100103
if [ -z $(ip link show $br type openvswitch 2>/dev/null || true) ]; then
101104
# the bridge does not exist, leave it to be handled by kube-ovn-cni
102105
echo "deleting ovs bridge $br"
103-
ovs-vsctl --no-wait del-br $br
106+
ovs-vsctl --no-wait del-br "$br"
104107
fi
105108
done
106109
}
107110

108111
handle_underlay_bridges
109112

110113
# Start vswitchd. restart will automatically set/unset flow-restore-wait which is not what we want
111-
/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovsdb-server --system-id=random --no-mlockall --ovs-vswitchd-wrapper=$DEBUG_WRAPPER
114+
/usr/share/openvswitch/scripts/ovs-ctl restart --no-ovsdb-server --system-id=random --no-mlockall --ovs-vswitchd-wrapper="$DEBUG_WRAPPER"
112115
/usr/share/openvswitch/scripts/ovs-ctl --protocol=udp --dport=6081 enable-protocol
113116

114117
function gen_conn_str {
@@ -137,9 +140,9 @@ ovs-vsctl set open . external-ids:hostname="${KUBE_NODE_NAME}"
137140

138141
# Start ovn-controller
139142
if [[ "$ENABLE_SSL" == "false" ]]; then
140-
/usr/share/ovn/scripts/ovn-ctl --ovn-controller-wrapper=$DEBUG_WRAPPER restart_controller
143+
/usr/share/ovn/scripts/ovn-ctl --ovn-controller-wrapper="$DEBUG_WRAPPER" restart_controller
141144
else
142-
/usr/share/ovn/scripts/ovn-ctl --ovn-controller-ssl-key=/var/run/tls/key --ovn-controller-ssl-cert=/var/run/tls/cert --ovn-controller-ssl-ca-cert=/var/run/tls/cacert --ovn-controller-wrapper=$DEBUG_WRAPPER restart_controller
145+
/usr/share/ovn/scripts/ovn-ctl --ovn-controller-ssl-key=/var/run/tls/key --ovn-controller-ssl-cert=/var/run/tls/cert --ovn-controller-ssl-ca-cert=/var/run/tls/cacert --ovn-controller-wrapper="$DEBUG_WRAPPER" restart_controller
143146
fi
144147

145148
chmod 600 /etc/openvswitch/*

yamls/ovn-dpdk.yaml

+7-2
Original file line numberDiff line numberDiff line change
@@ -204,9 +204,10 @@ spec:
204204
hostNetwork: true
205205
containers:
206206
- name: ovn-central
207-
image: "kubeovn/kube-ovn:v1.10.0"
207+
image: "kubeovn/kube-ovn:v1.12.0"
208208
imagePullPolicy: IfNotPresent
209-
command: ["/kube-ovn/start-db.sh"]
209+
command:
210+
- /kube-ovn/start-db.sh
210211
securityContext:
211212
capabilities:
212213
add: ["SYS_NICE"]
@@ -231,6 +232,10 @@ spec:
231232
valueFrom:
232233
fieldRef:
233234
fieldPath: status.podIPs
235+
- name: PROBE_INTERVAL
236+
value: "180000"
237+
- name: OVN_LEADER_PROBE_INTERVAL
238+
value: "5"
234239
resources:
235240
requests:
236241
cpu: 500m

yamls/ovn-ha.yaml

+12-2
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ spec:
9595
- name: ovn-central
9696
image: "kubeovn/kube-ovn:v1.12.0"
9797
imagePullPolicy: IfNotPresent
98-
command: ["/kube-ovn/start-db.sh"]
98+
command:
99+
- /kube-ovn/start-db.sh
99100
securityContext:
100101
capabilities:
101102
add: ["SYS_NICE"]
@@ -122,6 +123,10 @@ spec:
122123
fieldPath: status.podIPs
123124
- name: ENABLE_BIND_LOCAL_IP
124125
value: "true"
126+
- name: PROBE_INTERVAL
127+
value: "180000"
128+
- name: OVN_LEADER_PROBE_INTERVAL
129+
value: "5"
125130
resources:
126131
requests:
127132
cpu: 300m
@@ -238,7 +243,8 @@ spec:
238243
- name: openvswitch
239244
image: "kubeovn/kube-ovn:v1.12.0"
240245
imagePullPolicy: IfNotPresent
241-
command: ["/kube-ovn/start-ovs.sh"]
246+
command:
247+
- /kube-ovn/start-ovs.sh
242248
securityContext:
243249
runAsUser: 0
244250
privileged: true
@@ -265,6 +271,10 @@ spec:
265271
valueFrom:
266272
fieldRef:
267273
fieldPath: spec.nodeName
274+
- name: OVN_REMOTE_PROBE_INTERVAL
275+
value: "10000"
276+
- name: OVN_REMOTE_OPENFLOW_INTERVAL
277+
value: "180"
268278
volumeMounts:
269279
- mountPath: /var/run/netns
270280
name: host-ns

0 commit comments

Comments
 (0)