Skip to content

Commit b70b16c

Browse files
zhaocongqi authored and zhangzujian committed
fix: kube-ovn-controller cannot be ready when ENABLE_METRICS is false (#4886)
* fix: kube-ovn-controller cannot be ready when ENABLE_METRICS is false
* ovn-monitor: refactor pprof to metrics

---------

Signed-off-by: zhaocongqi <1229896069@qq.com>
1 parent a6123dd commit b70b16c

File tree

11 files changed

+128
-45
lines changed

11 files changed

+128
-45
lines changed

charts/kube-ovn/templates/controller-deploy.yaml

+2-1
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,7 @@ spec:
194194
- /kube-ovn/kube-ovn-healthcheck
195195
- --port=10660
196196
- --tls={{- .Values.func.SECURE_SERVING }}
197+
- --enable-metrics={{- .Values.networking.ENABLE_METRICS }}
197198
periodSeconds: 3
198199
timeoutSeconds: 5
199200
livenessProbe:
@@ -202,6 +203,7 @@ spec:
202203
- /kube-ovn/kube-ovn-healthcheck
203204
- --port=10660
204205
- --tls={{- .Values.func.SECURE_SERVING }}
206+
- --enable-metrics={{- .Values.networking.ENABLE_METRICS }}
205207
initialDelaySeconds: 300
206208
periodSeconds: 7
207209
failureThreshold: 5
@@ -229,4 +231,3 @@ spec:
229231
secret:
230232
optional: true
231233
secretName: kube-ovn-tls
232-

charts/kube-ovn/templates/monitor-deploy.yaml

+3
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ spec:
6767
- --logtostderr=false
6868
- --alsologtostderr=true
6969
- --log_file_max_size=200
70+
- --enable-metrics={{- .Values.networking.ENABLE_METRICS }}
7071
securityContext:
7172
runAsUser: {{ include "kubeovn.runAsUser" . }}
7273
privileged: false
@@ -130,6 +131,7 @@ spec:
130131
- /kube-ovn/kube-ovn-healthcheck
131132
- --port=10661
132133
- --tls={{- .Values.func.SECURE_SERVING }}
134+
- --enable-metrics={{- .Values.networking.ENABLE_METRICS }}
133135
timeoutSeconds: 5
134136
readinessProbe:
135137
failureThreshold: 3
@@ -141,6 +143,7 @@ spec:
141143
- /kube-ovn/kube-ovn-healthcheck
142144
- --port=10661
143145
- --tls={{- .Values.func.SECURE_SERVING }}
146+
- --enable-metrics={{- .Values.networking.ENABLE_METRICS }}
144147
timeoutSeconds: 5
145148
nodeSelector:
146149
kubernetes.io/os: "linux"

charts/kube-ovn/templates/ovncni-ds.yaml

+2
Original file line number | Diff line number | Diff line change
@@ -206,6 +206,7 @@ spec:
206206
- /kube-ovn/kube-ovn-healthcheck
207207
- --port=10665
208208
- --tls={{- .Values.func.SECURE_SERVING }}
209+
- --enable-metrics={{- .Values.networking.ENABLE_METRICS }}
209210
timeoutSeconds: 5
210211
livenessProbe:
211212
failureThreshold: 3
@@ -217,6 +218,7 @@ spec:
217218
- /kube-ovn/kube-ovn-healthcheck
218219
- --port=10665
219220
- --tls={{- .Values.func.SECURE_SERVING }}
221+
- --enable-metrics={{- .Values.networking.ENABLE_METRICS }}
220222
timeoutSeconds: 5
221223
resources:
222224
requests:

cmd/controller/controller.go

+29-8
Original file line number | Diff line number | Diff line change
@@ -84,15 +84,36 @@ func CmdMain() {
8484
}()
8585
}
8686

87-
if !config.EnableMetrics {
88-
return
89-
}
90-
metrics.InitKlogMetrics()
91-
metrics.InitClientGoMetrics()
92-
addr := util.JoinHostPort(metricsAddr, config.PprofPort)
93-
if err := metrics.Run(ctx, config.KubeRestConfig, addr, config.SecureServing, servePprofInMetricsServer); err != nil {
94-
util.LogFatalAndExit(err, "failed to run metrics server")
87+
if config.EnableMetrics {
88+
metrics.InitKlogMetrics()
89+
metrics.InitClientGoMetrics()
90+
addr := util.JoinHostPort(metricsAddr, config.PprofPort)
91+
if err := metrics.Run(ctx, config.KubeRestConfig, addr, config.SecureServing, servePprofInMetricsServer); err != nil {
92+
util.LogFatalAndExit(err, "failed to run metrics server")
93+
}
94+
} else {
95+
klog.Info("metrics server is disabled")
96+
listerner, err := net.ListenTCP("tcp", &net.TCPAddr{IP: net.ParseIP(metricsAddr), Port: int(config.PprofPort)})
97+
if err != nil {
98+
util.LogFatalAndExit(err, "failed to listen on %s", util.JoinHostPort(metricsAddr, config.PprofPort))
99+
}
100+
svr := manager.Server{
101+
Name: "health-check",
102+
Server: &http.Server{
103+
Handler: http.NewServeMux(),
104+
MaxHeaderBytes: 1 << 20,
105+
IdleTimeout: 90 * time.Second,
106+
ReadHeaderTimeout: 32 * time.Second,
107+
},
108+
Listener: listerner,
109+
}
110+
go func() {
111+
if err = svr.Start(ctx); err != nil {
112+
util.LogFatalAndExit(err, "failed to run health check server")
113+
}
114+
}()
95115
}
116+
96117
<-ctx.Done()
97118
}()
98119

cmd/daemon/cniserver.go

+29-6
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,6 @@ import (
2929
func main() {
3030
defer klog.Flush()
3131

32-
daemon.InitMetrics()
33-
metrics.InitKlogMetrics()
34-
3532
config := daemon.ParseFlags()
3633
klog.Info(versions.String())
3734

@@ -149,10 +146,36 @@ func main() {
149146
}()
150147
}
151148

152-
listenAddr := util.JoinHostPort(addr, config.PprofPort)
153-
if err = metrics.Run(ctx, nil, listenAddr, config.SecureServing, servePprofInMetricsServer); err != nil {
154-
util.LogFatalAndExit(err, "failed to run metrics server")
149+
if config.EnableMetrics {
150+
daemon.InitMetrics()
151+
metrics.InitKlogMetrics()
152+
listenAddr := util.JoinHostPort(addr, config.PprofPort)
153+
if err = metrics.Run(ctx, nil, listenAddr, config.SecureServing, servePprofInMetricsServer); err != nil {
154+
util.LogFatalAndExit(err, "failed to run metrics server")
155+
}
156+
} else {
157+
klog.Info("metrics server is disabled")
158+
listerner, err := net.ListenTCP("tcp", &net.TCPAddr{IP: net.ParseIP(addr), Port: int(config.PprofPort)})
159+
if err != nil {
160+
util.LogFatalAndExit(err, "failed to listen on %s", util.JoinHostPort(addr, config.PprofPort))
161+
}
162+
svr := manager.Server{
163+
Name: "health-check",
164+
Server: &http.Server{
165+
Handler: http.NewServeMux(),
166+
MaxHeaderBytes: 1 << 20,
167+
IdleTimeout: 90 * time.Second,
168+
ReadHeaderTimeout: 32 * time.Second,
169+
},
170+
Listener: listerner,
171+
}
172+
go func() {
173+
if err = svr.Start(ctx); err != nil {
174+
util.LogFatalAndExit(err, "failed to run health check server")
175+
}
176+
}()
155177
}
178+
156179
<-stopCh
157180
}
158181

cmd/health_check/health_check.go

+2-1
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ import (
1515
func CmdMain() {
1616
port := pflag.Int32("port", 0, "Target port")
1717
tls := pflag.Bool("tls", false, "Dial the server with TLS")
18+
enableMetrics := pflag.Bool("enable-metrics", true, "Whether to support metrics query")
1819

1920
klogFlags := flag.NewFlagSet("klog", flag.ExitOnError)
2021
klog.InitFlags(klogFlags)
@@ -46,7 +47,7 @@ func CmdMain() {
4647
}
4748

4849
addr := util.JoinHostPort(ip, *port)
49-
if *tls {
50+
if *enableMetrics && *tls {
5051
addr = "tls://" + addr
5152
} else {
5253
addr = "tcp://" + addr

cmd/ovn_monitor/ovn_monitor.go

+39-20
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,14 @@
11
package ovn_monitor
22

33
import (
4-
"os"
5-
"strings"
4+
"net"
5+
"net/http"
6+
"time"
67

78
"k8s.io/klog/v2"
89
"kernel.org/pub/linux/libs/security/libcap/cap"
910
ctrl "sigs.k8s.io/controller-runtime"
11+
"sigs.k8s.io/controller-runtime/pkg/manager"
1012
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
1113

1214
"github.com/kubeovn/kube-ovn/pkg/metrics"
@@ -15,8 +17,6 @@ import (
1517
"github.com/kubeovn/kube-ovn/versions"
1618
)
1719

18-
const port = 10661
19-
2020
func CmdMain() {
2121
defer klog.Flush()
2222

@@ -30,24 +30,43 @@ func CmdMain() {
3030
util.LogFatalAndExit(err, "failed to parse config")
3131
}
3232

33-
addr := config.ListenAddress
34-
if os.Getenv("ENABLE_BIND_LOCAL_IP") == "true" {
35-
if ips := strings.Split(os.Getenv("POD_IPS"), ","); len(ips) == 1 {
36-
addr = util.JoinHostPort(ips[0], port)
37-
}
38-
}
39-
40-
exporter := ovn.NewExporter(config)
41-
if err = exporter.StartConnection(); err != nil {
42-
klog.Errorf("%s failed to connect db socket properly: %s", ovn.GetExporterName(), err)
43-
go exporter.TryClientConnection()
44-
}
45-
exporter.StartOvnMetrics()
46-
4733
ctrl.SetLogger(klog.NewKlogr())
4834
ctx := signals.SetupSignalHandler()
49-
if err = metrics.Run(ctx, nil, addr, config.SecureServing, false); err != nil {
50-
util.LogFatalAndExit(err, "failed to run metrics server")
35+
36+
metricsAddr := util.GetDefaultListenAddr()
37+
if config.EnableMetrics {
38+
exporter := ovn.NewExporter(config)
39+
if err = exporter.StartConnection(); err != nil {
40+
klog.Errorf("%s failed to connect db socket properly: %s", ovn.GetExporterName(), err)
41+
go exporter.TryClientConnection()
42+
}
43+
exporter.StartOvnMetrics()
44+
addr := util.JoinHostPort(metricsAddr, config.MetricsPort)
45+
if err = metrics.Run(ctx, nil, addr, config.SecureServing, false); err != nil {
46+
util.LogFatalAndExit(err, "failed to run metrics server")
47+
}
48+
} else {
49+
klog.Info("metrics server is disabled")
50+
listerner, err := net.ListenTCP("tcp", &net.TCPAddr{IP: net.ParseIP(util.GetDefaultListenAddr()), Port: int(config.MetricsPort)})
51+
if err != nil {
52+
util.LogFatalAndExit(err, "failed to listen on %s", util.JoinHostPort(metricsAddr, config.MetricsPort))
53+
}
54+
svr := manager.Server{
55+
Name: "health-check",
56+
Server: &http.Server{
57+
Handler: http.NewServeMux(),
58+
MaxHeaderBytes: 1 << 20,
59+
IdleTimeout: 90 * time.Second,
60+
ReadHeaderTimeout: 32 * time.Second,
61+
},
62+
Listener: listerner,
63+
}
64+
go func() {
65+
if err = svr.Start(ctx); err != nil {
66+
util.LogFatalAndExit(err, "failed to run health check server")
67+
}
68+
}()
5169
}
70+
5271
<-ctx.Done()
5372
}

cmd/speaker/speaker.go

+5-3
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,11 @@ func CmdMain() {
2828
ctrl.SetLogger(klog.NewKlogr())
2929
ctx := signals.SetupSignalHandler()
3030
go func() {
31-
metrics.InitKlogMetrics()
32-
if err = metrics.Run(ctx, nil, util.JoinHostPort("0.0.0.0", config.PprofPort), false, false); err != nil {
33-
util.LogFatalAndExit(err, "failed to run metrics server")
31+
if config.EnableMetrics {
32+
metrics.InitKlogMetrics()
33+
if err = metrics.Run(ctx, nil, util.JoinHostPort("0.0.0.0", config.PprofPort), false, false); err != nil {
34+
util.LogFatalAndExit(err, "failed to run metrics server")
35+
}
3436
}
3537
<-ctx.Done()
3638
}()

dist/images/install.sh

+11
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ ENABLE_LB_SVC=${ENABLE_LB_SVC:-false}
2222
ENABLE_NAT_GW=${ENABLE_NAT_GW:-true}
2323
ENABLE_KEEP_VM_IP=${ENABLE_KEEP_VM_IP:-true}
2424
ENABLE_ARP_DETECT_IP_CONFLICT=${ENABLE_ARP_DETECT_IP_CONFLICT:-true}
25+
ENABLE_METRICS=${ENABLE_METRICS:-true}
2526
# comma-separated string of nodelocal DNS ip addresses
2627
NODE_LOCAL_DNS_IP=${NODE_LOCAL_DNS_IP:-}
2728
ENABLE_IC=${ENABLE_IC:-$(kubectl get node --show-labels | grep -qw "ovn.kubernetes.io/ic-gw" && echo true || echo false)}
@@ -4339,6 +4340,7 @@ spec:
43394340
- --log_file_max_size=200
43404341
- --enable-lb-svc=$ENABLE_LB_SVC
43414342
- --keep-vm-ip=$ENABLE_KEEP_VM_IP
4343+
- --enable-metrics=$ENABLE_METRICS
43424344
- --node-local-dns-ip=$NODE_LOCAL_DNS_IP
43434345
- --enable-ovn-ipsec=$ENABLE_OVN_IPSEC
43444346
- --secure-serving=${SECURE_SERVING}
@@ -4400,6 +4402,7 @@ spec:
44004402
- /kube-ovn/kube-ovn-healthcheck
44014403
- --port=10660
44024404
- --tls=${SECURE_SERVING}
4405+
- --enable-metrics=$ENABLE_METRICS
44034406
periodSeconds: 3
44044407
timeoutSeconds: 5
44054408
livenessProbe:
@@ -4408,6 +4411,7 @@ spec:
44084411
- /kube-ovn/kube-ovn-healthcheck
44094412
- --port=10660
44104413
- --tls=${SECURE_SERVING}
4414+
- --enable-metrics=$ENABLE_METRICS
44114415
initialDelaySeconds: 300
44124416
periodSeconds: 7
44134417
failureThreshold: 5
@@ -4529,6 +4533,7 @@ spec:
45294533
- --alsologtostderr=true
45304534
- --log_file=/var/log/kube-ovn/kube-ovn-cni.log
45314535
- --log_file_max_size=200
4536+
- --enable-metrics=$ENABLE_METRICS
45324537
- --kubelet-dir=$KUBELET_DIR
45334538
- --enable-tproxy=$ENABLE_TPROXY
45344539
- --ovs-vsctl-concurrency=$OVS_VSCTL_CONCURRENCY
@@ -4620,6 +4625,7 @@ spec:
46204625
- /kube-ovn/kube-ovn-healthcheck
46214626
- --port=10665
46224627
- --tls=${SECURE_SERVING}
4628+
- --enable-metrics=$ENABLE_METRICS
46234629
timeoutSeconds: 5
46244630
readinessProbe:
46254631
failureThreshold: 3
@@ -4630,6 +4636,7 @@ spec:
46304636
- /kube-ovn/kube-ovn-healthcheck
46314637
- --port=10665
46324638
- --tls=${SECURE_SERVING}
4639+
- --enable-metrics=$ENABLE_METRICS
46334640
timeoutSeconds: 5
46344641
resources:
46354642
requests:
@@ -4747,6 +4754,7 @@ spec:
47474754
- --alsologtostderr=true
47484755
- --log_file=/var/log/kube-ovn/kube-ovn-pinger.log
47494756
- --log_file_max_size=200
4757+
- --enable-metrics=$ENABLE_METRICS
47504758
imagePullPolicy: $IMAGE_PULL_POLICY
47514759
securityContext:
47524760
runAsUser: ${RUN_AS_USER}
@@ -4898,6 +4906,7 @@ spec:
48984906
- --logtostderr=false
48994907
- --alsologtostderr=true
49004908
- --log_file_max_size=200
4909+
- --enable-metrics=$ENABLE_METRICS
49014910
securityContext:
49024911
runAsUser: ${RUN_AS_USER}
49034912
privileged: false
@@ -4961,6 +4970,7 @@ spec:
49614970
- /kube-ovn/kube-ovn-healthcheck
49624971
- --port=10661
49634972
- --tls=${SECURE_SERVING}
4973+
- --enable-metrics=$ENABLE_METRICS
49644974
timeoutSeconds: 5
49654975
readinessProbe:
49664976
failureThreshold: 3
@@ -4972,6 +4982,7 @@ spec:
49724982
- /kube-ovn/kube-ovn-healthcheck
49734983
- --port=10661
49744984
- --tls=${SECURE_SERVING}
4985+
- --enable-metrics=$ENABLE_METRICS
49754986
timeoutSeconds: 5
49764987
nodeSelector:
49774988
kubernetes.io/os: "linux"

0 commit comments

Comments
 (0)