upstream: fix deadlock when destroying connections

edsiper · edsiper · commit 0b30552693f4 · 2022-02-15T13:32:22.000-06:00
When workers are enabled and a timeout occurs in a connection, in most cases a deadlock occurs in the active worker: ==1654992== Thread #4: Attempt to re-lock a non-recursive lock I already hold ==1654992== at 0x484BB44: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so) ==1654992== by 0x197579: prepare_destroy_conn_safe (flb_upstream.c:435) ==1654992== by 0x197887: create_conn (flb_upstream.c:533) ==1654992== by 0x197DBB: flb_upstream_conn_get (flb_upstream.c:674) ==1654992== by 0x2396D3: http_post (http.c:86) ==1654992== by 0x23A5E5: cb_http_flush (http.c:338) ==1654992== by 0x17FE6B: output_pre_cb_flush (flb_output.h:511) ==1654992== by 0x503DAA: co_init (amd64.c:117) ==1654992== Lock was previously acquired ==1654992== at 0x484BC0F: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so) ==1654992== by 0x19815F: flb_upstream_conn_timeouts (flb_upstream.c:780) ==1654992== by 0x17FEFC: cb_thread_sched_timer (flb_output_thread.c:58) ==1654992== by 0x193ED7: flb_sched_event_handler (flb_scheduler.c:422) ==1654992== by 0x180672: output_thread (flb_output_thread.c:265) ==1654992== by 0x199602: step_callback (flb_worker.c:44) ==1654992== by 0x484E8AA: ??? (in /usr/libexec/valgrind/vgpreload_helgrind-amd64-linux.so) ==1654992== by 0x4E3F926: start_thread (pthread_create.c:435) ==1654992== by 0x4ECF9E3: clone (clone.S:100) The following patch fixes the behavior of prepare_destroy_conn_safe by 'trying to acquire' the mutex lock: if it fails to acquire it, it assumes the mutex is already locked and that no new lock is required. Signed-off-by: Eduardo Silva <eduardo@calyptia.com>
diff --git a/src/flb_upstream.c b/src/flb_upstream.c
@@ -429,15 +429,19 @@ static int prepare_destroy_conn(struct flb_upstream_conn *u_conn)
 static inline int prepare_destroy_conn_safe(struct flb_upstream_conn *u_conn)
 {
     int ret;
+    int locked = FLB_FALSE;
     struct flb_upstream *u = u_conn->u;
 
     if (u->thread_safe == FLB_TRUE) {
-        pthread_mutex_lock(&u->mutex_lists);
+        ret = pthread_mutex_trylock(&u->mutex_lists);
+        if (ret == 0) {
+            locked = FLB_TRUE;
+        }
     }
 
     ret = prepare_destroy_conn(u_conn);
 
-    if (u->thread_safe == FLB_TRUE) {
+    if (u->thread_safe == FLB_TRUE && locked) {
         pthread_mutex_unlock(&u->mutex_lists);
     }
 

Original file line number	Diff line number	Diff line change
`@@ -429,15 +429,19 @@ static int prepare_destroy_conn(struct flb_upstream_conn *u_conn)`
`429`	`429`	`static inline int prepare_destroy_conn_safe(struct flb_upstream_conn *u_conn)`
`430`	`430`	`{`
`431`	`431`	`int ret;`
	`432`	`+ int locked = FLB_FALSE;`
`432`	`433`	`struct flb_upstream *u = u_conn->u;`
`433`	`434`
`434`	`435`	`if (u->thread_safe == FLB_TRUE) {`
`435`		`- pthread_mutex_lock(&u->mutex_lists);`
	`436`	`+ ret = pthread_mutex_trylock(&u->mutex_lists);`
	`437`	`+ if (ret == 0) {`
	`438`	`+ locked = FLB_TRUE;`
	`439`	`+ }`
`436`	`440`	`}`
`437`	`441`
`438`	`442`	`ret = prepare_destroy_conn(u_conn);`
`439`	`443`
`440`		`- if (u->thread_safe == FLB_TRUE) {`
	`444`	`+ if (u->thread_safe == FLB_TRUE && locked) {`
`441`	`445`	`pthread_mutex_unlock(&u->mutex_lists);`
`442`	`446`	`}`
`443`	`447`