Skip to content

Commit 0ce06a2

Browse files
committed
fix(healthcheck) retry locking protected fn
When the ngx.sleep API is not available (e.g. in the log phase), it is not possible to acquire a lock using lua-resty-lock, so functions that must run protected were failing. This change adds a retry mechanism that starts a new light thread (via a zero-delay timer), which has access to ngx.sleep and can therefore acquire the lock. Fixes Kong/kong#5137
1 parent f880430 commit 0ce06a2

File tree

2 files changed

+86
-16
lines changed

2 files changed

+86
-16
lines changed

lib/resty/healthcheck.lua

+38-15
Original file line numberDiff line numberDiff line change
@@ -416,17 +416,13 @@ end
416416
------------------------------------------------------------------------------
417417

418418

419-
-- Run the given function holding a lock on the target.
420-
-- WARNING: the callback will run unprotected, so it should never
421-
-- throw an error, but always return `nil + error` instead.
422-
-- @param self The checker object
423-
-- @param ip Target IP
424-
-- @param port Target port
425-
-- @param hostname Target hostname
426-
-- @param fn The function to execute
427-
-- @return The results of the function; or nil and an error message
428-
-- in case it fails locking.
429-
local function locking_target(self, ip, port, hostname, fn)
419+
--- Helper function to actually run the function holding a lock on the target.
420+
-- @see locking_target
421+
local function run_mutexed_fn(premature, self, ip, port, hostname, fn)
422+
if premature then
423+
return
424+
end
425+
430426
local lock, lock_err = resty_lock:new(self.shm_name, {
431427
exptime = 10, -- timeout after which lock is released anyway
432428
timeout = 5, -- max wait time to acquire lock
@@ -436,20 +432,47 @@ local function locking_target(self, ip, port, hostname, fn)
436432
end
437433
local lock_key = key_for(self.TARGET_LOCK, ip, port, hostname)
438434

439-
local ok, err = lock:lock(lock_key)
440-
if not ok then
441-
return nil, "failed to acquire lock: " .. err
435+
local pok, perr = pcall(resty_lock.lock, lock, lock_key)
436+
if not pok then
437+
self:log(DEBUG, "failed to acquire lock: ", perr)
438+
return nil, "failed to acquire lock"
442439
end
443440

444441
local final_ok, final_err = fn()
445442

446-
ok, err = lock:unlock()
443+
local ok, err = lock:unlock()
447444
if not ok then
448445
-- recoverable: not returning this error, only logging it
449446
self:log(ERR, "failed to release lock '", lock_key, "': ", err)
450447
end
451448

452449
return final_ok, final_err
450+
451+
end
452+
453+
454+
-- Run the given function holding a lock on the target.
455+
-- WARNING: the callback will run unprotected, so it should never
456+
-- throw an error, but always return `nil + error` instead.
457+
-- @param self The checker object
458+
-- @param ip Target IP
459+
-- @param port Target port
460+
-- @param hostname Target hostname
461+
-- @param fn The function to execute
462+
-- @return The results of the function; or true in case it fails locking and
463+
-- will retry asynchronously; or nil+err in case it fails to retry.
464+
local function locking_target(self, ip, port, hostname, fn)
465+
local ok, err = run_mutexed_fn(false, self, ip, port, hostname, fn)
466+
if err == "failed to acquire lock" then
467+
local _, terr = ngx.timer.at(0, run_mutexed_fn, self, ip, port, hostname, fn)
468+
if terr ~= nil then
469+
return nil, terr
470+
end
471+
472+
return true
473+
end
474+
475+
return ok, err
453476
end
454477

455478

t/06-report_http_status.t

+48-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use Cwd qw(cwd);
33

44
workers(1);
55

6-
plan tests => repeat_each() * 50;
6+
plan tests => repeat_each() * 53;
77

88
my $pwd = cwd();
99

@@ -444,3 +444,50 @@ checking unhealthy targets: nothing to do
444444
--- no_error_log
445445
unhealthy HTTP increment
446446
event: target status '(127.0.0.1:2119)' from 'true' to 'false'
447+
448+
449+
=== TEST 5: report_http_status() must work in log phase
450+
--- http_config eval
451+
qq{
452+
$::HttpConfig
453+
}
454+
--- config
455+
location = /t {
456+
content_by_lua_block {
457+
ngx.say("OK")
458+
}
459+
log_by_lua_block {
460+
local we = require "resty.worker.events"
461+
assert(we.configure{ shm = "my_worker_events", interval = 0.1 })
462+
local healthcheck = require("resty.healthcheck")
463+
local checker = healthcheck.new({
464+
name = "testing",
465+
shm_name = "test_shm",
466+
type = "http",
467+
checks = {
468+
passive = {
469+
healthy = {
470+
successes = 3,
471+
},
472+
unhealthy = {
473+
tcp_failures = 2,
474+
http_failures = 3,
475+
}
476+
}
477+
}
478+
})
479+
local ok, err = checker:add_target("127.0.0.1", 2119, nil, true)
480+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
481+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
482+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
483+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
484+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
485+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
486+
}
487+
}
488+
--- request
489+
GET /t
490+
--- response_body
491+
OK
492+
--- no_error_log
493+
failed to acquire lock: API disabled in the context of log_by_lua

0 commit comments

Comments
 (0)