Skip to content

Commit 14b894a

Browse files
locaohishamhm
authored andcommitted
fix(healthcheck) retry locking protected fn (#37)
When ngx.sleep API is not available (e.g. in the log phase) it's not possible to lock using lua-resty-lock and functions that must run protected were failing. This change adds retry methods that start new light threads that have access to ngx.sleep and will succeed to lock. Fix Kong/kong#5137
1 parent f72d14e commit 14b894a

File tree

2 files changed

+125
-35
lines changed

2 files changed

+125
-35
lines changed

lib/resty/healthcheck.lua

+77-34
Original file line numberDiff line numberDiff line change
@@ -175,50 +175,72 @@ local function fetch_target_list(self)
175175
end
176176

177177

178-
--- Run the given function holding a lock on the target list.
179-
-- WARNING: the callback will run unprotected, so it should never
180-
-- throw an error, but always return `nil + error` instead.
181-
-- @param self The checker object
182-
-- @param fn The function to execute
183-
-- @return The results of the function; or nil and an error message
184-
-- in case it fails locking.
185-
local function locking_target_list(self, fn)
178+
--- Helper function to run the function holding a lock on the target list.
179+
-- @see locking_target_list
180+
local function run_fn_locked_target_list(premature, self, fn)
181+
182+
if premature then
183+
return
184+
end
186185

187186
local lock, lock_err = resty_lock:new(self.shm_name, {
188-
exptime = 10, -- timeout after which lock is released anyway
189-
timeout = 5, -- max wait time to acquire lock
190-
})
187+
exptime = 10, -- timeout after which lock is released anyway
188+
timeout = 5, -- max wait time to acquire lock
189+
})
190+
191191
if not lock then
192192
return nil, "failed to create lock:" .. lock_err
193193
end
194194

195-
local ok, err = lock:lock(self.TARGET_LIST_LOCK)
196-
if not ok then
197-
return nil, "failed to acquire lock: " .. err
195+
local pok, perr = pcall(resty_lock.lock, lock, self.TARGET_LIST_LOCK)
196+
if not pok then
197+
self:log(DEBUG, "failed to acquire lock: ", perr)
198+
return nil, "failed to acquire lock"
198199
end
199200

200-
local target_list
201-
target_list, err = fetch_target_list(self)
201+
local target_list, err = fetch_target_list(self)
202202

203203
local final_ok, final_err
204204

205205
if target_list then
206-
final_ok, final_err = fn(target_list)
206+
final_ok, final_err = pcall(fn, target_list)
207207
else
208208
final_ok, final_err = nil, err
209209
end
210210

211+
local ok
211212
ok, err = lock:unlock()
212213
if not ok then
213214
-- recoverable: not returning this error, only logging it
214215
self:log(ERR, "failed to release lock '", self.TARGET_LIST_LOCK,
215-
"': ", err)
216+
"': ", err)
216217
end
217218

218219
return final_ok, final_err
219220
end
220221

221222

223+
--- Run the given function holding a lock on the target list.
224+
-- @param self The checker object
225+
-- @param fn The function to execute
226+
-- @return The results of the function; or nil and an error message
227+
-- in case it fails locking.
228+
local function locking_target_list(self, fn)
229+
230+
local ok, err = run_fn_locked_target_list(false, self, fn)
231+
if err == "failed to acquire lock" then
232+
local _, terr = ngx.timer.at(0, run_fn_locked_target_list, self, fn)
233+
if terr ~= nil then
234+
return nil, terr
235+
end
236+
237+
return true
238+
end
239+
240+
return ok, err
241+
end
242+
243+
222244
--- Get a target
223245
local function get_target(self, ip, port, hostname)
224246
hostname = hostname or ip
@@ -416,17 +438,13 @@ end
416438
------------------------------------------------------------------------------
417439

418440

419-
-- Run the given function holding a lock on the target.
420-
-- WARNING: the callback will run unprotected, so it should never
421-
-- throw an error, but always return `nil + error` instead.
422-
-- @param self The checker object
423-
-- @param ip Target IP
424-
-- @param port Target port
425-
-- @param hostname Target hostname
426-
-- @param fn The function to execute
427-
-- @return The results of the function; or nil and an error message
428-
-- in case it fails locking.
429-
local function locking_target(self, ip, port, hostname, fn)
441+
--- Helper function to actually run the function holding a lock on the target.
442+
-- @see locking_target
443+
local function run_mutexed_fn(premature, self, ip, port, hostname, fn)
444+
if premature then
445+
return
446+
end
447+
430448
local lock, lock_err = resty_lock:new(self.shm_name, {
431449
exptime = 10, -- timeout after which lock is released anyway
432450
timeout = 5, -- max wait time to acquire lock
@@ -436,20 +454,45 @@ local function locking_target(self, ip, port, hostname, fn)
436454
end
437455
local lock_key = key_for(self.TARGET_LOCK, ip, port, hostname)
438456

439-
local ok, err = lock:lock(lock_key)
440-
if not ok then
441-
return nil, "failed to acquire lock: " .. err
457+
local pok, perr = pcall(resty_lock.lock, lock, lock_key)
458+
if not pok then
459+
self:log(DEBUG, "failed to acquire lock: ", perr)
460+
return nil, "failed to acquire lock"
442461
end
443462

444-
local final_ok, final_err = fn()
463+
local final_ok, final_err = pcall(fn)
445464

446-
ok, err = lock:unlock()
465+
local ok, err = lock:unlock()
447466
if not ok then
448467
-- recoverable: not returning this error, only logging it
449468
self:log(ERR, "failed to release lock '", lock_key, "': ", err)
450469
end
451470

452471
return final_ok, final_err
472+
473+
end
474+
475+
476+
-- Run the given function holding a lock on the target.
477+
-- @param self The checker object
478+
-- @param ip Target IP
479+
-- @param port Target port
480+
-- @param hostname Target hostname
481+
-- @param fn The function to execute
482+
-- @return The results of the function; or true in case it fails locking and
483+
-- will retry asynchronously; or nil+err in case it fails to retry.
484+
local function locking_target(self, ip, port, hostname, fn)
485+
local ok, err = run_mutexed_fn(false, self, ip, port, hostname, fn)
486+
if err == "failed to acquire lock" then
487+
local _, terr = ngx.timer.at(0, run_mutexed_fn, self, ip, port, hostname, fn)
488+
if terr ~= nil then
489+
return nil, terr
490+
end
491+
492+
return true
493+
end
494+
495+
return ok, err
453496
end
454497

455498

t/06-report_http_status.t

+48-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use Cwd qw(cwd);
33

44
workers(1);
55

6-
plan tests => repeat_each() * 50;
6+
plan tests => repeat_each() * 53;
77

88
my $pwd = cwd();
99

@@ -444,3 +444,50 @@ checking unhealthy targets: nothing to do
444444
--- no_error_log
445445
unhealthy HTTP increment
446446
event: target status '(127.0.0.1:2119)' from 'true' to 'false'
447+
448+
449+
=== TEST 5: report_http_status() must work in log phase
450+
--- http_config eval
451+
qq{
452+
$::HttpConfig
453+
}
454+
--- config
455+
location = /t {
456+
content_by_lua_block {
457+
ngx.say("OK")
458+
}
459+
log_by_lua_block {
460+
local we = require "resty.worker.events"
461+
assert(we.configure{ shm = "my_worker_events", interval = 0.1 })
462+
local healthcheck = require("resty.healthcheck")
463+
local checker = healthcheck.new({
464+
name = "testing",
465+
shm_name = "test_shm",
466+
type = "http",
467+
checks = {
468+
passive = {
469+
healthy = {
470+
successes = 3,
471+
},
472+
unhealthy = {
473+
tcp_failures = 2,
474+
http_failures = 3,
475+
}
476+
}
477+
}
478+
})
479+
local ok, err = checker:add_target("127.0.0.1", 2119, nil, true)
480+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
481+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
482+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
483+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
484+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
485+
checker:report_http_status("127.0.0.1", 2119, nil, 500, "passive")
486+
}
487+
}
488+
--- request
489+
GET /t
490+
--- response_body
491+
OK
492+
--- no_error_log
493+
failed to acquire lock: API disabled in the context of log_by_lua

0 commit comments

Comments
 (0)