Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add passive healthcheck support similar to nginx's fail_timeout #79

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions lib/resty/healthcheck.lua
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ local ERR = ngx.ERR
local WARN = ngx.WARN
local DEBUG = ngx.DEBUG
local ngx_log = ngx.log
local ngx_now = ngx.now
local tostring = tostring
local ipairs = ipairs
local cjson = require("cjson.safe").new()
Expand Down Expand Up @@ -473,14 +474,18 @@ end
-- @param hostname the hostname of the target being checked.
-- @return `true` if healthy, `false` if unhealthy, or `nil + error` on failure.
function checker:get_target_status(ip, port, hostname)

  local target = get_target(self, ip, port, hostname)
  if not target then
    return nil, "target not found"
  end

  -- Both "healthy" and "mostly_healthy" count as healthy for callers.
  local health = target.internal_health
  if health == "healthy" or health == "mostly_healthy" then
    return true
  end

  -- The target is currently (mostly) unhealthy. `unhealthy_expire` is only
  -- present when a passive unhealthy duration is in effect for this target;
  -- without it the unhealthy state is reported as-is.
  local expire_at = target.unhealthy_expire
  if not expire_at or expire_at >= ngx_now() then
    return false
  end

  -- The unhealthy window has elapsed: let traffic reach the target again and
  -- record a success so it can move back towards the healthy state.
  -- NOTE: this makes the getter write to the checker state as a side effect.
  local reported, err = self:report_success(ip, port, hostname)
  if not reported then
    -- Keep answering `true` (the window did expire), but do not hide the
    -- failure to record the recovery.
    self:log(WARN, "failed reporting success for expired unhealthy target '",
             hostname or "", "(", ip, ":", port, ")': ", err)
  end

  return true
end


Expand Down Expand Up @@ -597,7 +602,11 @@ local function incr_counter(self, health_report, ip, port, hostname, limit, ctr_

return locking_target(self, ip, port, hostname, function()
local counter_key = key_for(self.TARGET_COUNTER, ip, port, hostname)
local multictr, err = self.shm:incr(counter_key, ctr_type, 0)
local duration = 0
if ctr_type ~= CTR_SUCCESS then
duration = self.checks.passive.unhealthy.duration
end
local multictr, err = self.shm:incr(counter_key, ctr_type, 0, duration)
if err then
return nil, err
end
Expand Down Expand Up @@ -1118,6 +1127,15 @@ function checker:event_handler(event_name, ip, port, hostname)
self:log(DEBUG, "event: target status '", hostname or "", "(", ip, ":",
port, ")' from '", from, "' to '", to, "', ver: ", self.status_ver)
end

local duration = self.checks.passive.unhealthy.duration
if duration > 0
and (event_name == self.events.unhealthy or event_name == self.events.mostly_unhealthy)
and target_found.internal_health ~= event_name then

target_found.unhealthy_expire = ngx_now() + duration
self:log(DEBUG, "set target_found.unhealthy_expire = ", target_found.unhealthy_expire)
end
target_found.internal_health = event_name

elseif event_name == self.events.clear then
Expand Down Expand Up @@ -1298,6 +1316,7 @@ local defaults = {
tcp_failures = 2,
timeouts = 7,
http_failures = 5,
duration = 0,
},
},
},
Expand Down Expand Up @@ -1468,9 +1487,13 @@ function _M.new(opts)
self:log(DEBUG, "Got initial target list (", #self.targets, " targets)")

-- load individual statuses
local duration = self.checks.passive.unhealthy.duration
for _, target in ipairs(self.targets) do
local state_key = key_for(self.TARGET_STATE, target.ip, target.port, target.hostname)
target.internal_health = INTERNAL_STATES[self.shm:get(state_key)]
if duration > 0 and (target.internal_health == "unhealthy" or target.internal_health == "mostly_unhealthy") then
target.unhealthy_expire = ngx_now() - duration
end
self:log(DEBUG, "Got initial status ", target.internal_health, " ",
target.hostname, " ", target.ip, ":", target.port)
-- fill-in the hash part for easy lookup
Expand Down
173 changes: 173 additions & 0 deletions t/21-fail-duration.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# Test suite for the passive `unhealthy.duration` option of resty.healthcheck.
use Test::Nginx::Socket::Lua;
use Cwd qw(cwd);

# Run the test nginx with a single worker process.
workers(1);

# Declare the number of planned checks: 15 per repetition of the suite.
plan tests => repeat_each() * 15;

# Directory the suite is started from; the library is loaded from its ./lib.
my $pwd = cwd();

# nginx configuration snippet shared by the test blocks, which pull it in
# through $::HttpConfig: Lua module search path plus the two shared dicts
# named in the checker and worker-events setup of each test.
our $HttpConfig = qq{
lua_package_path "$pwd/lib/?.lua;;";
lua_shared_dict test_shm 8m;
lua_shared_dict my_worker_events 8m;
};

# Hand over to the harness, which runs the blocks found after __DATA__.
run_tests();

__DATA__



=== TEST 1: report fail duration
--- http_config eval
qq{
$::HttpConfig

server {
listen 2122;
location = /status {
return 200;
}
}
}
--- config
location = /t {
content_by_lua_block {
-- Scenario: two passive timeouts separated by more than the configured
-- unhealthy duration must not add up, so the target is still reported healthy.
local worker_events = require("resty.worker.events")
assert(worker_events.configure({ shm = "my_worker_events", interval = 0.1 }))
local healthcheck = require("resty.healthcheck")

local HOST, PORT = "127.0.0.1", 2113

-- Active checks are configured but effectively disabled by the huge interval.
local active_checks = {
  http_path = "/status",
  healthy = { interval = 999, successes = 3 },
  unhealthy = {
    interval = 999,
    tcp_failures = 3,
    http_failures = 5,
    timeouts = 2,
  },
}

-- Passive checks: two timeouts mark the target unhealthy, duration is 2s.
local passive_checks = {
  healthy = { successes = 3 },
  unhealthy = {
    tcp_failures = 3,
    http_failures = 5,
    timeouts = 2,
    duration = 2,
  },
}

local checker = healthcheck.new({
  name = "testing",
  shm_name = "test_shm",
  type = "http",
  checks = { active = active_checks, passive = passive_checks },
})

-- give the initial timers a chance to run once
ngx.sleep(0.1)
checker:add_target(HOST, PORT, nil, true)
worker_events.poll()

-- first timeout, then wait longer than the duration before the second one
checker:report_timeout(HOST, PORT, nil, "passive")
ngx.sleep(3)
checker:report_timeout(HOST, PORT, nil, "passive")
worker_events.poll()

-- expected output: true
ngx.say(checker:get_target_status(HOST, PORT))
}
}
--- request
GET /t
--- response_body
true
--- timeout: 6s
--- error_log
checking healthy targets: nothing to do
checking unhealthy targets: nothing to do
unhealthy TIMEOUT increment (1/2) for '(127.0.0.1:2113)'
unhealthy TIMEOUT increment (1/2) for '(127.0.0.1:2113)'
event: target status '(127.0.0.1:2113)' from 'true' to 'true'



=== TEST 2: unhealthy duration
--- http_config eval
qq{
$::HttpConfig

server {
listen 2122;
location = /status {
return 200;
}
}
}
--- config
location = /t {
content_by_lua_block {
-- Scenario: two back-to-back passive timeouts mark the target unhealthy, the
-- status reads healthy again once the duration has passed, and two further
-- timeouts mark it unhealthy once more.
local worker_events = require("resty.worker.events")
assert(worker_events.configure({ shm = "my_worker_events", interval = 0.1 }))
local healthcheck = require("resty.healthcheck")

local HOST, PORT = "127.0.0.1", 2113

-- Active checks are configured but effectively disabled by the huge interval.
local active_checks = {
  http_path = "/status",
  healthy = { interval = 999, successes = 3 },
  unhealthy = {
    interval = 999,
    tcp_failures = 3,
    http_failures = 5,
    timeouts = 2,
  },
}

-- Passive checks: two timeouts mark the target unhealthy, duration is 2s.
local passive_checks = {
  healthy = { successes = 3 },
  unhealthy = {
    tcp_failures = 3,
    http_failures = 5,
    timeouts = 2,
    duration = 2,
  },
}

local checker = healthcheck.new({
  name = "testing",
  shm_name = "test_shm",
  type = "http",
  checks = { active = active_checks, passive = passive_checks },
})

-- Report two passive timeouts in a row and let the events propagate.
local function report_two_timeouts()
  checker:report_timeout(HOST, PORT, nil, "passive")
  checker:report_timeout(HOST, PORT, nil, "passive")
  worker_events.poll()
end

-- Print whatever get_target_status returns for the target.
local function say_status()
  ngx.say(checker:get_target_status(HOST, PORT))
end

-- give the initial timers a chance to run once
ngx.sleep(0.1)
checker:add_target(HOST, PORT, nil, true)
worker_events.poll()

report_two_timeouts()
say_status() -- expected output: false

ngx.sleep(3)
say_status() -- expected output: true

report_two_timeouts()
say_status() -- expected output: false
}
}
--- request
GET /t
--- response_body
false
true
false
--- timeout: 6s
--- error_log
checking healthy targets: nothing to do
checking unhealthy targets: nothing to do
unhealthy TIMEOUT increment (1/2) for '(127.0.0.1:2113)'
unhealthy TIMEOUT increment (2/2) for '(127.0.0.1:2113)'
event: target status '(127.0.0.1:2113)' from 'true' to 'false'
set target_found.unhealthy_expire