From 70af0cd18509a8f40f9c8dde2da9b7a500131f17 Mon Sep 17 00:00:00 2001 From: Eugene Nuribekov Date: Wed, 26 Jun 2024 13:15:52 +0000 Subject: [PATCH] Fix HM response processing. Original issue to investigate was a suspection that automatic CRC calculation will never works properly. After investigation it turned out that response never achieves this place. This commit contains fixes, light refactoring for better readability and removing unused function arguments. Test cases for checking this changes added to tempesta-test. --- fw/apm.c | 27 ++++++++++++++++++--------- fw/apm.h | 3 +-- fw/http.c | 56 +++++++++++++++++++++++++++++++------------------------ 3 files changed, 51 insertions(+), 35 deletions(-) diff --git a/fw/apm.c b/fw/apm.c index e06853309..3f1ab3240 100644 --- a/fw/apm.c +++ b/fw/apm.c @@ -1329,8 +1329,7 @@ tfw_apm_hm_srv_rcount_update(TfwStr *uri_path, void *apmref) } static inline u32 -__tfw_apm_crc32_calc(TfwMsgIter *it, TfwStr *chunk , struct sk_buff *skb_head, - TfwStr *body) +__tfw_apm_crc32_calc(TfwMsgIter *it, TfwStr *chunk) { u32 crc = 0; @@ -1340,11 +1339,21 @@ __tfw_apm_crc32_calc(TfwMsgIter *it, TfwStr *chunk , struct sk_buff *skb_head, return crc; } +/** +* Validate response from the given server. +* Check: +* - Is response status code belongs to monitored set +* - Integrity of the response body +* - CRC +* Successful passing all of the checks considered +* as a sign that server alive. +*/ bool -tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head, - void *apmref) +tfw_apm_hm_srv_alive(TfwHttpResp *resp, TfwServer *srv) { - TfwApmHM *hm = READ_ONCE(((TfwApmRef *)apmref)->hmctl.hm); + int status = resp->status; + TfwStr *body = &resp->body; + TfwApmHM *hm = READ_ONCE(((TfwApmRef *)(srv->apmref))->hmctl.hm); u32 crc32 = 0; TfwMsgIter it; TfwStr chunk = {0}; @@ -1363,7 +1372,7 @@ tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head, } if (unlikely(tfw_body_iter_init(&it, &chunk, body->data, body->skb, - skb_head))) + resp->msg.skb_head))) { T_WARN_NL("Invalid body. Health monitor '%s': status '%d' \n", hm->name, status); @@ -1375,11 +1384,11 @@ tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head, * from body of first response and store it into monitor. */ if (!hm->crc32 && hm->auto_crc) { - hm->crc32 = __tfw_apm_crc32_calc(&it, &chunk, skb_head, body); - T_WARN("Auto CRC generated"); + hm->crc32 = __tfw_apm_crc32_calc(&it, &chunk); + T_WARN("Auto CRC generated\n"); } else if (hm->crc32) { - crc32 = __tfw_apm_crc32_calc(&it, &chunk, skb_head, body); + crc32 = __tfw_apm_crc32_calc(&it, &chunk); if (hm->crc32 != crc32) goto crc_err; } diff --git a/fw/apm.h b/fw/apm.h index 3e7f0bb20..bcab872ce 100644 --- a/fw/apm.h +++ b/fw/apm.h @@ -88,8 +88,7 @@ TfwHMStats *tfw_apm_hm_stats(void *apmref); /* Health monitor procedures ('health_check' directive). */ void tfw_apm_hm_srv_rcount_update(TfwStr *uri_path, void *apmref); -bool tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head, - void *apmref); +bool tfw_apm_hm_srv_alive(TfwHttpResp *resp, TfwServer *srv); void tfw_apm_hm_enable_srv(TfwServer *srv, const char *hm_name); void tfw_apm_hm_disable_srv(TfwServer *srv); bool tfw_apm_hm_srv_eq(const char *name, TfwServer *srv); diff --git a/fw/http.c b/fw/http.c index b1bc9d478..c286d8cc1 100644 --- a/fw/http.c +++ b/fw/http.c @@ -1609,25 +1609,20 @@ do { \ } } -static bool -tfw_http_hm_suspend(TfwHttpResp *resp, TfwServer *srv) +/** + * Try to mark server as suspended. + * In case of HM is active do it, otherwise left unchanged. + */ +static void +tfw_http_hm_try_suspend(TfwHttpResp *resp, TfwServer *srv) { unsigned long old_flags, flags = READ_ONCE(srv->flags); - /* - * We need to count a total response statistics in - * tfw_apm_hm_srv_limit(), even if health monitor is disabled. - * In this case limit calculation won't be accounted. - */ - bool lim_exceeded = tfw_apm_hm_srv_limit(resp->status, srv->apmref); - if (!(flags & TFW_SRV_F_HMONITOR)) - return true; - if (!lim_exceeded) - return false; + while (flags & TFW_SRV_F_HMONITOR) { - do { old_flags = cmpxchg(&srv->flags, flags, - flags | TFW_SRV_F_SUSPEND); + flags | TFW_SRV_F_SUSPEND); + if (likely(old_flags == flags)) { T_WARN_ADDR_STATUS("server has been suspended: limit " "for bad responses is exceeded", @@ -1635,28 +1630,41 @@ tfw_http_hm_suspend(TfwHttpResp *resp, TfwServer *srv) resp->status); break; } - flags = old_flags; - } while (flags & TFW_SRV_F_HMONITOR); - return true; + flags = old_flags; + } } +/** +* The main function of Health Monioring. +* Getting response from the server, it updates responses statistics, +* checks HM limits and makes solution about server health. +*/ static void tfw_http_hm_control(TfwHttpResp *resp) { TfwServer *srv = (TfwServer *)resp->conn->peer; - if (tfw_http_hm_suspend(resp, srv)) - return; + /* + * Total response statistics is counted permanently regardless + * of the state of the health monitor. + */ + bool lim_exceeded = tfw_apm_hm_srv_limit(resp->status, srv->apmref); - if (!tfw_srv_suspended(srv) || - !tfw_apm_hm_srv_alive(resp->status, &resp->body, resp->msg.skb_head, - srv->apmref)) - { + if (tfw_srv_suspended(srv)) { + T_DBG2("Server suspended"); return; } - tfw_srv_mark_alive(srv); + if(lim_exceeded) { + T_WARN("Server error limit exceeded"); + tfw_http_hm_try_suspend(resp, srv); + } + + if(tfw_apm_hm_srv_alive(resp, srv)) { + T_DBG2("Mark server alive"); + tfw_srv_mark_alive(srv); + } } static inline void