Skip to content

Commit

Permalink
Fix HM response processing.
Browse files Browse the repository at this point in the history
Original issue to investigate was a suspection that automatic CRC
calculation will never works properly.
After investigation it turned out that response never achieves
this place.
This commit contains fixes, light refactoring for better readability
and removing unused function arguments.
Test cases for checking this changes added to tempesta-test.
  • Loading branch information
enuribekov-tempesta committed Jun 26, 2024
1 parent ca0072b commit 70af0cd
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 35 deletions.
27 changes: 18 additions & 9 deletions fw/apm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1329,8 +1329,7 @@ tfw_apm_hm_srv_rcount_update(TfwStr *uri_path, void *apmref)
}

static inline u32
__tfw_apm_crc32_calc(TfwMsgIter *it, TfwStr *chunk , struct sk_buff *skb_head,
TfwStr *body)
__tfw_apm_crc32_calc(TfwMsgIter *it, TfwStr *chunk)
{
u32 crc = 0;

Expand All @@ -1340,11 +1339,21 @@ __tfw_apm_crc32_calc(TfwMsgIter *it, TfwStr *chunk , struct sk_buff *skb_head,
return crc;
}

/**
* Validate response from the given server.
* Check:
* - Is response status code belongs to monitored set
* - Integrity of the response body
* - CRC
* Successful passing all of the checks considered
* as a sign that server alive.
*/
bool
tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head,
void *apmref)
tfw_apm_hm_srv_alive(TfwHttpResp *resp, TfwServer *srv)
{
TfwApmHM *hm = READ_ONCE(((TfwApmRef *)apmref)->hmctl.hm);
int status = resp->status;
TfwStr *body = &resp->body;
TfwApmHM *hm = READ_ONCE(((TfwApmRef *)(srv->apmref))->hmctl.hm);
u32 crc32 = 0;
TfwMsgIter it;
TfwStr chunk = {0};
Expand All @@ -1363,7 +1372,7 @@ tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head,
}

if (unlikely(tfw_body_iter_init(&it, &chunk, body->data, body->skb,
skb_head)))
resp->msg.skb_head)))
{
T_WARN_NL("Invalid body. Health monitor '%s': status '%d' \n",
hm->name, status);
Expand All @@ -1375,11 +1384,11 @@ tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head,
* from body of first response and store it into monitor.
*/
if (!hm->crc32 && hm->auto_crc) {
hm->crc32 = __tfw_apm_crc32_calc(&it, &chunk, skb_head, body);
T_WARN("Auto CRC generated");
hm->crc32 = __tfw_apm_crc32_calc(&it, &chunk);
T_WARN("Auto CRC generated\n");

} else if (hm->crc32) {
crc32 = __tfw_apm_crc32_calc(&it, &chunk, skb_head, body);
crc32 = __tfw_apm_crc32_calc(&it, &chunk);
if (hm->crc32 != crc32)
goto crc_err;
}
Expand Down
3 changes: 1 addition & 2 deletions fw/apm.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ TfwHMStats *tfw_apm_hm_stats(void *apmref);

/* Health monitor procedures ('health_check' directive). */
void tfw_apm_hm_srv_rcount_update(TfwStr *uri_path, void *apmref);
bool tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head,
void *apmref);
bool tfw_apm_hm_srv_alive(TfwHttpResp *resp, TfwServer *srv);
void tfw_apm_hm_enable_srv(TfwServer *srv, const char *hm_name);
void tfw_apm_hm_disable_srv(TfwServer *srv);
bool tfw_apm_hm_srv_eq(const char *name, TfwServer *srv);
Expand Down
56 changes: 32 additions & 24 deletions fw/http.c
Original file line number Diff line number Diff line change
Expand Up @@ -1609,54 +1609,62 @@ do { \
}
}

static bool
tfw_http_hm_suspend(TfwHttpResp *resp, TfwServer *srv)
/**
* Try to mark server as suspended.
* In case of HM is active do it, otherwise left unchanged.
*/
static void
tfw_http_hm_try_suspend(TfwHttpResp *resp, TfwServer *srv)
{
unsigned long old_flags, flags = READ_ONCE(srv->flags);
/*
* We need to count a total response statistics in
* tfw_apm_hm_srv_limit(), even if health monitor is disabled.
* In this case limit calculation won't be accounted.
*/
bool lim_exceeded = tfw_apm_hm_srv_limit(resp->status, srv->apmref);

if (!(flags & TFW_SRV_F_HMONITOR))
return true;
if (!lim_exceeded)
return false;
while (flags & TFW_SRV_F_HMONITOR) {

do {
old_flags = cmpxchg(&srv->flags, flags,
flags | TFW_SRV_F_SUSPEND);
flags | TFW_SRV_F_SUSPEND);

if (likely(old_flags == flags)) {
T_WARN_ADDR_STATUS("server has been suspended: limit "
"for bad responses is exceeded",
&srv->addr, TFW_WITH_PORT,
resp->status);
break;
}
flags = old_flags;
} while (flags & TFW_SRV_F_HMONITOR);

return true;
flags = old_flags;
}
}

/**
* The main function of Health Monioring.
* Getting response from the server, it updates responses statistics,
* checks HM limits and makes solution about server health.
*/
static void
tfw_http_hm_control(TfwHttpResp *resp)
{
TfwServer *srv = (TfwServer *)resp->conn->peer;

if (tfw_http_hm_suspend(resp, srv))
return;
/*
* Total response statistics is counted permanently regardless
* of the state of the health monitor.
*/
bool lim_exceeded = tfw_apm_hm_srv_limit(resp->status, srv->apmref);

if (!tfw_srv_suspended(srv) ||
!tfw_apm_hm_srv_alive(resp->status, &resp->body, resp->msg.skb_head,
srv->apmref))
{
if (tfw_srv_suspended(srv)) {
T_DBG2("Server suspended");
return;
}

tfw_srv_mark_alive(srv);
if(lim_exceeded) {
T_WARN("Server error limit exceeded");
tfw_http_hm_try_suspend(resp, srv);
}

if(tfw_apm_hm_srv_alive(resp, srv)) {
T_DBG2("Mark server alive");
tfw_srv_mark_alive(srv);
}
}

static inline void
Expand Down

0 comments on commit 70af0cd

Please sign in to comment.