Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

En/fix 1988 1 #2151

Merged
merged 7 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions etc/tempesta_fw.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1199,7 +1199,7 @@
# REQ_STRING is a string with health monitoring request.
#
# Default:
# request "GET / HTTTP/1.0\r\n\r\n";
# request "GET / HTTP/1.0\r\n\r\n";
#
# URL_STRING is a string with url; client requests with this url will be
# used as health monitoring requests.
Expand All @@ -1223,7 +1223,7 @@
#
# Example:
# health_check auto {
# request "GET / HTTTP/1.0\r\n\r\n";
# request "GET / HTTP/1.0\r\n\r\n";
# request_url "/";
# resp_code 200;
# timeout 10;
Expand Down
37 changes: 23 additions & 14 deletions fw/apm.c
Original file line number Diff line number Diff line change
Expand Up @@ -628,8 +628,8 @@ typedef struct {
#define TFW_APM_MIN_TMINTRVL 5 /* Minimum time interval (secs). */

#define TFW_APM_HM_AUTO "auto"
#define TFW_APM_DFLT_REQ "\"GET / HTTTP/1.0\r\n\r\n\""
#define TFW_APM_DFLT_URL "\"/\""
#define TFW_APM_DFLT_REQ "GET / HTTP/1.0\r\n\r\n"
#define TFW_APM_DFLT_URL "/"

/*
* APM Data structure.
Expand Down Expand Up @@ -1329,8 +1329,7 @@ tfw_apm_hm_srv_rcount_update(TfwStr *uri_path, void *apmref)
}

static inline u32
__tfw_apm_crc32_calc(TfwMsgIter *it, TfwStr *chunk , struct sk_buff *skb_head,
TfwStr *body)
__tfw_apm_crc32_calc(TfwMsgIter *it, TfwStr *chunk)
{
u32 crc = 0;

Expand All @@ -1340,11 +1339,21 @@ __tfw_apm_crc32_calc(TfwMsgIter *it, TfwStr *chunk , struct sk_buff *skb_head,
return crc;
}

/**
* Validate response from the given server.
* Check:
* - Is response status code belongs to monitored set
* - Integrity of the response body
* - CRC
* Successful passing all of the checks considered
* as a sign that the server is alive.
*/
bool
tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head,
void *apmref)
tfw_apm_hm_srv_alive(TfwHttpResp *resp, TfwServer *srv)
{
TfwApmHM *hm = READ_ONCE(((TfwApmRef *)apmref)->hmctl.hm);
int status = resp->status;
TfwStr *body = &resp->body;
TfwApmHM *hm = READ_ONCE(((TfwApmRef *)(srv->apmref))->hmctl.hm);
u32 crc32 = 0;
TfwMsgIter it;
TfwStr chunk = {0};
Expand All @@ -1363,7 +1372,7 @@ tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head,
}

if (unlikely(tfw_body_iter_init(&it, &chunk, body->data, body->skb,
skb_head)))
resp->msg.skb_head)))
{
T_WARN_NL("Invalid body. Health monitor '%s': status '%d' \n",
hm->name, status);
Expand All @@ -1375,9 +1384,9 @@ tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head,
* from body of first response and store it into monitor.
*/
if (!hm->crc32 && hm->auto_crc) {
hm->crc32 = __tfw_apm_crc32_calc(&it, &chunk, skb_head, body);
hm->crc32 = __tfw_apm_crc32_calc(&it, &chunk);
} else if (hm->crc32) {
crc32 = __tfw_apm_crc32_calc(&it, &chunk, skb_head, body);
crc32 = __tfw_apm_crc32_calc(&it, &chunk);
if (hm->crc32 != crc32)
goto crc_err;
}
Expand Down Expand Up @@ -1704,7 +1713,7 @@ tfw_apm_create_def_hm(void)
}

static TfwApmHMCfg *
tfw_amp_create_hm_entry(void)
tfw_apm_create_hm_entry(void)
{
TfwApmHMCfg *hm_entry = kzalloc(sizeof(TfwApmHMCfg), GFP_KERNEL);
if (!hm_entry)
Expand All @@ -1725,7 +1734,7 @@ tfw_apm_create_def_health_stat_srv(void)
if (tfw_hm_cfg_200_created)
return 0;

hm_entry = tfw_amp_create_hm_entry();
hm_entry = tfw_apm_create_hm_entry();
if (!hm_entry)
return -ENOMEM;

Expand Down Expand Up @@ -1901,7 +1910,7 @@ tfw_cfgop_apm_server_failover(TfwCfgSpec *cs, TfwCfgEntry *ce)
if (tfw_cfg_check_range(tframe, 1, USHRT_MAX))
return -EINVAL;

hm_entry = tfw_amp_create_hm_entry();
hm_entry = tfw_apm_create_hm_entry();
if (!hm_entry)
return -ENOMEM;
tfw_hm_entry_set_code(hm_entry, code);
Expand All @@ -1928,7 +1937,7 @@ tfw_cfgop_apm_health_stat_srv(TfwCfgSpec *cs, TfwCfgEntry *ce)
val);
return -EINVAL;
}
hm_entry = tfw_amp_create_hm_entry();
hm_entry = tfw_apm_create_hm_entry();
if (!hm_entry)
return -ENOMEM;
tfw_hm_entry_set_code(hm_entry, code);
Expand Down
3 changes: 1 addition & 2 deletions fw/apm.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ TfwHMStats *tfw_apm_hm_stats(void *apmref);

/* Health monitor procedures ('health_check' directive). */
void tfw_apm_hm_srv_rcount_update(TfwStr *uri_path, void *apmref);
bool tfw_apm_hm_srv_alive(int status, TfwStr *body, struct sk_buff *skb_head,
void *apmref);
bool tfw_apm_hm_srv_alive(TfwHttpResp *resp, TfwServer *srv);
void tfw_apm_hm_enable_srv(TfwServer *srv, const char *hm_name);
void tfw_apm_hm_disable_srv(TfwServer *srv);
bool tfw_apm_hm_srv_eq(const char *name, TfwServer *srv);
Expand Down
66 changes: 39 additions & 27 deletions fw/http.c
Original file line number Diff line number Diff line change
Expand Up @@ -1616,54 +1616,66 @@ do { \
}
}

static bool
tfw_http_hm_suspend(TfwHttpResp *resp, TfwServer *srv)
/**
* Try to mark server as suspended.
* In case of HM is active do it, otherwise left unchanged.
*/
static void
tfw_http_hm_try_suspend(TfwHttpResp *resp, TfwServer *srv)
{
unsigned long old_flags, flags = READ_ONCE(srv->flags);
/*
* We need to count a total response statistics in
* tfw_apm_hm_srv_limit(), even if health monitor is disabled.
* In this case limit calculation won't be accounted.
*/
bool lim_exceeded = tfw_apm_hm_srv_limit(resp->status, srv->apmref);

if (!(flags & TFW_SRV_F_HMONITOR))
return true;
if (!lim_exceeded)
return false;
while (flags & TFW_SRV_F_HMONITOR) {

do {
old_flags = cmpxchg(&srv->flags, flags,
flags | TFW_SRV_F_SUSPEND);
flags | TFW_SRV_F_SUSPEND);

if (likely(old_flags == flags)) {
T_WARN_ADDR_STATUS("server has been suspended: limit "
"for bad responses is exceeded",
&srv->addr, TFW_WITH_PORT,
resp->status);
break;
}
flags = old_flags;
} while (flags & TFW_SRV_F_HMONITOR);

return true;
flags = old_flags;
}
}

/**
* The main function of Health Monioring.
* Getting response from the server, it updates responses statistics,
* checks HM limits and makes solution about server health.
krizhanovsky marked this conversation as resolved.
Show resolved Hide resolved
*/
static void
tfw_http_hm_control(TfwHttpResp *resp)
{
TfwServer *srv = (TfwServer *)resp->conn->peer;

if (tfw_http_hm_suspend(resp, srv))
return;
/*
* Total response statistics is counted permanently regardless
* of the state of the health monitor.
*/
bool lim_exceeded = tfw_apm_hm_srv_limit(resp->status, srv->apmref);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we calc the limit before checking flag srv->flags & TFW_SRV_F_HMONITOR, seems we calc the limit even for servers doesn't have health monitor. Or am I wrong?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, because these counters used for statistics too.
Perhaps this can be done much more elegantly, but I have kept the existing logic because statistics go beyond that.


if (!tfw_srv_suspended(srv) ||
!tfw_apm_hm_srv_alive(resp->status, &resp->body, resp->msg.skb_head,
srv->apmref))
{
if (!(srv->flags & TFW_SRV_F_HMONITOR))
return;

if (tfw_srv_suspended(srv)) {
T_DBG_ADDR("Server suspended", &srv->addr, TFW_WITH_PORT);
return;
}

tfw_srv_mark_alive(srv);
if (lim_exceeded) {
T_WARN_ADDR("Error limit exceeded for server",
&srv->addr, TFW_WITH_PORT);
tfw_http_hm_try_suspend(resp, srv);
}

if (tfw_apm_hm_srv_alive(resp, srv)) {
T_DBG_ADDR("Mark server alive", &srv->addr, TFW_WITH_PORT);
tfw_srv_mark_alive(srv);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function must be renamed or commented here: it's very confusing to see a function call with name 'mark_alive' right after thec all 'try_suspend'

Copy link
Contributor Author

@enuribekov-tempesta enuribekov-tempesta Aug 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tfw_http_hm_try_suspend() is the conditional function and has following comment:

/*
 * Try to mark server as suspended.
 * In case of HM is active do it, otherwise left unchanged.
 */

tfw_srv_mark_alive() is just inline for clearing flag

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic is not clear for me. We trying to suspend server using tfw_http_hm_try_suspend even in case if server was suspended in previous call we checking is it alive tfw_apm_hm_srv_alive. Why?

}
}

static inline void
Expand Down Expand Up @@ -5870,7 +5882,7 @@ tfw_http_req_process(TfwConn *conn, TfwStream *stream, struct sk_buff *skb,
case T_BAD:
T_DBG2("Drop invalid HTTP request\n");
TFW_INC_STAT_BH(clnt.msgs_parserr);
return tfw_http_req_parse_drop_with_fin(req, 400, NULL,
return tfw_http_req_parse_drop_with_fin(req, 400, NULL,
r == T_COMPRESSION
? HTTP2_ECODE_COMPRESSION
: HTTP2_ECODE_PROTO);
Expand Down Expand Up @@ -7575,7 +7587,7 @@ tfw_cfgop_max_header_list_size(TfwCfgSpec *cs, TfwCfgEntry *ce)
return -EINVAL;
if (ce->attr_n) {
T_ERR_NL("Unexpected attributes\n");
return -EINVAL;
return -EINVAL;
}

r = tfw_cfg_parse_uint(ce->vals[0], &max_header_list_size);
Expand All @@ -7587,7 +7599,7 @@ tfw_cfgop_max_header_list_size(TfwCfgSpec *cs, TfwCfgEntry *ce)

return 0;
}

static void
tfw_cfgop_cleanup_max_header_list_size(TfwCfgSpec *cs)
{
Expand Down