-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
rebase heartbeat branch #428
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,30 @@ | |
|
||
static uint32_t ctx_id; /* context generation */ | ||
|
||
static void | ||
core_failed_servers_init(struct context *ctx) | ||
{ | ||
int i; | ||
|
||
for (i = 0; i < 2; i++) { | ||
array_init(&(ctx->failed_servers[i]), 10, sizeof(struct server *)); | ||
} | ||
} | ||
|
||
static void | ||
core_failed_servers_deinit(struct context *ctx) | ||
{ | ||
uint32_t i, n, nsize; | ||
|
||
for (i = 0; i < 2; i++) { | ||
nsize = array_n(&(ctx->failed_servers[i])); | ||
for (n = 0; n < nsize; n++) { | ||
array_pop(&(ctx->failed_servers[n])); | ||
} | ||
array_deinit(&(ctx->failed_servers[n])); | ||
} | ||
} | ||
|
||
static rstatus_t | ||
core_calc_connections(struct context *ctx) | ||
{ | ||
|
@@ -60,6 +84,11 @@ core_ctx_create(struct instance *nci) | |
ctx->stats = NULL; | ||
ctx->evb = NULL; | ||
array_null(&ctx->pool); | ||
array_null(&(ctx->failed_servers[0])); | ||
array_null(&(ctx->failed_servers[1])); | ||
ctx->failed_idx = 0; | ||
ctx->fails = &(ctx->failed_servers[0]); | ||
|
||
ctx->max_timeout = nci->stats_interval; | ||
ctx->timeout = ctx->max_timeout; | ||
ctx->max_nfd = 0; | ||
|
@@ -93,6 +122,8 @@ core_ctx_create(struct instance *nci) | |
return NULL; | ||
} | ||
|
||
core_failed_servers_init(ctx); | ||
|
||
/* create stats per server pool */ | ||
ctx->stats = stats_create(nci->stats_port, nci->stats_addr, nci->stats_interval, | ||
nci->hostname, &ctx->pool); | ||
|
@@ -261,6 +292,41 @@ core_error(struct context *ctx, struct conn *conn) | |
core_close(ctx, conn); | ||
} | ||
|
||
static void | ||
retry_connection(struct context *ctx) | ||
{ | ||
struct array *servers; | ||
int idx; | ||
struct server *server; | ||
int64_t now; | ||
uint32_t i, nsize; | ||
rstatus_t status; | ||
|
||
servers = ctx->fails; | ||
idx = (ctx->failed_idx == 0) ? 1 : 0; | ||
|
||
ctx->failed_idx = idx; | ||
ctx->fails = &(ctx->failed_servers[idx]); | ||
|
||
now = nc_usec_now(); | ||
nsize = array_n(servers); | ||
if (nsize == 0) { | ||
return; | ||
} | ||
|
||
for (i = 0; i < nsize; i++) { | ||
server = *(struct server **)array_pop(servers); | ||
if (server->next_retry == 0 || server->next_retry < now) { | ||
status = server_reconnect(ctx, server); | ||
if (status != NC_OK) { | ||
add_failed_server(ctx, server); | ||
} | ||
} else { | ||
add_failed_server(ctx, server); | ||
} | ||
} | ||
} | ||
|
||
static void | ||
core_timeout(struct context *ctx) | ||
{ | ||
|
@@ -272,14 +338,14 @@ core_timeout(struct context *ctx) | |
msg = msg_tmo_min(); | ||
if (msg == NULL) { | ||
ctx->timeout = ctx->max_timeout; | ||
return; | ||
break; | ||
} | ||
|
||
/* skip over req that are in-error or done */ | ||
|
||
if (msg->error || msg->done) { | ||
msg_tmo_delete(msg); | ||
continue; | ||
break; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's the intent of changing this from a `continue` to a `break`? Should the return below also be a `break;`? (It seems like in the typical case, msg is null or reprocessed because it's removed before it times out, so this doesn't matter too much.) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This also seems possibly concerning - what would happen if these still belonged to a connection (e.g. multiple heartbeats), and we retried a connection before processing some requests that had also errored on that connection? I guess that I see that retry_connection will call |
||
} | ||
|
||
/* | ||
|
@@ -304,6 +370,8 @@ core_timeout(struct context *ctx) | |
|
||
core_close(ctx, conn); | ||
} | ||
|
||
retry_connection(ctx); | ||
} | ||
|
||
rstatus_t | ||
|
@@ -324,6 +392,7 @@ core_core(void *arg, uint32_t events) | |
conn->client ? 'c' : (conn->proxy ? 'p' : 's'), conn->sd); | ||
|
||
conn->events = events; | ||
conn->restore(ctx, conn); | ||
|
||
/* error takes precedence over read | write */ | ||
if (events & EVENT_ERR) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: can move this below the
if (nsize == 0)
to avoid the call to gettimeofday when no servers fail