Skip to content

Commit

Permalink
Clean up ODS stats
Browse files Browse the repository at this point in the history
Summary:
## This stack

Improve logging to help troubleshooting.

## This diff

Bumping the stats from the logging methods helps keep things consistent by making the mapping between Scuba and ODS easy to discover and to understand. It also helps keep the main loop readable.

While this is mostly a refactoring, I noticed that we were using `process_failed` for two completely unrelated cases with different semantics, oops. Let's add a `process_retriable_error` to fix that ambiguity.

Reviewed By: mitrandir77

Differential Revision: D66699635

fbshipit-source-id: 224c4c9a73fbf1b1632c9a0ed2c5dcc5f6941c3c
  • Loading branch information
andreacampi authored and facebook-github-bot committed Dec 4, 2024
1 parent 60ad73e commit fbc6266
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 13 deletions.
37 changes: 29 additions & 8 deletions eden/mononoke/async_requests/worker/src/scuba.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,20 @@ use context::CoreContext;
use futures_stats::FutureStats;
use slog::info;
use source_control::AsyncRequestError;
use stats::define_stats;
use stats::prelude::*;

use crate::worker::AsyncMethodRequestWorker;

define_stats! {
prefix = "async_requests.worker.process";

process_complete_failed: timeseries("complete.failed"; Count),
process_retriable_error: timeseries("retriable.error"; Count),
process_succeeded: timeseries("succeeded"; Count),
process_error: timeseries("error"; Count),
}

impl AsyncMethodRequestWorker {
pub(crate) fn prepare_ctx(
&self,
Expand Down Expand Up @@ -59,24 +70,32 @@ pub(crate) fn log_result(
) {
let mut scuba = ctx.scuba().clone();

let (status, error) = match result.thrift() {
let (status, error, succeeded, complete_failed, method_error) = match result.thrift() {
ThriftAsynchronousRequestResult::error(error) => match error {
AsyncRequestError::request_error(error) => {
("REQUEST_ERROR", Some(format!("{:?}", error)))
("REQUEST_ERROR", Some(format!("{:?}", error)), 0, 0, 1)
}
AsyncRequestError::internal_error(error) => {
("INTERNAL_ERROR", Some(format!("{:?}", error)))
}
AsyncRequestError::UnknownField(error) => {
("UNKNOWN_ERROR", Some(format!("unknown error: {:?}", error)))
("INTERNAL_ERROR", Some(format!("{:?}", error)), 0, 0, 1)
}
AsyncRequestError::UnknownField(error) => (
"UNKNOWN_ERROR",
Some(format!("unknown error: {:?}", error)),
0,
0,
1,
),
},
_ => match complete_result {
Ok(_complete) => ("SUCCESS", None),
Err(err) => ("SAVE_ERROR", Some(err.to_string())),
Ok(_complete) => ("SUCCESS", None, 1, 0, 0),
Err(err) => ("SAVE_ERROR", Some(err.to_string()), 0, 1, 0),
},
};

STATS::process_succeeded.add_value(succeeded);
STATS::process_complete_failed.add_value(complete_failed);
STATS::process_error.add_value(method_error);

scuba.add_future_stats(stats);
scuba.add("status", status);

Expand All @@ -91,6 +110,8 @@ pub(crate) fn log_result(
pub(crate) fn log_retriable_error(ctx: CoreContext, stats: &FutureStats, error: Error) {
let mut scuba = ctx.scuba().clone();

STATS::process_retriable_error.add_value(1);

scuba.add_future_stats(stats);
scuba.add("status", "RETRIABLE_ERROR");
scuba.unsampled();
Expand Down
5 changes: 0 additions & 5 deletions eden/mononoke/async_requests/worker/src/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ define_stats! {
cleanup_error: timeseries("cleanup.error"; Count),
dequeue_error: timeseries("dequeue.error"; Count),
process_aborted: timeseries("process.aborted"; Count),
process_complete_failed: timeseries("process.complete.failed"; Count),
process_failed: timeseries("process.failed"; Count),
process_succeeded: timeseries("process.succeeded"; Count),
requested: timeseries("requested"; Count),

stats_error: timeseries("stats.error"; Count),
Expand Down Expand Up @@ -341,7 +339,6 @@ impl AsyncMethodRequestWorker {
// Save the result.
match result {
Ok(work_result) => {
STATS::process_succeeded.add_value(1);
let complete_result = self
.queue
.complete(&ctx, &req_id, work_result.clone())
Expand All @@ -355,7 +352,6 @@ impl AsyncMethodRequestWorker {
);
}
Err(err) => {
STATS::process_complete_failed.add_value(1);
error!(
ctx.logger(),
"[{}] failed to save result: {:?}", &req_id.0, err
Expand All @@ -364,7 +360,6 @@ impl AsyncMethodRequestWorker {
};
}
Err(err) => {
STATS::process_failed.add_value(1);
info!(
ctx.logger(),
"[{}] worker failed to process request, will retry: {:?}",
Expand Down

0 comments on commit fbc6266

Please sign in to comment.