Skip to content

Commit

Permalink
Improve autopilot liveness check (#2236)
Browse files Browse the repository at this point in the history
# Description
This PR creates a shared liveness implementation between shadow and
regular autopilot mode. Both now populate a thread-safe last-auction
timestamp whenever an auction has been processed. The liveness check compares
the time elapsed since that recorded timestamp with the maximum auction
age.
<!-- List of detailed changes (how the change is accomplished) -->
# Changes
- [x] Liveness checks are based on the time at which an auction run loop
last completed successfully.
- [x] The same liveness implementation is used across shadow and regular
autopilot mode. Regular autopilot no longer uses the last update time in
the solvable orders cache.

## How to test
This can be tested manually by running the autopilot locally and
checking http://localhost:9589/liveness. It responds with 200 if the
autopilot is considered alive, 503 otherwise. Max auction age can also
be tweaked using the --max-auction-age argument when running the
autopilot.

## Related Issues

- Fixes  #2090
  • Loading branch information
KRD-Kai authored Jan 15, 2024
1 parent 398351d commit 74fdf64
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 22 deletions.
41 changes: 30 additions & 11 deletions crates/autopilot/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,21 +56,39 @@ use {
token_info::{CachedTokenInfoFetcher, TokenInfoFetcher},
token_list::{AutoUpdatingTokenList, TokenListConfiguration},
},
std::{collections::HashSet, sync::Arc, time::Duration},
std::{
collections::HashSet,
sync::{Arc, RwLock},
time::{Duration, Instant},
},
tracing::Instrument,
url::Url,
};

struct Liveness {
solvable_orders_cache: Arc<SolvableOrdersCache>,
pub struct Liveness {
max_auction_age: Duration,
last_auction_time: RwLock<Instant>,
}

#[async_trait::async_trait]
impl LivenessChecking for Liveness {
async fn is_alive(&self) -> bool {
let age = self.solvable_orders_cache.last_update_time().elapsed();
age <= self.max_auction_age
let last_auction_time = self.last_auction_time.read().unwrap();
let auction_age = last_auction_time.elapsed();
auction_age <= self.max_auction_age
}
}

impl Liveness {
pub fn new(max_auction_age: Duration) -> Liveness {
Liveness {
max_auction_age,
last_auction_time: RwLock::new(Instant::now()),
}
}

pub fn auction(&self) {
*self.last_auction_time.write().unwrap() = Instant::now();
}
}

Expand Down Expand Up @@ -549,11 +567,9 @@ pub async fn run(args: Arguments) {
.update(block)
.await
.expect("failed to perform initial solvable orders update");
let liveness = Liveness {
max_auction_age: args.max_auction_age,
solvable_orders_cache: solvable_orders_cache.clone(),
};
shared::metrics::serve_metrics(Arc::new(liveness), args.metrics_address);

let liveness = Arc::new(Liveness::new(args.max_auction_age));
shared::metrics::serve_metrics(liveness.clone(), args.metrics_address);

let on_settlement_event_updater =
crate::on_settlement_event_updater::OnSettlementEventUpdater {
Expand Down Expand Up @@ -607,6 +623,7 @@ pub async fn run(args: Arguments) {
in_flight_orders: Default::default(),
persistence: infra::persistence::Persistence::new(args.s3.into().unwrap(), Arc::new(db))
.await,
liveness: liveness.clone(),
};
run.run_forever().await;
unreachable!("run loop exited");
Expand Down Expand Up @@ -653,14 +670,16 @@ async fn shadow_mode(args: Arguments) -> ! {
.await
};

shared::metrics::serve_metrics(Arc::new(shadow::Liveness), args.metrics_address);
let liveness = Arc::new(Liveness::new(args.max_auction_age));
shared::metrics::serve_metrics(liveness.clone(), args.metrics_address);

let shadow = shadow::RunLoop::new(
orderbook,
drivers,
trusted_tokens,
args.score_cap,
args.solve_deadline,
liveness.clone(),
);
shadow.run_forever().await;

Expand Down
4 changes: 4 additions & 0 deletions crates/autopilot/src/run_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use {
solve::{self, TradedAmounts},
},
infra::{self, persistence::dto},
run::Liveness,
solvable_orders::SolvableOrdersCache,
},
::observe::metrics,
Expand Down Expand Up @@ -49,6 +50,7 @@ pub struct RunLoop {
pub max_settlement_transaction_wait: Duration,
pub solve_deadline: Duration,
pub in_flight_orders: Arc<Mutex<InFlightOrders>>,
pub liveness: Arc<Liveness>,
}

impl RunLoop {
Expand All @@ -64,6 +66,8 @@ impl RunLoop {
|| last_block.replace(current_block) != Some(current_block)
{
observe::log_auction_delta(id, &previous, &auction);
self.liveness.auction();

self.single_run(id, auction)
.instrument(tracing::info_span!("auction", id))
.await;
Expand Down
18 changes: 7 additions & 11 deletions crates/autopilot/src/shadow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,18 @@ use {
solve::{self},
},
infra,
run::Liveness,
run_loop::{self, observe},
},
::observe::metrics,
number::nonzero::U256 as NonZeroU256,
primitive_types::{H160, U256},
rand::seq::SliceRandom,
shared::{metrics::LivenessChecking, token_list::AutoUpdatingTokenList},
std::{cmp, time::Duration},
shared::token_list::AutoUpdatingTokenList,
std::{cmp, sync::Arc, time::Duration},
tracing::Instrument,
};

pub struct Liveness;
#[async_trait::async_trait]
impl LivenessChecking for Liveness {
async fn is_alive(&self) -> bool {
// can we somehow check that we keep processing auctions?
true
}
}

pub struct RunLoop {
orderbook: infra::shadow::Orderbook,
drivers: Vec<Driver>,
Expand All @@ -44,6 +36,7 @@ pub struct RunLoop {
block: u64,
score_cap: U256,
solve_deadline: Duration,
liveness: Arc<Liveness>,
}

impl RunLoop {
Expand All @@ -53,6 +46,7 @@ impl RunLoop {
trusted_tokens: AutoUpdatingTokenList,
score_cap: U256,
solve_deadline: Duration,
liveness: Arc<Liveness>,
) -> Self {
Self {
orderbook,
Expand All @@ -62,6 +56,7 @@ impl RunLoop {
block: 0,
score_cap,
solve_deadline,
liveness,
}
}

Expand All @@ -74,6 +69,7 @@ impl RunLoop {
};
observe::log_auction_delta(id, &previous, &auction);
previous = Some(auction.clone());
self.liveness.auction();

self.single_run(id, auction)
.instrument(tracing::info_span!("auction", id))
Expand Down

0 comments on commit 74fdf64

Please sign in to comment.