Skip to content

Commit

Permalink
Improve handling of tasks in coordinator, one per Tributary scanner
Browse files Browse the repository at this point in the history
  • Loading branch information
kayabaNerve committed Sep 26, 2023
1 parent 9f3840d commit 60491a0
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 98 deletions.
188 changes: 99 additions & 89 deletions coordinator/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ async fn add_tributary<D: Db, Pro: Processors, P: P2p>(
spec.n(),
spec
.i(Ristretto::generator() * key.deref())
.expect("adding a tribuary for a set we aren't in set for"),
.expect("adding a tribtuary for a set we aren't in set for"),
)
.unwrap(),
},
Expand Down Expand Up @@ -195,13 +195,22 @@ pub async fn scan_substrate<D: Db, Pro: Processors>(
}
}

pub(crate) trait RIDTrait<FRid>:
Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32], u32) -> FRid
{
}
impl<FRid, F: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32], u32) -> FRid>
RIDTrait<FRid> for F
{
}

#[allow(clippy::type_complexity)]
pub async fn scan_tributaries<
pub(crate) async fn scan_tributaries<
D: Db,
Pro: Processors,
P: P2p,
FRid: Future<Output = ()>,
RID: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32], u32) -> FRid,
FRid: Send + Future<Output = ()>,
RID: 'static + Send + Sync + RIDTrait<FRid>,
>(
raw_db: D,
key: Zeroizing<<Ristretto as Ciphersuite>::F>,
Expand All @@ -213,64 +222,74 @@ pub async fn scan_tributaries<
) {
log::info!("scanning tributaries");

// Handle new Tributary blocks
let mut tributary_readers = vec![];
let mut tributary_db = tributary::TributaryDb::new(raw_db.clone());
loop {
while let Ok(ActiveTributary { spec, tributary }) = {
match new_tributary.try_recv() {
Ok(tributary) => Ok(tributary),
Err(broadcast::error::TryRecvError::Empty) => Err(()),
Err(broadcast::error::TryRecvError::Lagged(_)) => {
panic!("scan_tributaries lagged to handle new_tributary")
}
Err(broadcast::error::TryRecvError::Closed) => panic!("new_tributary sender closed"),
}
} {
tributary_readers.push((spec, tributary.reader()));
}

for (spec, reader) in &tributary_readers {
tributary::scanner::handle_new_blocks::<_, _, _, _, _, _, P>(
&mut tributary_db,
&key,
recognized_id.clone(),
&processors,
|set, tx| {
match new_tributary.recv().await {
Ok(ActiveTributary { spec, tributary }) => {
// For each Tributary, spawn a dedicated scanner task
tokio::spawn({
let raw_db = raw_db.clone();
let key = key.clone();
let recognized_id = recognized_id.clone();
let p2p = p2p.clone();
let processors = processors.clone();
let serai = serai.clone();
async move {
let spec = &spec;
let reader = tributary.reader();
let mut tributary_db = tributary::TributaryDb::new(raw_db.clone());
loop {
match serai.publish(&tx).await {
Ok(_) => {
log::info!("set key pair for {set:?}");
break;
}
// This is assumed to be some ephemeral error due to the assumed fault-free
// creation
// TODO2: Differentiate connection errors from invariants
Err(e) => {
// Check if this failed because the keys were already set by someone else
if matches!(serai.get_keys(spec.set()).await, Ok(Some(_))) {
log::info!("another coordinator set key pair for {:?}", set);
break;
tributary::scanner::handle_new_blocks::<_, _, _, _, _, _, P>(
&mut tributary_db,
&key,
recognized_id.clone(),
&processors,
|set, tx| {
let serai = serai.clone();
async move {
loop {
match serai.publish(&tx).await {
Ok(_) => {
log::info!("set key pair for {set:?}");
break;
}
// This is assumed to be some ephemeral error due to the assumed fault-free
// creation
// TODO2: Differentiate connection errors from invariants
Err(e) => {
// Check if this failed because the keys were already set by someone else
if matches!(serai.get_keys(spec.set()).await, Ok(Some(_))) {
log::info!("another coordinator set key pair for {:?}", set);
break;
}

log::error!(
"couldn't connect to Serai node to publish set_keys TX: {:?}",
e
);
tokio::time::sleep(Duration::from_secs(10)).await;
}
}
}
}

log::error!("couldn't connect to Serai node to publish set_keys TX: {:?}", e);
tokio::time::sleep(Duration::from_secs(10)).await;
}
}
},
spec,
&reader,
)
.await;

// Sleep for half the block time
// TODO: Define a notification system for when a new block occurs
sleep(Duration::from_secs((Tributary::<D, Transaction, P>::block_time() / 2).into()))
.await;
}
}
},
spec,
reader,
)
.await;
});
}
Err(broadcast::error::RecvError::Lagged(_)) => {
panic!("scan_tributaries lagged to handle new_tributary")
}
Err(broadcast::error::RecvError::Closed) => panic!("new_tributary sender closed"),
}

// Sleep for half the block time
// TODO2: Define a notification system for when a new block occurs
sleep(Duration::from_secs((Tributary::<D, Transaction, P>::block_time() / 2).into())).await;
}
}

Expand All @@ -288,7 +307,7 @@ pub async fn heartbeat_tributaries<D: Db, P: P2p>(
Ok(tributary) => Ok(tributary),
Err(broadcast::error::TryRecvError::Empty) => Err(()),
Err(broadcast::error::TryRecvError::Lagged(_)) => {
panic!("heartbeat lagged to handle new_tributary")
panic!("heartbeat_tributaries lagged to handle new_tributary")
}
Err(broadcast::error::TryRecvError::Closed) => panic!("new_tributary sender closed"),
}
Expand Down Expand Up @@ -328,30 +347,27 @@ pub async fn handle_p2p<D: Db, P: P2p>(
p2p: P,
mut new_tributary: broadcast::Receiver<ActiveTributary<D, P>>,
) {
// TODO: Merge this into the below loop. We don't need an extra task here
let tributaries = Arc::new(RwLock::new(HashMap::new()));
tokio::spawn({
let tributaries = tributaries.clone();
async move {
loop {
match new_tributary.recv().await {
Ok(tributary) => {
tributaries.write().await.insert(tributary.spec.genesis(), tributary);
}
Err(broadcast::error::RecvError::Lagged(_)) => {
panic!("handle_p2p lagged to handle new_tributary")
}
Err(broadcast::error::RecvError::Closed) => panic!("new_tributary sender closed"),
loop {
while let Ok(tributary) = {
match new_tributary.try_recv() {
Ok(tributary) => Ok(tributary),
Err(broadcast::error::TryRecvError::Empty) => Err(()),
Err(broadcast::error::TryRecvError::Lagged(_)) => {
panic!("handle_p2p lagged to handle new_tributary")
}
Err(broadcast::error::TryRecvError::Closed) => panic!("new_tributary sender closed"),
}
} {
// TODO: Because the below maintains a read lock, this will never process until all prior P2P
// messages were handled. That's a notable latency spike
tributaries.write().await.insert(tributary.spec.genesis(), tributary);
}
});

loop {
let mut msg = p2p.receive().await;
// Spawn a dedicated task to handle this message, ensuring any singularly latent message
// doesn't hold everything up
// TODO2: Move to one task per tributary (or two. One for Tendermint, one for Tributary)
// TODO: Move to one task per tributary (or two. One for Tendermint, one for Tributary)
tokio::spawn({
let p2p = p2p.clone();
let tributaries = tributaries.clone();
Expand Down Expand Up @@ -494,26 +510,21 @@ pub async fn handle_processors<D: Db, Pro: Processors, P: P2p>(
) {
let pub_key = Ristretto::generator() * key.deref();

// TODO: Merge this into the below loop. We don't need an extra task here
let tributaries = Arc::new(RwLock::new(HashMap::new()));
tokio::spawn({
let tributaries = tributaries.clone();
async move {
loop {
match new_tributary.recv().await {
Ok(tributary) => {
tributaries.write().await.insert(tributary.spec.genesis(), tributary);
}
Err(broadcast::error::RecvError::Lagged(_)) => {
panic!("handle_processors lagged to handle new_tributary")
}
Err(broadcast::error::RecvError::Closed) => panic!("new_tributary sender closed"),
let mut tributaries = HashMap::new();
loop {
while let Ok(tributary) = {
match new_tributary.try_recv() {
Ok(tributary) => Ok(tributary),
Err(broadcast::error::TryRecvError::Empty) => Err(()),
Err(broadcast::error::TryRecvError::Lagged(_)) => {
panic!("handle_processors lagged to handle new_tributary")
}
Err(broadcast::error::TryRecvError::Closed) => panic!("new_tributary sender closed"),
}
} {
tributaries.insert(tributary.spec.genesis(), tributary);
}
});

loop {
// TODO: Dispatch this message to a task dedicated to handling this processor, preventing one
// processor from holding up all the others. This would require a peek method be added to the
// message-queue (to view multiple future messages at once)
Expand All @@ -525,7 +536,7 @@ pub async fn handle_processors<D: Db, Pro: Processors, P: P2p>(
// (which means there's a lack of multisig rotation)
let spec = {
let mut spec = None;
for tributary in tributaries.read().await.values() {
for tributary in tributaries.values() {
if tributary.spec.set().network == msg.network {
spec = Some(tributary.spec.clone());
break;
Expand Down Expand Up @@ -763,7 +774,6 @@ pub async fn handle_processors<D: Db, Pro: Processors, P: P2p>(
// If this created a transaction, publish it
if let Some(mut tx) = tx {
log::trace!("processor message effected transaction {}", hex::encode(tx.hash()));
let tributaries = tributaries.read().await;
let Some(tributary) = tributaries.get(&genesis) else {
// TODO: This can happen since Substrate tells the Processor to generate commitments
// at the same time it tells the Tributary to be created
Expand Down
5 changes: 2 additions & 3 deletions coordinator/src/tributary/handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use frost_schnorrkel::Schnorrkel;

use serai_client::{
Signature,
primitives::NetworkId,
validator_sets::primitives::{ValidatorSet, KeyPair, musig_context, set_keys_message},
subxt::utils::Encoded,
Serai,
Expand Down Expand Up @@ -225,13 +224,13 @@ pub fn generated_key_pair<D: Db>(
DkgConfirmer::share(spec, key, attempt, preprocesses, key_pair)
}

pub async fn handle_application_tx<
pub(crate) async fn handle_application_tx<
D: Db,
Pro: Processors,
FPst: Future<Output = ()>,
PST: Clone + Fn(ValidatorSet, Encoded) -> FPst,
FRid: Future<Output = ()>,
RID: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32], u32) -> FRid,
RID: crate::RIDTrait<FRid>,
>(
tx: Transaction,
spec: &TributarySpec,
Expand Down
10 changes: 4 additions & 6 deletions coordinator/src/tributary/scanner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@ use zeroize::Zeroizing;

use ciphersuite::{Ciphersuite, Ristretto};

use serai_client::{
primitives::NetworkId, validator_sets::primitives::ValidatorSet, subxt::utils::Encoded,
};
use serai_client::{validator_sets::primitives::ValidatorSet, subxt::utils::Encoded};

use tributary::{
Transaction as TributaryTransaction, Block, TributaryReader,
Expand Down Expand Up @@ -40,7 +38,7 @@ async fn handle_block<
FPst: Future<Output = ()>,
PST: Clone + Fn(ValidatorSet, Encoded) -> FPst,
FRid: Future<Output = ()>,
RID: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32], u32) -> FRid,
RID: crate::RIDTrait<FRid>,
P: P2p,
>(
db: &mut TributaryDb<D>,
Expand Down Expand Up @@ -101,13 +99,13 @@ async fn handle_block<
// TODO2: Trigger any necessary re-attempts
}

pub async fn handle_new_blocks<
pub(crate) async fn handle_new_blocks<
D: Db,
Pro: Processors,
FPst: Future<Output = ()>,
PST: Clone + Fn(ValidatorSet, Encoded) -> FPst,
FRid: Future<Output = ()>,
RID: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32], u32) -> FRid,
RID: crate::RIDTrait<FRid>,
P: P2p,
>(
db: &mut TributaryDb<D>,
Expand Down

0 comments on commit 60491a0

Please sign in to comment.