diff --git a/.github/workflows/mini-tests.yml b/.github/workflows/mini-tests.yml new file mode 100644 index 000000000..634d30b69 --- /dev/null +++ b/.github/workflows/mini-tests.yml @@ -0,0 +1,28 @@ +name: mini/ Tests + +on: + push: + branches: + - develop + paths: + - "mini/**" + + pull_request: + paths: + - "mini/**" + + workflow_dispatch: + +jobs: + test-common: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac + + - name: Test Dependencies + uses: ./.github/actions/test-dependencies + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Run Tests + run: GITHUB_CI=true RUST_BACKTRACE=1 cargo test --all-features -p mini-serai diff --git a/Cargo.lock b/Cargo.lock index 07aa481f1..a3bb7df35 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3068,6 +3068,19 @@ dependencies = [ "serde_json", ] +[[package]] +name = "generator" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows 0.48.0", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -4505,6 +4518,19 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "loom" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86a17963e5073acf8d3e2637402657c6b467218f36fe10d696b3e1095ae019bf" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber 0.3.17", +] + [[package]] name = "lru" version = "0.10.1" @@ -4615,6 +4641,15 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "matches" version = "0.1.10" @@ -4701,6 +4736,13 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mini-serai" +version = "0.1.0" +dependencies = [ + "loom", +] + [[package]] name = "minimal-ed448" version = "0.4.0" @@ -5147,6 +5189,16 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-bigint" version = "0.4.4" @@ -5357,6 +5409,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "p256" version = "0.13.2" @@ -7694,7 +7752,7 @@ dependencies = [ "thiserror", "tracing", "tracing-log", - "tracing-subscriber", + "tracing-subscriber 0.2.25", ] [[package]] @@ -7931,6 +7989,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = 
"scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -8102,7 +8166,6 @@ dependencies = [ "frost-schnorrkel", "futures", "hex", - "lazy_static", "libp2p", "log", "modular-frost", @@ -9311,7 +9374,7 @@ dependencies = [ "sp-std", "tracing", "tracing-core", - "tracing-subscriber", + "tracing-subscriber 0.2.25", ] [[package]] @@ -10198,7 +10261,7 @@ dependencies = [ "ansi_term", "chrono", "lazy_static", - "matchers", + "matchers 0.0.1", "parking_lot 0.11.2", "regex", "serde", @@ -10212,6 +10275,24 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "tracing-subscriber" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" +dependencies = [ + "matchers 0.1.0", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + [[package]] name = "tributary-chain" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 41535fea0..dbf6910e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,8 @@ members = [ "substrate/client", + "mini", + "tests/no-std", "tests/docker", diff --git a/coins/bitcoin/src/wallet/mod.rs b/coins/bitcoin/src/wallet/mod.rs index abc10cab8..7cff854b1 100644 --- a/coins/bitcoin/src/wallet/mod.rs +++ b/coins/bitcoin/src/wallet/mod.rs @@ -71,6 +71,11 @@ impl ReceivedOutput { self.offset } + /// The Bitcoin output for this output. + pub fn output(&self) -> &TxOut { + &self.output + } + /// The outpoint for this output. pub fn outpoint(&self) -> &OutPoint { &self.outpoint diff --git a/coins/bitcoin/src/wallet/send.rs b/coins/bitcoin/src/wallet/send.rs index 007ba527a..7dde21867 100644 --- a/coins/bitcoin/src/wallet/send.rs +++ b/coins/bitcoin/src/wallet/send.rs @@ -116,6 +116,12 @@ impl SignableTransaction { self.needed_fee } + /// Returns the fee this transaction will use. + pub fn fee(&self) -> u64 { + self.prevouts.iter().map(|prevout| prevout.value).sum::() - + self.tx.output.iter().map(|prevout| prevout.value).sum::() + } + /// Create a new SignableTransaction. 
/// /// If a change address is specified, any leftover funds will be sent to it if the leftover funds diff --git a/coordinator/Cargo.toml b/coordinator/Cargo.toml index 810d12a83..d00e9abe2 100644 --- a/coordinator/Cargo.toml +++ b/coordinator/Cargo.toml @@ -15,7 +15,6 @@ rustdoc-args = ["--cfg", "docsrs"] [dependencies] async-trait = "0.1" -lazy_static = "1" zeroize = "^1.5" rand_core = "0.6" diff --git a/coordinator/src/main.rs b/coordinator/src/main.rs index a242bde46..0a75c16d4 100644 --- a/coordinator/src/main.rs +++ b/coordinator/src/main.rs @@ -6,7 +6,7 @@ use core::{ops::Deref, future::Future}; use std::{ sync::Arc, time::{SystemTime, Duration}, - collections::{VecDeque, HashMap}, + collections::HashMap, }; use zeroize::{Zeroize, Zeroizing}; @@ -27,15 +27,17 @@ use serai_client::{primitives::NetworkId, Public, Serai}; use message_queue::{Service, client::MessageQueue}; use futures::stream::StreamExt; -use tokio::{sync::RwLock, time::sleep}; - -use ::tributary::{ - ReadWrite, ProvidedError, TransactionKind, TransactionTrait, Block, Tributary, TributaryReader, +use tokio::{ + sync::{RwLock, mpsc, broadcast}, + time::sleep, }; +use ::tributary::{ReadWrite, ProvidedError, TransactionKind, TransactionTrait, Block, Tributary}; + mod tributary; -#[rustfmt::skip] -use crate::tributary::{TributarySpec, SignData, Transaction, TributaryDb, scanner::RecognizedIdType}; +use crate::tributary::{ + TributarySpec, SignData, Transaction, TributaryDb, NonceDecider, scanner::RecognizedIdType, +}; mod db; use db::MainDb; @@ -53,26 +55,21 @@ mod substrate; #[cfg(test)] pub mod tests; -lazy_static::lazy_static! { - // This is a static to satisfy lifetime expectations - static ref NEW_TRIBUTARIES: RwLock> = RwLock::new(VecDeque::new()); -} - +#[derive(Clone)] pub struct ActiveTributary { pub spec: TributarySpec, - pub tributary: Arc>>, + pub tributary: Arc>, } -type Tributaries = HashMap<[u8; 32], ActiveTributary>; - -// Adds a tributary into the specified HahMap -async fn add_tributary( +// Adds a tributary into the specified HashMap +async fn add_tributary( db: D, key: Zeroizing<::F>, + processors: &Pro, p2p: P, - tributaries: &mut Tributaries, + tributaries: &broadcast::Sender>, spec: TributarySpec, -) -> TributaryReader { +) { log::info!("adding tributary {:?}", spec.set()); let tributary = Tributary::<_, Transaction, _>::new( @@ -80,21 +77,40 @@ async fn add_tributary( db, spec.genesis(), spec.start_time(), - key, + key.clone(), spec.validators(), p2p, ) .await .unwrap(); - let reader = tributary.reader(); - - tributaries.insert( - tributary.genesis(), - ActiveTributary { spec, tributary: Arc::new(RwLock::new(tributary)) }, - ); + // Trigger a DKG for the newly added Tributary + // If we're rebooting, we'll re-fire this message + // This is safe due to the message-queue deduplicating based off the intent system + let set = spec.set(); + processors + .send( + set.network, + processor_messages::CoordinatorMessage::KeyGen( + processor_messages::key_gen::CoordinatorMessage::GenerateKey { + id: processor_messages::key_gen::KeyGenId { set, attempt: 0 }, + params: frost::ThresholdParams::new( + spec.t(), + spec.n(), + spec + .i(Ristretto::generator() * key.deref()) + .expect("adding a tribtuary for a set we aren't in set for"), + ) + .unwrap(), + }, + ), + ) + .await; - reader + tributaries + .send(ActiveTributary { spec, tributary: Arc::new(tributary) }) + .map_err(|_| "all ActiveTributary recipients closed") + .unwrap(); } pub async fn scan_substrate( @@ -102,6 +118,7 @@ pub async fn scan_substrate( key: 
Zeroizing<::F>, processors: Pro, serai: Arc, + new_tributary_spec: mpsc::UnboundedSender, ) { log::info!("scanning substrate"); @@ -158,12 +175,9 @@ pub async fn scan_substrate( // Save it to the database MainDb::new(db).add_active_tributary(&spec); - // Add it to the queue - // If we reboot before this is read from the queue, the fact it was saved to the database - // means it'll be handled on reboot - async { - NEW_TRIBUTARIES.write().await.push_back(spec); - } + // If we reboot before this is read, the fact it was saved to the database means it'll be + // handled on reboot + new_tributary_spec.send(spec).unwrap(); }, &processors, &serai, @@ -180,13 +194,22 @@ pub async fn scan_substrate( } } +pub(crate) trait RIDTrait: + Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32], u32) -> FRid +{ +} +impl FRid> + RIDTrait for F +{ +} + #[allow(clippy::type_complexity)] -pub async fn scan_tributaries< +pub(crate) async fn scan_tributaries< D: Db, Pro: Processors, P: P2p, - FRid: Future, - RID: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32]) -> FRid, + FRid: Send + Future, + RID: 'static + Send + Sync + RIDTrait, >( raw_db: D, key: Zeroizing<::F>, @@ -194,117 +217,111 @@ pub async fn scan_tributaries< p2p: P, processors: Pro, serai: Arc, - tributaries: Arc>>, + mut new_tributary: broadcast::Receiver>, ) { log::info!("scanning tributaries"); - let mut tributary_readers = vec![]; - for ActiveTributary { spec, tributary } in tributaries.read().await.values() { - tributary_readers.push((spec.clone(), tributary.read().await.reader())); - } - - // Handle new Tributary blocks - let mut tributary_db = tributary::TributaryDb::new(raw_db.clone()); loop { - // The following handle_new_blocks function may take an arbitrary amount of time - // Accordingly, it may take a long time to acquire a write lock on the tributaries table - // By definition of NEW_TRIBUTARIES, we allow tributaries to be added almost immediately, - // meaning the Substrate scanner won't become blocked on this - { - let mut new_tributaries = NEW_TRIBUTARIES.write().await; - while let Some(spec) = new_tributaries.pop_front() { - let reader = add_tributary( - raw_db.clone(), - key.clone(), - p2p.clone(), - // This is a short-lived write acquisition, which is why it should be fine - &mut *tributaries.write().await, - spec.clone(), - ) - .await; - - // Trigger a DKG for the newly added Tributary - let set = spec.set(); - processors - .send( - set.network, - processor_messages::CoordinatorMessage::KeyGen( - processor_messages::key_gen::CoordinatorMessage::GenerateKey { - id: processor_messages::key_gen::KeyGenId { set, attempt: 0 }, - params: frost::ThresholdParams::new( - spec.t(), - spec.n(), - spec - .i(Ristretto::generator() * key.deref()) - .expect("adding a tribuary for a set we aren't in set for"), - ) - .unwrap(), - }, - ), - ) - .await; - - tributary_readers.push((spec, reader)); - } - } - - for (spec, reader) in &tributary_readers { - tributary::scanner::handle_new_blocks::<_, _, _, _, _, _, P>( - &mut tributary_db, - &key, - recognized_id.clone(), - &processors, - |set, tx| { + match new_tributary.recv().await { + Ok(ActiveTributary { spec, tributary }) => { + // For each Tributary, spawn a dedicated scanner task + tokio::spawn({ + let raw_db = raw_db.clone(); + let key = key.clone(); + let recognized_id = recognized_id.clone(); + let p2p = p2p.clone(); + let processors = processors.clone(); let serai = serai.clone(); async move { + let spec = &spec; + let reader = tributary.reader(); + let mut tributary_db = 
tributary::TributaryDb::new(raw_db.clone()); loop { - match serai.publish(&tx).await { - Ok(_) => { - log::info!("set key pair for {set:?}"); - break; - } - // This is assumed to be some ephemeral error due to the assumed fault-free - // creation - // TODO2: Differentiate connection errors from invariants - Err(e) => { - // Check if this failed because the keys were already set by someone else - if matches!(serai.get_keys(spec.set()).await, Ok(Some(_))) { - log::info!("another coordinator set key pair for {:?}", set); - break; + // Obtain the next block notification now to prevent obtaining it immediately after + // the next block occurs + let next_block_notification = tributary.next_block_notification().await; + + tributary::scanner::handle_new_blocks::<_, _, _, _, _, _, P>( + &mut tributary_db, + &key, + recognized_id.clone(), + &processors, + |set, tx| { + let serai = serai.clone(); + async move { + loop { + match serai.publish(&tx).await { + Ok(_) => { + log::info!("set key pair for {set:?}"); + break; + } + // This is assumed to be some ephemeral error due to the assumed fault-free + // creation + // TODO2: Differentiate connection errors from invariants + Err(e) => { + // Check if this failed because the keys were already set by someone else + if matches!(serai.get_keys(spec.set()).await, Ok(Some(_))) { + log::info!("another coordinator set key pair for {:?}", set); + break; + } + + log::error!( + "couldn't connect to Serai node to publish set_keys TX: {:?}", + e + ); + tokio::time::sleep(Duration::from_secs(10)).await; + } + } + } } - - log::error!("couldn't connect to Serai node to publish set_keys TX: {:?}", e); - tokio::time::sleep(Duration::from_secs(10)).await; - } - } + }, + spec, + &reader, + ) + .await; + + next_block_notification + .await + .map_err(|_| "") + .expect("tributary dropped its notifications?"); } } - }, - spec, - reader, - ) - .await; + }); + } + Err(broadcast::error::RecvError::Lagged(_)) => { + panic!("scan_tributaries lagged to handle new_tributary") + } + Err(broadcast::error::RecvError::Closed) => panic!("new_tributary sender closed"), } - - // Sleep for half the block time - // TODO2: Define a notification system for when a new block occurs - sleep(Duration::from_secs((Tributary::::block_time() / 2).into())).await; } } pub async fn heartbeat_tributaries( p2p: P, - tributaries: Arc>>, + mut new_tributary: broadcast::Receiver>, ) { let ten_blocks_of_time = Duration::from_secs((10 * Tributary::::block_time()).into()); + let mut readers = vec![]; loop { - for ActiveTributary { spec: _, tributary } in tributaries.read().await.values() { - let tributary = tributary.read().await; - let tip = tributary.tip().await; - let block_time = SystemTime::UNIX_EPOCH + - Duration::from_secs(tributary.reader().time_of_block(&tip).unwrap_or(0)); + while let Ok(ActiveTributary { spec, tributary }) = { + match new_tributary.try_recv() { + Ok(tributary) => Ok(tributary), + Err(broadcast::error::TryRecvError::Empty) => Err(()), + Err(broadcast::error::TryRecvError::Lagged(_)) => { + panic!("heartbeat_tributaries lagged to handle new_tributary") + } + Err(broadcast::error::TryRecvError::Closed) => panic!("new_tributary sender closed"), + } + } { + readers.push(tributary.reader()); + } + + for tributary in &readers { + let tip = tributary.tip(); + let block_time = + SystemTime::UNIX_EPOCH + Duration::from_secs(tributary.time_of_block(&tip).unwrap_or(0)); // Only trigger syncing if the block is more than a minute behind if SystemTime::now() > (block_time + Duration::from_secs(60)) { 
@@ -331,130 +348,155 @@ pub async fn heartbeat_tributaries( pub async fn handle_p2p( our_key: ::G, p2p: P, - tributaries: Arc>>, + mut new_tributary: broadcast::Receiver>, ) { - loop { - let mut msg = p2p.receive().await; - // Spawn a dedicated task to handle this message, ensuring any singularly latent message - // doesn't hold everything up - // TODO2: Move to one task per tributary (or two. One for Tendermint, one for Tributary) - tokio::spawn({ - let p2p = p2p.clone(); - let tributaries = tributaries.clone(); - async move { - match msg.kind { - P2pMessageKind::KeepAlive => {} - - P2pMessageKind::Tributary(genesis) => { - let tributaries = tributaries.read().await; - let Some(tributary) = tributaries.get(&genesis) else { - log::debug!("received p2p message for unknown network"); - return; - }; + let channels = Arc::new(RwLock::new(HashMap::new())); + tokio::spawn({ + let p2p = p2p.clone(); + let channels = channels.clone(); + async move { + loop { + let tributary = new_tributary.recv().await.unwrap(); + let genesis = tributary.spec.genesis(); - log::trace!("handling message for tributary {:?}", tributary.spec.set()); - if tributary.tributary.read().await.handle_message(&msg.msg).await { - P2p::broadcast(&p2p, msg.kind, msg.msg).await; - } - } + let (send, mut recv) = mpsc::unbounded_channel(); + channels.write().await.insert(genesis, send); - // TODO2: Rate limit this per timestamp - // And/or slash on Heartbeat which justifies a response, since the node obviously was - // offline and we must now use our bandwidth to compensate for them? - P2pMessageKind::Heartbeat(genesis) => { - if msg.msg.len() != 40 { - log::error!("validator sent invalid heartbeat"); - return; - } + tokio::spawn({ + let p2p = p2p.clone(); + async move { + loop { + let mut msg: Message
<P>
= recv.recv().await.unwrap(); + match msg.kind { + P2pMessageKind::KeepAlive => {} + + P2pMessageKind::Tributary(msg_genesis) => { + assert_eq!(msg_genesis, genesis); + log::trace!("handling message for tributary {:?}", tributary.spec.set()); + if tributary.tributary.handle_message(&msg.msg).await { + P2p::broadcast(&p2p, msg.kind, msg.msg).await; + } + } - let tributaries = tributaries.read().await; - let Some(tributary) = tributaries.get(&genesis) else { - log::debug!("received heartbeat message for unknown network"); - return; - }; - let tributary_read = tributary.tributary.read().await; - - /* - // Have sqrt(n) nodes reply with the blocks - let mut responders = (tributary.spec.n() as f32).sqrt().floor() as u64; - // Try to have at least 3 responders - if responders < 3 { - responders = tributary.spec.n().min(3).into(); - } - */ - - // Have up to three nodes respond - let responders = u64::from(tributary.spec.n().min(3)); - - // Decide which nodes will respond by using the latest block's hash as a mutually agreed - // upon entropy source - // This isn't a secure source of entropy, yet it's fine for this - let entropy = u64::from_le_bytes(tributary_read.tip().await[.. 8].try_into().unwrap()); - // If n = 10, responders = 3, we want start to be 0 ..= 7 (so the highest is 7, 8, 9) - // entropy % (10 + 1) - 3 = entropy % 8 = 0 ..= 7 - let start = - usize::try_from(entropy % (u64::from(tributary.spec.n() + 1) - responders)).unwrap(); - let mut selected = false; - for validator in - &tributary.spec.validators()[start .. (start + usize::try_from(responders).unwrap())] - { - if our_key == validator.0 { - selected = true; - break; - } - } - if !selected { - log::debug!("received heartbeat and not selected to respond"); - return; - } + // TODO2: Rate limit this per timestamp + // And/or slash on Heartbeat which justifies a response, since the node obviously + // was offline and we must now use our bandwidth to compensate for them? + // TODO: Dedicated task for heartbeats + P2pMessageKind::Heartbeat(msg_genesis) => { + assert_eq!(msg_genesis, genesis); + if msg.msg.len() != 40 { + log::error!("validator sent invalid heartbeat"); + continue; + } - log::debug!("received heartbeat and selected to respond"); + let tributary_read = &tributary.tributary; - let reader = tributary_read.reader(); - drop(tributary_read); + /* + // Have sqrt(n) nodes reply with the blocks + let mut responders = (tributary.spec.n() as f32).sqrt().floor() as u64; + // Try to have at least 3 responders + if responders < 3 { + responders = tributary.spec.n().min(3).into(); + } + */ + + // Have up to three nodes respond + let responders = u64::from(tributary.spec.n().min(3)); + + // Decide which nodes will respond by using the latest block's hash as a mutually + // agreed upon entropy source + // This isn't a secure source of entropy, yet it's fine for this + let entropy = + u64::from_le_bytes(tributary_read.tip().await[.. 8].try_into().unwrap()); + // If n = 10, responders = 3, we want `start` to be 0 ..= 7 + // (so the highest is 7, 8, 9) + // entropy % (10 + 1) - 3 = entropy % 8 = 0 ..= 7 + let start = + usize::try_from(entropy % (u64::from(tributary.spec.n() + 1) - responders)) + .unwrap(); + let mut selected = false; + for validator in &tributary.spec.validators() + [start .. (start + usize::try_from(responders).unwrap())] + { + if our_key == validator.0 { + selected = true; + break; + } + } + if !selected { + log::debug!("received heartbeat and not selected to respond"); + continue; + } - let mut latest = msg.msg[.. 
32].try_into().unwrap(); - while let Some(next) = reader.block_after(&latest) { - let mut res = reader.block(&next).unwrap().serialize(); - res.extend(reader.commit(&next).unwrap()); - // Also include the timestamp used within the Heartbeat - res.extend(&msg.msg[32 .. 40]); - p2p.send(msg.sender, P2pMessageKind::Block(tributary.spec.genesis()), res).await; - latest = next; - } - } + log::debug!("received heartbeat and selected to respond"); - P2pMessageKind::Block(genesis) => { - let mut msg_ref: &[u8] = msg.msg.as_ref(); - let Ok(block) = Block::::read(&mut msg_ref) else { - log::error!("received block message with an invalidly serialized block"); - return; - }; - // Get just the commit - msg.msg.drain(.. (msg.msg.len() - msg_ref.len())); - msg.msg.drain((msg.msg.len() - 8) ..); - - let tributaries = tributaries.read().await; - let Some(tributary) = tributaries.get(&genesis) else { - log::debug!("received block message for unknown network"); - return; - }; + let reader = tributary_read.reader(); + + let mut latest = msg.msg[.. 32].try_into().unwrap(); + while let Some(next) = reader.block_after(&latest) { + let mut res = reader.block(&next).unwrap().serialize(); + res.extend(reader.commit(&next).unwrap()); + // Also include the timestamp used within the Heartbeat + res.extend(&msg.msg[32 .. 40]); + p2p + .send(msg.sender, P2pMessageKind::Block(tributary.spec.genesis()), res) + .await; + latest = next; + } + } - let res = tributary.tributary.read().await.sync_block(block, msg.msg).await; - log::debug!("received block from {:?}, sync_block returned {}", msg.sender, res); + P2pMessageKind::Block(msg_genesis) => { + assert_eq!(msg_genesis, genesis); + let mut msg_ref: &[u8] = msg.msg.as_ref(); + let Ok(block) = Block::::read(&mut msg_ref) else { + log::error!("received block message with an invalidly serialized block"); + continue; + }; + // Get just the commit + msg.msg.drain(.. (msg.msg.len() - msg_ref.len())); + msg.msg.drain((msg.msg.len() - 8) ..); + + let res = tributary.tributary.sync_block(block, msg.msg).await; + log::debug!("received block from {:?}, sync_block returned {}", msg.sender, res); + } + } + } } + }); + } + } + }); + + loop { + let msg = p2p.receive().await; + match msg.kind { + P2pMessageKind::KeepAlive => {} + P2pMessageKind::Tributary(genesis) => { + if let Some(channel) = channels.read().await.get(&genesis) { + channel.send(msg).unwrap(); } } - }); + P2pMessageKind::Heartbeat(genesis) => { + if let Some(channel) = channels.read().await.get(&genesis) { + channel.send(msg).unwrap(); + } + } + P2pMessageKind::Block(genesis) => { + if let Some(channel) = channels.read().await.get(&genesis) { + channel.send(msg).unwrap(); + } + } + } } } -pub async fn publish_transaction( +pub async fn publish_signed_transaction( tributary: &Tributary, tx: Transaction, ) { log::debug!("publishing transaction {}", hex::encode(tx.hash())); if let TransactionKind::Signed(signed) = tx.kind() { + // TODO: What if we try to publish TX with a nonce of 5 when the blockchain only has 3? 
if tributary .next_nonce(signed.signer) .await @@ -467,317 +509,377 @@ pub async fn publish_transaction( assert!(tributary.add_transaction(tx).await, "created an invalid transaction"); } } else { - panic!("non-signed transaction passed to publish_transaction"); + panic!("non-signed transaction passed to publish_signed_transaction"); } } pub async fn handle_processors( - mut db: D, + db: D, key: Zeroizing<::F>, serai: Arc, mut processors: Pro, - tributaries: Arc>>, + mut new_tributary: broadcast::Receiver>, ) { let pub_key = Ristretto::generator() * key.deref(); - loop { - // TODO: Dispatch this message to a task dedicated to handling this processor, preventing one - // processor from holding up all the others. This would require a peek method be added to the - // message-queue (to view multiple future messages at once) - // TODO: Do we handle having handled a message, by DB, yet having rebooted before `ack`ing it? - // Does the processor? - let msg = processors.recv().await; - - // TODO2: This is slow, and only works as long as a network only has a single Tributary - // (which means there's a lack of multisig rotation) - let spec = { - let mut spec = None; - for tributary in tributaries.read().await.values() { - if tributary.spec.set().network == msg.network { - spec = Some(tributary.spec.clone()); - break; - } - } - spec.expect("received message from processor we don't have a tributary for") - }; - - let genesis = spec.genesis(); - // TODO: We probably want to NOP here, not panic? - let my_i = spec.i(pub_key).expect("processor message for network we aren't a validator in"); - - let tx = match msg.msg.clone() { - ProcessorMessage::KeyGen(inner_msg) => match inner_msg { - key_gen::ProcessorMessage::Commitments { id, commitments } => { - Some(Transaction::DkgCommitments(id.attempt, commitments, Transaction::empty_signed())) - } - key_gen::ProcessorMessage::Shares { id, mut shares } => { - // Create a MuSig-based machine to inform Substrate of this key generation - let nonces = crate::tributary::dkg_confirmation_nonces(&key, &spec, id.attempt); - - let mut tx_shares = Vec::with_capacity(shares.len()); - for i in 1 ..= spec.n() { - let i = Participant::new(i).unwrap(); - if i == my_i { - continue; - } - tx_shares - .push(shares.remove(&i).expect("processor didn't send share for another validator")); - } + let channels = Arc::new(RwLock::new(HashMap::new())); + tokio::spawn({ + let processors = processors.clone(); + let channels = channels.clone(); + async move { + loop { + let channels = channels.clone(); + let ActiveTributary { spec, tributary } = new_tributary.recv().await.unwrap(); + let genesis = spec.genesis(); + tokio::spawn({ + let mut db = db.clone(); + let key = key.clone(); + let serai = serai.clone(); + let mut processors = processors.clone(); + async move { + let (send, mut recv) = mpsc::unbounded_channel(); + // TODO: Support multisig rotation (not per-Tributary yet per-network?) 
+ channels.write().await.insert(spec.set().network, send); - Some(Transaction::DkgShares { - attempt: id.attempt, - shares: tx_shares, - confirmation_nonces: nonces, - signed: Transaction::empty_signed(), - }) - } - key_gen::ProcessorMessage::GeneratedKeyPair { id, substrate_key, network_key } => { - assert_eq!( - id.set.network, msg.network, - "processor claimed to be a different network than it was for GeneratedKeyPair", - ); - // TODO: Also check the other KeyGenId fields - - // Tell the Tributary the key pair, get back the share for the MuSig signature - let mut txn = db.txn(); - let share = crate::tributary::generated_key_pair::( - &mut txn, - &key, - &spec, - &(Public(substrate_key), network_key.try_into().unwrap()), - id.attempt, - ); - txn.commit(); - - match share { - Ok(share) => { - Some(Transaction::DkgConfirmed(id.attempt, share, Transaction::empty_signed())) - } - Err(p) => todo!("participant {p:?} sent invalid DKG confirmation preprocesses"), - } - } - }, - ProcessorMessage::Sign(msg) => match msg { - sign::ProcessorMessage::Preprocess { id, preprocess } => { - if id.attempt == 0 { - let mut txn = db.txn(); - MainDb::::save_first_preprocess(&mut txn, id.id, preprocess); - txn.commit(); - - None - } else { - Some(Transaction::SignPreprocess(SignData { - plan: id.id, - attempt: id.attempt, - data: preprocess, - signed: Transaction::empty_signed(), - })) - } - } - sign::ProcessorMessage::Share { id, share } => Some(Transaction::SignShare(SignData { - plan: id.id, - attempt: id.attempt, - data: share, - signed: Transaction::empty_signed(), - })), - sign::ProcessorMessage::Completed { key: _, id, tx } => { - let r = Zeroizing::new(::F::random(&mut OsRng)); - #[allow(non_snake_case)] - let R = ::generator() * r.deref(); - let mut tx = Transaction::SignCompleted { - plan: id, - tx_hash: tx, - first_signer: pub_key, - signature: SchnorrSignature { R, s: ::F::ZERO }, - }; - let signed = SchnorrSignature::sign(&key, r, tx.sign_completed_challenge()); - match &mut tx { - Transaction::SignCompleted { signature, .. 
} => { - *signature = signed; - } - _ => unreachable!(), - } - Some(tx) - } - }, - ProcessorMessage::Coordinator(inner_msg) => match inner_msg { - coordinator::ProcessorMessage::SubstrateBlockAck { network, block, plans } => { - assert_eq!( - network, msg.network, - "processor claimed to be a different network than it was for SubstrateBlockAck", - ); - - // Safe to use its own txn since this is static and just needs to be written before we - // provide SubstrateBlock - let mut txn = db.txn(); - TributaryDb::::set_plan_ids(&mut txn, genesis, block, &plans); - txn.commit(); - - Some(Transaction::SubstrateBlock(block)) - } - coordinator::ProcessorMessage::BatchPreprocess { id, block, preprocess } => { - log::info!( - "informed of batch (sign ID {}, attempt {}) for block {}", - hex::encode(id.id), - id.attempt, - hex::encode(block), - ); - // If this is the first attempt instance, wait until we synchronize around the batch - // first - if id.attempt == 0 { - // Save the preprocess to disk so we can publish it later - // This is fine to use its own TX since it's static and just needs to be written - // before this message finishes it handling (or with this message's finished handling) - let mut txn = db.txn(); - MainDb::::save_first_preprocess(&mut txn, id.id, preprocess); - txn.commit(); - - Some(Transaction::Batch(block.0, id.id)) - } else { - Some(Transaction::BatchPreprocess(SignData { - plan: id.id, - attempt: id.attempt, - data: preprocess, - signed: Transaction::empty_signed(), - })) - } - } - coordinator::ProcessorMessage::BatchShare { id, share } => { - Some(Transaction::BatchShare(SignData { - plan: id.id, - attempt: id.attempt, - data: share.to_vec(), - signed: Transaction::empty_signed(), - })) - } - }, - ProcessorMessage::Substrate(inner_msg) => match inner_msg { - processor_messages::substrate::ProcessorMessage::Update { batch } => { - assert_eq!( - batch.batch.network, msg.network, - "processor sent us a batch for a different network than it was for", - ); - // TODO: Check this key's key pair's substrate key is authorized to publish batches - - // Save this batch to the disk - MainDb::new(&mut db).save_batch(batch); - - /* - Use a dedicated task to publish batches due to the latency potentially incurred. - - This does not guarantee the batch has actually been published when the message is - `ack`ed to message-queue. Accordingly, if we reboot, these batches would be dropped - (as we wouldn't see the `Update` again, triggering our re-attempt to publish). - - The solution to this is to have the task try not to publish the batch which caused it - to be spawned, yet all saved batches which have yet to published. This does risk having - multiple tasks trying to publish all pending batches, yet these aren't notably complex. - */ - tokio::spawn({ - let mut db = db.clone(); - let serai = serai.clone(); - let network = msg.network; - async move { - // Since we have a new batch, publish all batches yet to be published to Serai - // This handles the edge-case where batch n+1 is signed before batch n is - while let Some(batch) = { - // Get the next-to-execute batch ID - let next = { - let mut first = true; - loop { - if !first { - log::error!( - "couldn't connect to Serai node to get the next batch ID for {network:?}", + loop { + let msg: processors::Message = recv.recv().await.unwrap(); + + // TODO: We probably want to NOP here, not panic? 
+ // TODO: We do have to track produced Batches in order to ensure their integrity + let my_i = + spec.i(pub_key).expect("processor message for network we aren't a validator in"); + + let tx = match msg.msg.clone() { + ProcessorMessage::KeyGen(inner_msg) => match inner_msg { + key_gen::ProcessorMessage::Commitments { id, commitments } => { + Some(Transaction::DkgCommitments( + id.attempt, + commitments, + Transaction::empty_signed(), + )) + } + key_gen::ProcessorMessage::Shares { id, mut shares } => { + // Create a MuSig-based machine to inform Substrate of this key generation + let nonces = crate::tributary::dkg_confirmation_nonces(&key, &spec, id.attempt); + + let mut tx_shares = Vec::with_capacity(shares.len()); + for i in 1 ..= spec.n() { + let i = Participant::new(i).unwrap(); + if i == my_i { + continue; + } + tx_shares.push( + shares + .remove(&i) + .expect("processor didn't send share for another validator"), ); - tokio::time::sleep(Duration::from_secs(5)).await; } - first = false; - let Ok(latest_block) = serai.get_latest_block().await else { continue }; - let Ok(last) = - serai.get_last_batch_for_network(latest_block.hash(), network).await - else { - continue; + Some(Transaction::DkgShares { + attempt: id.attempt, + shares: tx_shares, + confirmation_nonces: nonces, + signed: Transaction::empty_signed(), + }) + } + key_gen::ProcessorMessage::GeneratedKeyPair { + id, + substrate_key, + network_key, + } => { + assert_eq!( + id.set.network, msg.network, + "processor claimed to be a different network than it was for GeneratedKeyPair", + ); + // TODO: Also check the other KeyGenId fields + + // Tell the Tributary the key pair, get back the share for the MuSig signature + let mut txn = db.txn(); + let share = crate::tributary::generated_key_pair::( + &mut txn, + &key, + &spec, + &(Public(substrate_key), network_key.try_into().unwrap()), + id.attempt, + ); + txn.commit(); + + match share { + Ok(share) => Some(Transaction::DkgConfirmed( + id.attempt, + share, + Transaction::empty_signed(), + )), + Err(p) => { + todo!("participant {p:?} sent invalid DKG confirmation preprocesses") + } + } + } + }, + ProcessorMessage::Sign(msg) => match msg { + sign::ProcessorMessage::Preprocess { id, preprocess } => { + if id.attempt == 0 { + let mut txn = db.txn(); + MainDb::::save_first_preprocess(&mut txn, id.id, preprocess); + txn.commit(); + + None + } else { + Some(Transaction::SignPreprocess(SignData { + plan: id.id, + attempt: id.attempt, + data: preprocess, + signed: Transaction::empty_signed(), + })) + } + } + sign::ProcessorMessage::Share { id, share } => { + Some(Transaction::SignShare(SignData { + plan: id.id, + attempt: id.attempt, + data: share, + signed: Transaction::empty_signed(), + })) + } + sign::ProcessorMessage::Completed { key: _, id, tx } => { + let r = Zeroizing::new(::F::random(&mut OsRng)); + #[allow(non_snake_case)] + let R = ::generator() * r.deref(); + let mut tx = Transaction::SignCompleted { + plan: id, + tx_hash: tx, + first_signer: pub_key, + signature: SchnorrSignature { R, s: ::F::ZERO }, + }; + let signed = SchnorrSignature::sign(&key, r, tx.sign_completed_challenge()); + match &mut tx { + Transaction::SignCompleted { signature, .. 
} => { + *signature = signed; + } + _ => unreachable!(), + } + Some(tx) + } + }, + ProcessorMessage::Coordinator(inner_msg) => match inner_msg { + coordinator::ProcessorMessage::SubstrateBlockAck { network, block, plans } => { + assert_eq!( + network, msg.network, + "processor claimed to be a different network than it was for SubstrateBlockAck", + ); + + // Safe to use its own txn since this is static and just needs to be written + // before we provide SubstrateBlock + let mut txn = db.txn(); + // TODO: This needs to be scoped per multisig + TributaryDb::::set_plan_ids(&mut txn, genesis, block, &plans); + txn.commit(); + + Some(Transaction::SubstrateBlock(block)) + } + coordinator::ProcessorMessage::BatchPreprocess { id, block, preprocess } => { + log::info!( + "informed of batch (sign ID {}, attempt {}) for block {}", + hex::encode(id.id), + id.attempt, + hex::encode(block), + ); + // If this is the first attempt instance, wait until we synchronize around the + // batch first + if id.attempt == 0 { + // Save the preprocess to disk so we can publish it later + // This is fine to use its own TX since it's static and just needs to be + // written before this message finishes it handling (or with this message's + // finished handling) + let mut txn = db.txn(); + MainDb::::save_first_preprocess(&mut txn, id.id, preprocess); + txn.commit(); + + Some(Transaction::Batch(block.0, id.id)) + } else { + Some(Transaction::BatchPreprocess(SignData { + plan: id.id, + attempt: id.attempt, + data: preprocess, + signed: Transaction::empty_signed(), + })) + } + } + coordinator::ProcessorMessage::BatchShare { id, share } => { + Some(Transaction::BatchShare(SignData { + plan: id.id, + attempt: id.attempt, + data: share.to_vec(), + signed: Transaction::empty_signed(), + })) + } + }, + ProcessorMessage::Substrate(inner_msg) => match inner_msg { + processor_messages::substrate::ProcessorMessage::Update { batch } => { + assert_eq!( + batch.batch.network, msg.network, + "processor sent us a batch for a different network than it was for", + ); + // TODO: Check this key's key pair's substrate key is authorized to publish + // batches + + // Save this batch to the disk + MainDb::new(&mut db).save_batch(batch); + + /* + Use a dedicated task to publish batches due to the latency potentially + incurred. + + This does not guarantee the batch has actually been published when the + message is `ack`ed to message-queue. Accordingly, if we reboot, these batches + would be dropped (as we wouldn't see the `Update` again, triggering our + re-attempt to publish). + + The solution to this is to have the task try not to publish the batch which + caused it to be spawned, yet all saved batches which have yet to published. + This does risk having multiple tasks trying to publish all pending batches, + yet these aren't notably complex. 
+ */ + tokio::spawn({ + let mut db = db.clone(); + let serai = serai.clone(); + let network = msg.network; + async move { + // Since we have a new batch, publish all batches yet to be published to + // Serai + // This handles the edge-case where batch n+1 is signed before batch n is + while let Some(batch) = { + // Get the next-to-execute batch ID + let next = { + let mut first = true; + loop { + if !first { + log::error!( + "{} {network:?}", + "couldn't connect to Serai node to get the next batch ID for", + ); + tokio::time::sleep(Duration::from_secs(5)).await; + } + first = false; + + let Ok(latest_block) = serai.get_latest_block().await else { + continue; + }; + let Ok(last) = serai + .get_last_batch_for_network(latest_block.hash(), network) + .await + else { + continue; + }; + break if let Some(last) = last { last + 1 } else { 0 }; + } + }; + + // If we have this batch, attempt to publish it + MainDb::new(&mut db).batch(network, next) + } { + let id = batch.batch.id; + let block = batch.batch.block; + + let tx = Serai::execute_batch(batch); + // This publish may fail if this transactions already exists in the + // mempool, which is possible, or if this batch was already executed + // on-chain + // Either case will have eventual resolution and be handled by the above + // check on if this batch should execute + if serai.publish(&tx).await.is_ok() { + log::info!( + "published batch {network:?} {id} (block {})", + hex::encode(block) + ); + } + } + } + }); + + None + } + }, + }; + + // If this created a transaction, publish it + if let Some(mut tx) = tx { + log::trace!("processor message effected transaction {}", hex::encode(tx.hash())); + + match tx.kind() { + TransactionKind::Provided(_) => { + log::trace!("providing transaction {}", hex::encode(tx.hash())); + let res = tributary.provide_transaction(tx).await; + if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) { + panic!("provided an invalid transaction: {res:?}"); + } + } + TransactionKind::Unsigned => { + log::trace!("publishing unsigned transaction {}", hex::encode(tx.hash())); + // Ignores the result since we can't differentiate already in-mempool from + // already on-chain from invalid + // TODO: Don't ignore the result + tributary.add_transaction(tx).await; + } + TransactionKind::Signed(_) => { + log::trace!( + "getting next nonce for Tributary TX in response to processor message" + ); + + let nonce = loop { + let Some(nonce) = NonceDecider::::nonce(&db, genesis, &tx) + .expect("signed TX didn't have nonce") + else { + // This can be None if: + // 1) We scanned the relevant transaction(s) in a Tributary block + // 2) The processor was sent a message and responded + // 3) The Tributary TXN has yet to be committed + log::warn!( + "nonce has yet to be saved for processor-instigated transaction" + ); + sleep(Duration::from_millis(100)).await; + continue; + }; + break nonce; }; - break if let Some(last) = last { last + 1 } else { 0 }; + tx.sign(&mut OsRng, genesis, &key, nonce); + + publish_signed_transaction(&tributary, tx).await; } - }; - - // If we have this batch, attempt to publish it - MainDb::new(&mut db).batch(network, next) - } { - let id = batch.batch.id; - let block = batch.batch.block; - - let tx = Serai::execute_batch(batch); - // This publish may fail if this transactions already exists in the mempool, which - // is possible, or if this batch was already executed on-chain - // Either case will have eventual resolution and be handled by the above check on - // if this block should execute - if 
serai.publish(&tx).await.is_ok() { - log::info!("published batch {network:?} {id} (block {})", hex::encode(block)); } } - } - }); - None - } - }, - }; - - // If this created a transaction, publish it - if let Some(mut tx) = tx { - log::trace!("processor message effected transaction {}", hex::encode(tx.hash())); - let tributaries = tributaries.read().await; - log::trace!("read global tributaries"); - let Some(tributary) = tributaries.get(&genesis) else { - // TODO: This can happen since Substrate tells the Processor to generate commitments - // at the same time it tells the Tributary to be created - // There's no guarantee the Tributary will have been created though - panic!("processor is operating on tributary we don't have"); - }; - let tributary = tributary.tributary.read().await; - log::trace!("read specific tributary"); - - match tx.kind() { - TransactionKind::Provided(_) => { - log::trace!("providing transaction {}", hex::encode(tx.hash())); - let res = tributary.provide_transaction(tx).await; - if !(res.is_ok() || (res == Err(ProvidedError::AlreadyProvided))) { - panic!("provided an invalid transaction: {res:?}"); + processors.ack(msg).await; + } } - } - TransactionKind::Unsigned => { - log::trace!("publishing unsigned transaction {}", hex::encode(tx.hash())); - // Ignores the result since we can't differentiate already in-mempool from already - // on-chain from invalid - // TODO: Don't ignore the result - tributary.add_transaction(tx).await; - } - TransactionKind::Signed(_) => { - // Get the next nonce - // TODO: This should be deterministic, not just DB-backed, to allow rebuilding validators - // without the prior instance's DB - // let mut txn = db.txn(); - // let nonce = MainDb::tx_nonce(&mut txn, msg.id, tributary); - - // TODO: This isn't deterministic, or at least DB-backed, and accordingly is unsafe - log::trace!("getting next nonce for Tributary TX in response to processor message"); - let nonce = tributary - .next_nonce(Ristretto::generator() * key.deref()) - .await - .expect("publishing a TX to a tributary we aren't in"); - tx.sign(&mut OsRng, genesis, &key, nonce); - - publish_transaction(&tributary, tx).await; - - // txn.commit(); - } + }); } } + }); - processors.ack(msg).await; + let mut last_msg = None; + loop { + // TODO: We dispatch this to an async task per-processor, yet we don't move to the next message + // yet as all processor messages are shoved into a global queue. + // Modify message-queue to offer per-sender queues, not per-receiver. + // Alternatively, a peek method with local delineation of handled messages would work. + + // TODO: Do we handle having handled a message, by DB, yet having rebooted before `ack`ing it? + // Does the processor? + let msg = processors.recv().await; + if last_msg == Some(msg.id) { + sleep(Duration::from_secs(1)).await; + continue; + } + last_msg = Some(msg.id); + + // TODO: Race conditions with above tributary availability? + // TODO: How does this hold up to multisig rotation? 
+ if let Some(channel) = channels.read().await.get(&msg.network) { + channel.send(msg).unwrap(); + } else { + log::warn!("received processor message for network we don't have a channel for"); + } } } @@ -790,33 +892,77 @@ pub async fn run( ) { let serai = Arc::new(serai); + let (new_tributary_spec_send, mut new_tributary_spec_recv) = mpsc::unbounded_channel(); + // Reload active tributaries from the database + for spec in MainDb::new(&mut raw_db).active_tributaries().1 { + new_tributary_spec_send.send(spec).unwrap(); + } + // Handle new Substrate blocks - tokio::spawn(scan_substrate(raw_db.clone(), key.clone(), processors.clone(), serai.clone())); + tokio::spawn(scan_substrate( + raw_db.clone(), + key.clone(), + processors.clone(), + serai.clone(), + new_tributary_spec_send, + )); // Handle the Tributaries - // Arc so this can be shared between the Tributary scanner task and the P2P task - // Write locks on this may take a while to acquire - let tributaries = Arc::new(RwLock::new(HashMap::<[u8; 32], ActiveTributary>::new())); + // This should be large enough for an entire rotation of all tributaries + // If it's too small, the coordinator fail to boot, which is a decent sanity check + let (new_tributary, mut new_tributary_listener_1) = broadcast::channel(32); + let new_tributary_listener_2 = new_tributary.subscribe(); + let new_tributary_listener_3 = new_tributary.subscribe(); + let new_tributary_listener_4 = new_tributary.subscribe(); + let new_tributary_listener_5 = new_tributary.subscribe(); - // Reload active tributaries from the database - for spec in MainDb::new(&mut raw_db).active_tributaries().1 { - let _ = add_tributary( - raw_db.clone(), - key.clone(), - p2p.clone(), - &mut *tributaries.write().await, - spec, - ) - .await; - } + // Spawn a task to further add Tributaries as needed + tokio::spawn({ + let raw_db = raw_db.clone(); + let key = key.clone(); + let processors = processors.clone(); + let p2p = p2p.clone(); + async move { + loop { + let spec = new_tributary_spec_recv.recv().await.unwrap(); + add_tributary( + raw_db.clone(), + key.clone(), + &processors, + p2p.clone(), + &new_tributary, + spec.clone(), + ) + .await; + } + } + }); // When we reach synchrony on an event requiring signing, send our preprocess for it let recognized_id = { let raw_db = raw_db.clone(); let key = key.clone(); - let tributaries = tributaries.clone(); - move |network, genesis, id_type, id| { + + let tributaries = Arc::new(RwLock::new(HashMap::new())); + tokio::spawn({ + let tributaries = tributaries.clone(); + async move { + loop { + match new_tributary_listener_1.recv().await { + Ok(tributary) => { + tributaries.write().await.insert(tributary.spec.genesis(), tributary.tributary); + } + Err(broadcast::error::RecvError::Lagged(_)) => { + panic!("recognized_id lagged to handle new_tributary") + } + Err(broadcast::error::RecvError::Closed) => panic!("new_tributary sender closed"), + } + } + } + }); + + move |network, genesis, id_type, id, nonce| { let raw_db = raw_db.clone(); let key = key.clone(); let tributaries = tributaries.clone(); @@ -851,21 +997,14 @@ pub async fn run( }), }; + tx.sign(&mut OsRng, genesis, &key, nonce); + let tributaries = tributaries.read().await; let Some(tributary) = tributaries.get(&genesis) else { + // TODO: This may happen if the task above is simply slow panic!("tributary we don't have came to consensus on an Batch"); }; - let tributary = tributary.tributary.read().await; - - // TODO: Same note as prior nonce acquisition - log::trace!("getting next nonce for Tributary TX 
containing Batch signing data"); - let nonce = tributary - .next_nonce(Ristretto::generator() * key.deref()) - .await - .expect("publishing a TX to a tributary we aren't in"); - tx.sign(&mut OsRng, genesis, &key, nonce); - - publish_transaction(&tributary, tx).await; + publish_signed_transaction(tributary, tx).await; } } }; @@ -880,19 +1019,19 @@ pub async fn run( p2p.clone(), processors.clone(), serai.clone(), - tributaries.clone(), + new_tributary_listener_2, )); } // Spawn the heartbeat task, which will trigger syncing if there hasn't been a Tributary block // in a while (presumably because we're behind) - tokio::spawn(heartbeat_tributaries(p2p.clone(), tributaries.clone())); + tokio::spawn(heartbeat_tributaries(p2p.clone(), new_tributary_listener_3)); // Handle P2P messages - tokio::spawn(handle_p2p(Ristretto::generator() * key.deref(), p2p, tributaries.clone())); + tokio::spawn(handle_p2p(Ristretto::generator() * key.deref(), p2p, new_tributary_listener_4)); // Handle all messages from processors - handle_processors(raw_db, key, serai, processors, tributaries).await; + handle_processors(raw_db, key, serai, processors, new_tributary_listener_5).await; } #[tokio::main] diff --git a/coordinator/src/p2p.rs b/coordinator/src/p2p.rs index 8d8cf68d1..bc252d506 100644 --- a/coordinator/src/p2p.rs +++ b/coordinator/src/p2p.rs @@ -149,10 +149,6 @@ struct Behavior { mdns: libp2p::mdns::tokio::Behaviour, } -lazy_static::lazy_static! { - static ref TIME_OF_LAST_P2P_MESSAGE: Mutex = Mutex::new(Instant::now()); -} - #[allow(clippy::type_complexity)] #[derive(Clone)] pub struct LibP2p( @@ -246,10 +242,16 @@ impl LibP2p { let (receive_send, receive_recv) = mpsc::unbounded_channel(); tokio::spawn({ + let mut time_of_last_p2p_message = Instant::now(); + #[allow(clippy::needless_pass_by_ref_mut)] // False positive - async fn broadcast_raw(p2p: &mut Swarm, msg: Vec) { + async fn broadcast_raw( + p2p: &mut Swarm, + time_of_last_p2p_message: &mut Instant, + msg: Vec, + ) { // Update the time of last message - *TIME_OF_LAST_P2P_MESSAGE.lock().await = Instant::now(); + *time_of_last_p2p_message = Instant::now(); match p2p.behaviour_mut().gossipsub.publish(IdentTopic::new(LIBP2P_TOPIC), msg.clone()) { Err(PublishError::SigningError(e)) => panic!("signing error when broadcasting: {e}"), @@ -267,8 +269,7 @@ impl LibP2p { async move { // Run this task ad-infinitum loop { - let time_since_last = - Instant::now().duration_since(*TIME_OF_LAST_P2P_MESSAGE.lock().await); + let time_since_last = Instant::now().duration_since(time_of_last_p2p_message); tokio::select! { biased; @@ -276,6 +277,7 @@ impl LibP2p { msg = broadcast_recv.recv() => { broadcast_raw( &mut swarm, + &mut time_of_last_p2p_message, msg.expect("broadcast_recv closed. 
are we shutting down?") ).await; } @@ -324,7 +326,11 @@ impl LibP2p { // (where a finalized block only occurs due to network activity), meaning this won't be // run _ = tokio::time::sleep(Duration::from_secs(80).saturating_sub(time_since_last)) => { - broadcast_raw(&mut swarm, P2pMessageKind::KeepAlive.serialize()).await; + broadcast_raw( + &mut swarm, + &mut time_of_last_p2p_message, + P2pMessageKind::KeepAlive.serialize() + ).await; } } } diff --git a/coordinator/src/substrate/mod.rs b/coordinator/src/substrate/mod.rs index 117b03ba9..5ed4f7ac8 100644 --- a/coordinator/src/substrate/mod.rs +++ b/coordinator/src/substrate/mod.rs @@ -1,4 +1,4 @@ -use core::{ops::Deref, time::Duration, future::Future}; +use core::{ops::Deref, time::Duration}; use std::collections::{HashSet, HashMap}; use zeroize::Zeroizing; @@ -9,7 +9,7 @@ use serai_client::{ SeraiError, Block, Serai, primitives::{BlockHash, NetworkId}, validator_sets::{ - primitives::{Session, ValidatorSet, KeyPair}, + primitives::{ValidatorSet, KeyPair}, ValidatorSetsEvent, }, in_instructions::InInstructionsEvent, @@ -43,12 +43,7 @@ async fn in_set( Ok(Some(data.participants.iter().any(|(participant, _)| participant.0 == key))) } -async fn handle_new_set< - D: Db, - Fut: Future, - CNT: Clone + Fn(&mut D, TributarySpec) -> Fut, - Pro: Processors, ->( +async fn handle_new_set( db: &mut D, key: &Zeroizing<::F>, create_new_tributary: CNT, @@ -84,7 +79,7 @@ async fn handle_new_set< let time = time + SUBSTRATE_TO_TRIBUTARY_TIME_DELAY; let spec = TributarySpec::new(block.hash(), time, set, set_data); - create_new_tributary(db, spec.clone()).await; + create_new_tributary(db, spec.clone()); } else { log::info!("not present in set {:?}", set); } @@ -100,28 +95,26 @@ async fn handle_key_gen( set: ValidatorSet, key_pair: KeyPair, ) -> Result<(), SeraiError> { - if in_set(key, serai, set).await?.expect("KeyGen occurred for a set which doesn't exist") { - processors - .send( - set.network, - CoordinatorMessage::Substrate( - processor_messages::substrate::CoordinatorMessage::ConfirmKeyPair { - context: SubstrateContext { - serai_time: block.time().unwrap() / 1000, - network_latest_finalized_block: serai - .get_latest_block_for_network(block.hash(), set.network) - .await? - // The processor treats this as a magic value which will cause it to find a network - // block which has a time greater than or equal to the Serai time - .unwrap_or(BlockHash([0; 32])), - }, - set, - key_pair, + processors + .send( + set.network, + CoordinatorMessage::Substrate( + processor_messages::substrate::CoordinatorMessage::ConfirmKeyPair { + context: SubstrateContext { + serai_time: block.time().unwrap() / 1000, + network_latest_finalized_block: serai + .get_latest_block_for_network(block.hash(), set.network) + .await? 
+ // The processor treats this as a magic value which will cause it to find a network + // block which has a time greater than or equal to the Serai time + .unwrap_or(BlockHash([0; 32])), }, - ), - ) - .await; - } + set, + key_pair, + }, + ), + ) + .await; Ok(()) } @@ -155,13 +148,10 @@ async fn handle_batch_and_burns( if let InInstructionsEvent::Batch { network, id, block: network_block } = batch { network_had_event(&mut burns, &mut batches, network); - // Track what Serai acknowledges as the latest block for this network - // If this Substrate block has multiple batches, the last batch's block will overwrite the - // prior batches - // Since batches within a block are guaranteed to be ordered, thanks to their incremental ID, - // the last batch will be the latest batch, so its block will be the latest block - // This is just a mild optimization to prevent needing an additional RPC call to grab this - batch_block.insert(network, network_block); + // Make sure this is the only Batch event for this network in this Block + // TODO: Make sure Serai rejects multiple Batchs within the same block. It should, as of an + // yet to be merged branch + assert!(batch_block.insert(network, network_block).is_none()); // Add the batch included by this block batches.get_mut(&network).unwrap().push(id); @@ -206,11 +196,6 @@ async fn handle_batch_and_burns( }, network, block: block.number(), - key: serai - .get_keys(ValidatorSet { network, session: Session(0) }) // TODO2 - .await? - .map(|keys| keys.1.into_inner()) - .expect("batch/burn for network which never set keys"), burns: burns.remove(&network).unwrap(), batches: batches.remove(&network).unwrap(), }, @@ -225,12 +210,7 @@ async fn handle_batch_and_burns( // Handle a specific Substrate block, returning an error when it fails to get data // (not blocking / holding) #[allow(clippy::needless_pass_by_ref_mut)] // False positive? 
-async fn handle_block< - D: Db, - Fut: Future, - CNT: Clone + Fn(&mut D, TributarySpec) -> Fut, - Pro: Processors, ->( +async fn handle_block( db: &mut SubstrateDb, key: &Zeroizing<::F>, create_new_tributary: CNT, @@ -305,12 +285,7 @@ async fn handle_block< Ok(()) } -pub async fn handle_new_blocks< - D: Db, - Fut: Future, - CNT: Clone + Fn(&mut D, TributarySpec) -> Fut, - Pro: Processors, ->( +pub async fn handle_new_blocks( db: &mut SubstrateDb, key: &Zeroizing<::F>, create_new_tributary: CNT, diff --git a/coordinator/src/tests/tributary/dkg.rs b/coordinator/src/tests/tributary/dkg.rs index 1da472fa8..874a78954 100644 --- a/coordinator/src/tests/tributary/dkg.rs +++ b/coordinator/src/tests/tributary/dkg.rs @@ -86,7 +86,7 @@ async fn dkg_test() { handle_new_blocks::<_, _, _, _, _, _, LocalP2p>( &mut scanner_db, key, - |_, _, _, _| async { + |_, _, _, _, _| async { panic!("provided TX caused recognized_id to be called in new_processors") }, &processors, @@ -112,7 +112,7 @@ async fn dkg_test() { handle_new_blocks::<_, _, _, _, _, _, LocalP2p>( &mut scanner_db, &keys[0], - |_, _, _, _| async { + |_, _, _, _, _| async { panic!("provided TX caused recognized_id to be called after Commitments") }, &processors, @@ -191,7 +191,7 @@ async fn dkg_test() { handle_new_blocks::<_, _, _, _, _, _, LocalP2p>( &mut scanner_db, &keys[0], - |_, _, _, _| async { + |_, _, _, _, _| async { panic!("provided TX caused recognized_id to be called after some shares") }, &processors, @@ -239,7 +239,7 @@ async fn dkg_test() { handle_new_blocks::<_, _, _, _, _, _, LocalP2p>( &mut scanner_db, &keys[0], - |_, _, _, _| async { panic!("provided TX caused recognized_id to be called after shares") }, + |_, _, _, _, _| async { panic!("provided TX caused recognized_id to be called after shares") }, &processors, |_, _| async { panic!("test tried to publish a new Serai TX") }, &spec, @@ -306,7 +306,7 @@ async fn dkg_test() { handle_new_blocks::<_, _, _, _, _, _, LocalP2p>( &mut scanner_db, &keys[0], - |_, _, _, _| async { + |_, _, _, _, _| async { panic!("provided TX caused recognized_id to be called after DKG confirmation") }, &processors, diff --git a/coordinator/src/tests/tributary/handle_p2p.rs b/coordinator/src/tests/tributary/handle_p2p.rs index becf5059f..87576dd8f 100644 --- a/coordinator/src/tests/tributary/handle_p2p.rs +++ b/coordinator/src/tests/tributary/handle_p2p.rs @@ -1,11 +1,11 @@ use core::time::Duration; -use std::{sync::Arc, collections::HashMap}; +use std::sync::Arc; use rand_core::OsRng; use ciphersuite::{Ciphersuite, Ristretto}; -use tokio::{sync::RwLock, time::sleep}; +use tokio::{sync::broadcast, time::sleep}; use serai_db::MemDb; @@ -27,18 +27,18 @@ async fn handle_p2p_test() { let mut tributaries = new_tributaries(&keys, &spec).await; + let mut tributary_senders = vec![]; let mut tributary_arcs = vec![]; for (i, (p2p, tributary)) in tributaries.drain(..).enumerate() { - let tributary = Arc::new(RwLock::new(tributary)); + let tributary = Arc::new(tributary); tributary_arcs.push(tributary.clone()); - tokio::spawn(handle_p2p( - Ristretto::generator() * *keys[i], - p2p, - Arc::new(RwLock::new(HashMap::from([( - spec.genesis(), - ActiveTributary { spec: spec.clone(), tributary }, - )]))), - )); + let (new_tributary_send, new_tributary_recv) = broadcast::channel(5); + tokio::spawn(handle_p2p(Ristretto::generator() * *keys[i], p2p, new_tributary_recv)); + new_tributary_send + .send(ActiveTributary { spec: spec.clone(), tributary }) + .map_err(|_| "failed to send ActiveTributary") + .unwrap(); + 
tributary_senders.push(new_tributary_send); } let tributaries = tributary_arcs; @@ -46,22 +46,22 @@ async fn handle_p2p_test() { // We don't wait one block of time as we may have missed the chance for this block sleep(Duration::from_secs((2 * Tributary::::block_time()).into())) .await; - let tip = tributaries[0].read().await.tip().await; + let tip = tributaries[0].tip().await; assert!(tip != spec.genesis()); // Sleep one second to make sure this block propagates sleep(Duration::from_secs(1)).await; // Make sure every tributary has it for tributary in &tributaries { - assert!(tributary.read().await.reader().block(&tip).is_some()); + assert!(tributary.reader().block(&tip).is_some()); } // Then after another block of time, we should have yet another new block sleep(Duration::from_secs(Tributary::::block_time().into())).await; - let new_tip = tributaries[0].read().await.tip().await; + let new_tip = tributaries[0].tip().await; assert!(new_tip != tip); sleep(Duration::from_secs(1)).await; for tributary in tributaries { - assert!(tributary.read().await.reader().block(&new_tip).is_some()); + assert!(tributary.reader().block(&new_tip).is_some()); } } diff --git a/coordinator/src/tests/tributary/sync.rs b/coordinator/src/tests/tributary/sync.rs index ced97bd6b..af4bb0c79 100644 --- a/coordinator/src/tests/tributary/sync.rs +++ b/coordinator/src/tests/tributary/sync.rs @@ -1,14 +1,11 @@ use core::time::Duration; -use std::{ - sync::Arc, - collections::{HashSet, HashMap}, -}; +use std::{sync::Arc, collections::HashSet}; use rand_core::OsRng; use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto}; -use tokio::{sync::RwLock, time::sleep}; +use tokio::{sync::broadcast, time::sleep}; use serai_db::MemDb; @@ -37,19 +34,20 @@ async fn sync_test() { let (syncer_p2p, syncer_tributary) = tributaries.pop().unwrap(); // Have the rest form a P2P net + let mut tributary_senders = vec![]; let mut tributary_arcs = vec![]; let mut p2p_threads = vec![]; for (i, (p2p, tributary)) in tributaries.drain(..).enumerate() { - let tributary = Arc::new(RwLock::new(tributary)); + let tributary = Arc::new(tributary); tributary_arcs.push(tributary.clone()); - let thread = tokio::spawn(handle_p2p( - Ristretto::generator() * *keys[i], - p2p, - Arc::new(RwLock::new(HashMap::from([( - spec.genesis(), - ActiveTributary { spec: spec.clone(), tributary }, - )]))), - )); + let (new_tributary_send, new_tributary_recv) = broadcast::channel(5); + let thread = + tokio::spawn(handle_p2p(Ristretto::generator() * *keys[i], p2p, new_tributary_recv)); + new_tributary_send + .send(ActiveTributary { spec: spec.clone(), tributary }) + .map_err(|_| "failed to send ActiveTributary") + .unwrap(); + tributary_senders.push(new_tributary_send); p2p_threads.push(thread); } let tributaries = tributary_arcs; @@ -60,14 +58,14 @@ async fn sync_test() { // propose by our 'offline' validator let block_time = u64::from(Tributary::::block_time()); sleep(Duration::from_secs(3 * block_time)).await; - let tip = tributaries[0].read().await.tip().await; + let tip = tributaries[0].tip().await; assert!(tip != spec.genesis()); // Sleep one second to make sure this block propagates sleep(Duration::from_secs(1)).await; // Make sure every tributary has it for tributary in &tributaries { - assert!(tributary.read().await.reader().block(&tip).is_some()); + assert!(tributary.reader().block(&tip).is_some()); } // Now that we've confirmed the other tributaries formed a net without issue, drop the syncer's @@ -76,31 +74,36 @@ async fn sync_test() { // Have it join the net 
let syncer_key = Ristretto::generator() * *syncer_key; - let syncer_tributary = Arc::new(RwLock::new(syncer_tributary)); - let syncer_tributaries = Arc::new(RwLock::new(HashMap::from([( - spec.genesis(), - ActiveTributary { spec: spec.clone(), tributary: syncer_tributary.clone() }, - )]))); - tokio::spawn(handle_p2p(syncer_key, syncer_p2p.clone(), syncer_tributaries.clone())); + let syncer_tributary = Arc::new(syncer_tributary); + let (syncer_tributary_send, syncer_tributary_recv) = broadcast::channel(5); + tokio::spawn(handle_p2p(syncer_key, syncer_p2p.clone(), syncer_tributary_recv)); + syncer_tributary_send + .send(ActiveTributary { spec: spec.clone(), tributary: syncer_tributary.clone() }) + .map_err(|_| "failed to send ActiveTributary to syncer") + .unwrap(); // It shouldn't automatically catch up. If it somehow was, our test would be broken // Sanity check this - let tip = tributaries[0].read().await.tip().await; + let tip = tributaries[0].tip().await; sleep(Duration::from_secs(2 * block_time)).await; - assert!(tributaries[0].read().await.tip().await != tip); - assert_eq!(syncer_tributary.read().await.tip().await, spec.genesis()); + assert!(tributaries[0].tip().await != tip); + assert_eq!(syncer_tributary.tip().await, spec.genesis()); // Start the heartbeat protocol - tokio::spawn(heartbeat_tributaries(syncer_p2p, syncer_tributaries)); + let (syncer_heartbeat_tributary_send, syncer_heartbeat_tributary_recv) = broadcast::channel(5); + tokio::spawn(heartbeat_tributaries(syncer_p2p, syncer_heartbeat_tributary_recv)); + syncer_heartbeat_tributary_send + .send(ActiveTributary { spec: spec.clone(), tributary: syncer_tributary.clone() }) + .map_err(|_| "failed to send ActiveTributary to heartbeat") + .unwrap(); // The heartbeat is once every 10 blocks sleep(Duration::from_secs(10 * block_time)).await; - assert!(syncer_tributary.read().await.tip().await != spec.genesis()); + assert!(syncer_tributary.tip().await != spec.genesis()); // Verify it synced to the tip let syncer_tip = { - let tributary = tributaries[0].write().await; - let syncer_tributary = syncer_tributary.write().await; + let tributary = &tributaries[0]; let tip = tributary.tip().await; let syncer_tip = syncer_tributary.tip().await; @@ -114,7 +117,7 @@ async fn sync_test() { sleep(Duration::from_secs(block_time)).await; // Verify it's now keeping up - assert!(syncer_tributary.read().await.tip().await != syncer_tip); + assert!(syncer_tributary.tip().await != syncer_tip); // Verify it's now participating in consensus // Because only `t` validators are used in a commit, take n - t nodes offline @@ -128,7 +131,6 @@ async fn sync_test() { // wait for a block sleep(Duration::from_secs(block_time)).await; - let syncer_tributary = syncer_tributary.read().await; if syncer_tributary .reader() .parsed_commit(&syncer_tributary.tip().await) diff --git a/coordinator/src/tributary/handle.rs b/coordinator/src/tributary/handle.rs index cfca4500d..257f6932a 100644 --- a/coordinator/src/tributary/handle.rs +++ b/coordinator/src/tributary/handle.rs @@ -17,7 +17,6 @@ use frost_schnorrkel::Schnorrkel; use serai_client::{ Signature, - primitives::NetworkId, validator_sets::primitives::{ValidatorSet, KeyPair, musig_context, set_keys_message}, subxt::utils::Encoded, Serai, @@ -36,7 +35,8 @@ use serai_db::{Get, Db}; use crate::{ processors::Processors, tributary::{ - Transaction, TributarySpec, Topic, DataSpecification, TributaryDb, scanner::RecognizedIdType, + Transaction, TributarySpec, Topic, DataSpecification, TributaryDb, 
nonce_decider::NonceDecider, + scanner::RecognizedIdType, }, }; @@ -224,13 +224,13 @@ pub fn generated_key_pair( DkgConfirmer::share(spec, key, attempt, preprocesses, key_pair) } -pub async fn handle_application_tx< +pub(crate) async fn handle_application_tx< D: Db, Pro: Processors, FPst: Future, PST: Clone + Fn(ValidatorSet, Encoded) -> FPst, FRid: Future, - RID: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32]) -> FRid, + RID: crate::RIDTrait, >( tx: Transaction, spec: &TributarySpec, @@ -414,7 +414,8 @@ pub async fn handle_application_tx< Transaction::Batch(_, batch) => { // Because this Batch has achieved synchrony, its batch ID should be authorized TributaryDb::::recognize_topic(txn, genesis, Topic::Batch(batch)); - recognized_id(spec.set().network, genesis, RecognizedIdType::Batch, batch).await; + let nonce = NonceDecider::::handle_batch(txn, genesis, batch); + recognized_id(spec.set().network, genesis, RecognizedIdType::Batch, batch, nonce).await; } Transaction::SubstrateBlock(block) => { @@ -423,9 +424,10 @@ pub async fn handle_application_tx< despite us not providing that transaction", ); - for id in plan_ids { + let nonces = NonceDecider::::handle_substrate_block(txn, genesis, &plan_ids); + for (nonce, id) in nonces.into_iter().zip(plan_ids.into_iter()) { TributaryDb::::recognize_topic(txn, genesis, Topic::Sign(id)); - recognized_id(spec.set().network, genesis, RecognizedIdType::Plan, id).await; + recognized_id(spec.set().network, genesis, RecognizedIdType::Plan, id, nonce).await; } } @@ -441,6 +443,7 @@ pub async fn handle_application_tx< &data.signed, ) { Some(Some(preprocesses)) => { + NonceDecider::::selected_for_signing_batch(txn, genesis, data.plan); processors .send( spec.set().network, @@ -498,6 +501,7 @@ pub async fn handle_application_tx< &data.signed, ) { Some(Some(preprocesses)) => { + NonceDecider::::selected_for_signing_plan(txn, genesis, data.plan); processors .send( spec.set().network, diff --git a/coordinator/src/tributary/mod.rs b/coordinator/src/tributary/mod.rs index c97107db7..9ae31ce1d 100644 --- a/coordinator/src/tributary/mod.rs +++ b/coordinator/src/tributary/mod.rs @@ -30,6 +30,9 @@ use tributary::{ mod db; pub use db::*; +mod nonce_decider; +pub use nonce_decider::*; + mod handle; pub use handle::*; diff --git a/coordinator/src/tributary/nonce_decider.rs b/coordinator/src/tributary/nonce_decider.rs new file mode 100644 index 000000000..eb95c5395 --- /dev/null +++ b/coordinator/src/tributary/nonce_decider.rs @@ -0,0 +1,127 @@ +use core::marker::PhantomData; + +use serai_db::{Get, DbTxn, Db}; + +use crate::tributary::Transaction; + +/// Decides the nonce which should be used for a transaction on a Tributary. +/// +/// Deterministically builds a list of nonces to use based on the on-chain events and expected +/// transactions in response. Enables rebooting/rebuilding validators with full safety. 
+pub struct NonceDecider(PhantomData); + +const BATCH_CODE: u8 = 0; +const BATCH_SIGNING_CODE: u8 = 1; +const PLAN_CODE: u8 = 2; +const PLAN_SIGNING_CODE: u8 = 3; + +impl NonceDecider { + fn next_nonce_key(genesis: [u8; 32]) -> Vec { + D::key(b"coordinator_tributary_nonce", b"next", genesis) + } + fn allocate_nonce(txn: &mut D::Transaction<'_>, genesis: [u8; 32]) -> u32 { + let key = Self::next_nonce_key(genesis); + let next = + txn.get(&key).map(|bytes| u32::from_le_bytes(bytes.try_into().unwrap())).unwrap_or(3); + txn.put(key, (next + 1).to_le_bytes()); + next + } + + fn item_nonce_key(genesis: [u8; 32], code: u8, id: [u8; 32]) -> Vec { + D::key( + b"coordinator_tributary_nonce", + b"item", + [genesis.as_slice(), [code].as_ref(), id.as_ref()].concat(), + ) + } + fn set_nonce( + txn: &mut D::Transaction<'_>, + genesis: [u8; 32], + code: u8, + id: [u8; 32], + nonce: u32, + ) { + txn.put(Self::item_nonce_key(genesis, code, id), nonce.to_le_bytes()) + } + fn db_nonce(getter: &G, genesis: [u8; 32], code: u8, id: [u8; 32]) -> Option { + getter + .get(Self::item_nonce_key(genesis, code, id)) + .map(|bytes| u32::from_le_bytes(bytes.try_into().unwrap())) + } + + pub fn handle_batch(txn: &mut D::Transaction<'_>, genesis: [u8; 32], batch: [u8; 32]) -> u32 { + let nonce_for = Self::allocate_nonce(txn, genesis); + Self::set_nonce(txn, genesis, BATCH_CODE, batch, nonce_for); + nonce_for + } + pub fn selected_for_signing_batch( + txn: &mut D::Transaction<'_>, + genesis: [u8; 32], + batch: [u8; 32], + ) { + let nonce_for = Self::allocate_nonce(txn, genesis); + Self::set_nonce(txn, genesis, BATCH_SIGNING_CODE, batch, nonce_for); + } + + pub fn handle_substrate_block( + txn: &mut D::Transaction<'_>, + genesis: [u8; 32], + plans: &[[u8; 32]], + ) -> Vec { + let mut res = Vec::with_capacity(plans.len()); + for plan in plans { + let nonce_for = Self::allocate_nonce(txn, genesis); + Self::set_nonce(txn, genesis, PLAN_CODE, *plan, nonce_for); + res.push(nonce_for); + } + res + } + pub fn selected_for_signing_plan( + txn: &mut D::Transaction<'_>, + genesis: [u8; 32], + plan: [u8; 32], + ) { + let nonce_for = Self::allocate_nonce(txn, genesis); + Self::set_nonce(txn, genesis, PLAN_SIGNING_CODE, plan, nonce_for); + } + + pub fn nonce(getter: &G, genesis: [u8; 32], tx: &Transaction) -> Option> { + match tx { + Transaction::DkgCommitments(attempt, _, _) => { + assert_eq!(*attempt, 0); + Some(Some(0)) + } + Transaction::DkgShares { attempt, .. } => { + assert_eq!(*attempt, 0); + Some(Some(1)) + } + Transaction::DkgConfirmed(attempt, _, _) => { + assert_eq!(*attempt, 0); + Some(Some(2)) + } + + Transaction::Batch(_, _) => None, + Transaction::SubstrateBlock(_) => None, + + Transaction::BatchPreprocess(data) => { + assert_eq!(data.attempt, 0); + Some(Self::db_nonce(getter, genesis, BATCH_CODE, data.plan)) + } + Transaction::BatchShare(data) => { + assert_eq!(data.attempt, 0); + Some(Self::db_nonce(getter, genesis, BATCH_SIGNING_CODE, data.plan)) + } + + Transaction::SignPreprocess(data) => { + assert_eq!(data.attempt, 0); + Some(Self::db_nonce(getter, genesis, PLAN_CODE, data.plan)) + } + Transaction::SignShare(data) => { + assert_eq!(data.attempt, 0); + Some(Self::db_nonce(getter, genesis, PLAN_SIGNING_CODE, data.plan)) + } + + Transaction::SignCompleted { .. 
} => None, + } + } +} diff --git a/coordinator/src/tributary/scanner.rs b/coordinator/src/tributary/scanner.rs index fe8a18d26..5d8f00168 100644 --- a/coordinator/src/tributary/scanner.rs +++ b/coordinator/src/tributary/scanner.rs @@ -4,9 +4,7 @@ use zeroize::Zeroizing; use ciphersuite::{Ciphersuite, Ristretto}; -use serai_client::{ - primitives::NetworkId, validator_sets::primitives::ValidatorSet, subxt::utils::Encoded, -}; +use serai_client::{validator_sets::primitives::ValidatorSet, subxt::utils::Encoded}; use tributary::{ Transaction as TributaryTransaction, Block, TributaryReader, @@ -40,7 +38,7 @@ async fn handle_block< FPst: Future, PST: Clone + Fn(ValidatorSet, Encoded) -> FPst, FRid: Future, - RID: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32]) -> FRid, + RID: crate::RIDTrait, P: P2p, >( db: &mut TributaryDb, @@ -101,13 +99,13 @@ async fn handle_block< // TODO2: Trigger any necessary re-attempts } -pub async fn handle_new_blocks< +pub(crate) async fn handle_new_blocks< D: Db, Pro: Processors, FPst: Future, PST: Clone + Fn(ValidatorSet, Encoded) -> FPst, FRid: Future, - RID: Clone + Fn(NetworkId, [u8; 32], RecognizedIdType, [u8; 32]) -> FRid, + RID: crate::RIDTrait, P: P2p, >( db: &mut TributaryDb, diff --git a/coordinator/tributary/src/blockchain.rs b/coordinator/tributary/src/blockchain.rs index 78c2ca2bf..d21928ec0 100644 --- a/coordinator/tributary/src/blockchain.rs +++ b/coordinator/tributary/src/blockchain.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{VecDeque, HashMap}; use ciphersuite::{group::GroupEncoding, Ciphersuite, Ristretto}; @@ -13,7 +13,7 @@ use crate::{ transaction::{Signed, TransactionKind, Transaction as TransactionTrait}, }; -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Debug)] pub(crate) struct Blockchain { db: Option, genesis: [u8; 32], @@ -24,11 +24,13 @@ pub(crate) struct Blockchain { provided: ProvidedTransactions, mempool: Mempool, + + pub(crate) next_block_notifications: VecDeque>, } impl Blockchain { - fn tip_key(&self) -> Vec { - D::key(b"tributary_blockchain", b"tip", self.genesis) + fn tip_key(genesis: [u8; 32]) -> Vec { + D::key(b"tributary_blockchain", b"tip", genesis) } fn block_number_key(&self) -> Vec { D::key(b"tributary_blockchain", b"block_number", self.genesis) @@ -76,11 +78,13 @@ impl Blockchain { provided: ProvidedTransactions::new(db.clone(), genesis), mempool: Mempool::new(db, genesis), + + next_block_notifications: VecDeque::new(), }; if let Some((block_number, tip)) = { let db = res.db.as_ref().unwrap(); - db.get(res.block_number_key()).map(|number| (number, db.get(res.tip_key()).unwrap())) + db.get(res.block_number_key()).map(|number| (number, db.get(Self::tip_key(genesis)).unwrap())) } { res.block_number = u32::from_le_bytes(block_number.try_into().unwrap()); res.tip.copy_from_slice(&tip); @@ -132,6 +136,10 @@ impl Blockchain { db.get(Self::block_after_key(&genesis, block)).map(|bytes| bytes.try_into().unwrap()) } + pub(crate) fn tip_from_db(db: &D, genesis: [u8; 32]) -> [u8; 32] { + db.get(Self::tip_key(genesis)).map(|bytes| bytes.try_into().unwrap()).unwrap_or(genesis) + } + pub(crate) fn add_transaction( &mut self, internal: bool, @@ -226,7 +234,7 @@ impl Blockchain { let mut txn = db.txn(); self.tip = block.hash(); - txn.put(self.tip_key(), self.tip); + txn.put(Self::tip_key(self.genesis), self.tip); self.block_number += 1; txn.put(self.block_number_key(), self.block_number.to_le_bytes()); @@ -270,6 +278,10 @@ impl Blockchain { txn.commit(); self.db = Some(db); + for tx in 
self.next_block_notifications.drain(..) { + let _ = tx.send(()); + } + Ok(()) } } diff --git a/coordinator/tributary/src/lib.rs b/coordinator/tributary/src/lib.rs index ebb7b165c..3c5227b0c 100644 --- a/coordinator/tributary/src/lib.rs +++ b/coordinator/tributary/src/lib.rs @@ -336,6 +336,15 @@ impl Tributary { _ => false, } } + + /// Get a Future which will resolve once the next block has been added. + pub async fn next_block_notification( + &self, + ) -> impl Send + Sync + core::future::Future> { + let (tx, rx) = tokio::sync::oneshot::channel(); + self.network.blockchain.write().await.next_block_notifications.push_back(tx); + rx + } } #[derive(Clone)] @@ -344,7 +353,8 @@ impl TributaryReader { pub fn genesis(&self) -> [u8; 32] { self.1 } - // Since these values are static, they can be safely read from the database without lock + + // Since these values are static once set, they can be safely read from the database without lock // acquisition pub fn block(&self, hash: &[u8; 32]) -> Option> { Blockchain::::block_from_db(&self.0, self.1, hash) @@ -363,4 +373,9 @@ impl TributaryReader { .commit(hash) .map(|commit| Commit::::decode(&mut commit.as_ref()).unwrap().end_time) } + + // This isn't static, yet can be read with only minor discrepancy risks + pub fn tip(&self) -> [u8; 32] { + Blockchain::::tip_from_db(&self.0, self.1) + } } diff --git a/coordinator/tributary/src/tests/blockchain.rs b/coordinator/tributary/src/tests/blockchain.rs index cee6103e3..05357cb1d 100644 --- a/coordinator/tributary/src/tests/blockchain.rs +++ b/coordinator/tributary/src/tests/blockchain.rs @@ -104,7 +104,7 @@ fn invalid_block() { { // Add a valid transaction - let mut blockchain = blockchain.clone(); + let (_, mut blockchain) = new_blockchain(genesis, &[tx.1.signer]); assert!(blockchain.add_transaction::( true, Transaction::Application(tx.clone()), @@ -129,7 +129,7 @@ fn invalid_block() { { // Invalid signature - let mut blockchain = blockchain.clone(); + let (_, mut blockchain) = new_blockchain(genesis, &[tx.1.signer]); assert!(blockchain.add_transaction::( true, Transaction::Application(tx), diff --git a/deny.toml b/deny.toml index 595c6101a..32443d5fc 100644 --- a/deny.toml +++ b/deny.toml @@ -65,6 +65,8 @@ exceptions = [ { allow = ["AGPL-3.0"], name = "serai-client" }, + { allow = ["AGPL-3.0"], name = "mini-serai" }, + { allow = ["AGPL-3.0"], name = "serai-docker-tests" }, { allow = ["AGPL-3.0"], name = "serai-message-queue-tests" }, { allow = ["AGPL-3.0"], name = "serai-processor-tests" }, diff --git a/docs/policy/Canonical Chain.md b/docs/policy/Canonical Chain.md new file mode 100644 index 000000000..9eb1ae62e --- /dev/null +++ b/docs/policy/Canonical Chain.md @@ -0,0 +1,72 @@ +# Canonical Chain + +As Serai is a network connected to many external networks, at some point we will +likely have to ask ourselves what the canonical chain for a network is. This +document intends to establish soft, non-binding policy, in the hopes it'll guide +most discussions on the matter. + +The canonical chain is the chain Serai follows and honors transactions on. Serai +does not guarantee operations availability nor integrity on any chains other +than the canonical chain. Which chain is considered canonical is dependent on +several factors. + +### Finalization + +Serai finalizes blocks from external networks onto itself. Once a block is +finalized, it is considered irreversible. Accordingly, the primary tenet +regarding what chain Serai will honor is the chain Serai has finalized. 
We can +only assume the integrity of our coins on that chain. + +### Node Software + +Only node software which passes a quality threshold and actively identifies as +belonging to an external network's protocol should be run. Never should a +transformative node (a node trying to create a new network from an existing one) +be run in place of a node actually intended for the external network. Beyond active +identification, it must have community recognition as belonging. + +If the majority of a community actively identifying as the network stands behind +a hard fork, it should not be considered a new network, yet rather the next step of +the existing one. If a hard fork breaks Serai's integrity, it should not be +supported. + +Multiple independent nodes should be run in order to reduce the likelihood of +vulnerabilities to any specific node's faults. + +### Rollbacks + +Over time, various networks have rolled back in response to exploits. A rollback +should undergo the same scrutiny as a hard fork. If the rollback breaks Serai's +integrity, yet someone identifying as from the project offers to restore +integrity out-of-band, integrity is considered kept so long as the offer is +followed through on. + +Since a rollback would break Serai's finalization policy, a technical note on +how it could be implemented is provided. + +Assume a blockchain from `0 .. 100` exists, with `100a ..= 500a` being rolled +back blocks. The new chain extends from `99` with `100b ..= 200b`. Serai would +define the canonical chain as `0 .. 100`, `100a ..= 500a`, `100b ..= 200b`, with +`100b` building off `500a`. Serai would have to perform data-availability for +`100a ..= 500a` (such as via a JSON file in-tree), and would have to modify the +processor to edit its `Eventuality`s/UTXOs at `500a` back to the state at `99`. +Any `Burn`s handled after `99` should be handled once again, if the transactions +from `100a ..= 500a` cannot simply be carried over. + +### On Fault + +If the canonical chain does put Serai's coins into an invalid state, +irreversibly and without amends, then the discrepancy should be amortized to all +users as feasible, yet affected operations should otherwise halt if under +permanent duress. + +For example, if Serai lists a token which has a by-governance blacklist +function, and is blacklisted without appeal, Serai should destroy all associated +sriXYZ and cease operations. + +If a bug, either in the chain or in Serai's own code, causes a loss of 10% of +coins (without amends), operations should halt until all outputs in the system can +have their virtual amount reduced by a total amount of the loss, +proportionalized to each output. Alternatively, Serai could decrease all token +balances by 10%. All liquidity/swap operations should be halted until users are +given proper time to withdraw, if they so choose, before operations resume. diff --git a/docs/processor/Multisig Rotation.md b/docs/processor/Multisig Rotation.md new file mode 100644 index 000000000..822eeccaf --- /dev/null +++ b/docs/processor/Multisig Rotation.md @@ -0,0 +1,176 @@ +# Multisig Rotation + +Substrate is expected to determine when a new validator set instance will be +created, and with it, a new multisig. Upon the successful creation of a new +multisig, as determined by the new multisig setting their key pair on Substrate, +rotation begins. + +### Timeline + +The following timeline is established: + +1) The new multisig is created, and has its keys set on Serai.
Once the next + `Batch` with a new external network block is published, its block becomes the + "queue block". The new multisig is set to activate at the "queue block", plus + `CONFIRMATIONS` blocks (the "activation block"). + + We don't use the last `Batch`'s external network block, as that `Batch` may + be older than `CONFIRMATIONS` blocks. Any yet-to-be-included-and-finalized + `Batch` will be within `CONFIRMATIONS` blocks of what any processor has + scanned however, as it'll wait for inclusion and finalization before + continuing scanning. + +2) Once the "activation block" itself has been finalized on Serai, UIs should + start exclusively using the new multisig. If the "activation block" isn't + finalized within `2 * CONFIRMATIONS` blocks, UIs should stop making + transactions to any multisig on that network. + + Waiting for Serai's finalization prevents a UI from using an unfinalized + "activation block" before a re-organization to a shorter chain. If a + transaction to Serai was carried from the unfinalized "activation block" + to the shorter chain, it'd no longer be after the "activation block" and + accordingly would be ignored. + + We could, instead of waiting for Serai to finalize the block, wait for the + block to have `CONFIRMATIONS` confirmations. This would prevent needing to + wait for an indeterminate amount of time for Serai to finalize the + "activation block", with the knowledge it should be finalized. Doing so would + open UIs to eclipse attacks, where they live on an alternate chain where a + possible "activation block" is finalized, yet Serai finalizes a distinct + "activation block". If the alternate chain was longer than the finalized + chain, the above issue would be reopened. + + The reason for UIs stopping under abnormal behavior is as follows. Given a + sufficiently delayed `Batch` for the "activation block", UIs will use the old + multisig past the point it will be deprecated. Accordingly, UIs must realize + when `Batch`s are so delayed and continued transactions are a risk. While + `2 * CONFIRMATIONS` is presumably well within the 6 hour period (defined + below), that period exists for low-fee transactions at times of congestion. It + does not exist for UIs with old state, though it can be used to compensate + for them (reducing the tolerance for inclusion delays). `2 * CONFIRMATIONS` + is before the 6 hour period is enacted, preserving the tolerance for + inclusion delays, yet it still should only happen under highly abnormal + circumstances. + + In order to minimize the time it takes for the "activation block" to be + finalized, a `Batch` will always be created for it, regardless of whether it + would otherwise have a `Batch` created. + +3) The prior multisig continues handling `Batch`s and `Burn`s for + `CONFIRMATIONS` blocks, plus 10 minutes, after the "activation block". + + The first `CONFIRMATIONS` blocks are due to the fact the new multisig + shouldn't actually be sent coins during this period, making it irrelevant. + If coins are prematurely sent to the new multisig, they're artificially + delayed until the end of the `CONFIRMATIONS` blocks plus 10 minutes period. + This prevents an adversary from minting Serai tokens using coins in the new + multisig, yet then burning them to drain the prior multisig, creating a lack + of liquidity for several blocks. + + The reason for the 10 minutes is to provide grace to honest UIs.
Since UIs + will wait until Serai confirms the "activation block" for keys before sending + to them, which will take `CONFIRMATIONS` blocks plus some latency, UIs would + make transactions to the prior multisig past the end of this period if it was + `CONFIRMATIONS` alone. Since the next period is `CONFIRMATIONS` blocks, which + is how long transactions take to confirm, transactions made past the end of + this period would only be received after the next period. After the next period, + the prior multisig adds fees and a delay to all received funds (as it + forwards the funds from itself to the new multisig). The 10 minutes provides + grace for latency. + + The 10 minutes is a delay on anyone who immediately transitions to the new + multisig, in a no-latency environment, yet the delay is preferable to fees + from forwarding. It also should be less than 10 minutes thanks to various + latencies. + +4) The prior multisig continues handling `Batch`s and `Burn`s for another + `CONFIRMATIONS` blocks. + + This is for two reasons: + + 1) Coins sent to the new multisig still need time to gain sufficient + confirmations. + 2) All outputs belonging to the prior multisig should become available within + `CONFIRMATIONS` blocks. + + All `Burn`s handled during this period should use the new multisig for the + change address. This should effect a transfer of most outputs. + + With the expected transfer of most outputs, and the new multisig receiving + new external transactions, the new multisig takes the responsibility of + signing all unhandled and newly emitted `Burn`s. + +5) For the next 6 hours, all non-`Branch` outputs received are immediately + forwarded to the new multisig. Only external transactions to the new multisig + are included in `Batch`s. + + The new multisig infers the `InInstruction`, and refund address, for + forwarded `External` outputs via reading what they were for the original + `External` output. + + Alternatively, the `InInstruction`, with refund address explicitly included, + could be included in the forwarding transaction. This may fail if the + `InInstruction` omitted the refund address and is too large to fit in a + transaction with one explicitly included. On such failure, the refund would + be immediately issued instead. + +6) Once the 6 hour period has expired, the prior multisig stops handling outputs + it didn't itself create. Any remaining `Eventuality`s are completed, and any + available/freshly available outputs are forwarded (creating new + `Eventuality`s which also need to successfully resolve). + + Once the 6 hour period has expired, no `Eventuality`s remain, and all + outputs have been forwarded, the multisig publishes a final `Batch` for the first + block, plus `CONFIRMATIONS`, which met these conditions, regardless of whether it + would've otherwise had a `Batch`. Then, it reports to Substrate that it has closed. + No further actions by it, nor its validators, are expected (unless those + validators remain present in the new multisig). + +7) The new multisig confirms all transactions from all prior multisigs were made + as expected, including the reported `Batch`s. + + Unfortunately, we cannot solely check the immediately prior multisig due to + the ability for two sequential malicious multisigs to steal. If multisig + `n - 2` only transfers a fraction of its coins to multisig `n - 1`, multisig + `n - 1` can 'honestly' operate on the dishonest state it was given, + laundering it.
This would let multisig `n - 1` forward the results of its + as-expected operations from a dishonest starting point to the new multisig, + and multisig `n` would attest to multisig `n - 1`'s expected (and therefore + presumed honest) operations, assuming liability. This would cause an honest + multisig to face full liability for the invalid state, causing it to be fully + slashed (as needed to reacquire any lost coins). + + This would appear short-circuitable if multisig `n - 1` transfers coins + exceeding the relevant Serai tokens' supply. Serai never expects to operate + in an over-solvent state, yet balance should trend upwards due to a flat fee + applied to each received output (preventing a griefing attack). Any balance + greater than the tokens' supply may have had funds skimmed off the top, yet + it'd still guarantee the solvency of Serai without any additional fees + passed to users. Unfortunately, due to the requirement to verify the `Batch`s + published (as else the Serai tokens' supply may be manipulated), this cannot + actually be achieved (at least, not without a ZK proof that the published `Batch`s + were correct). + +8) The new multisig reports a successful close of the prior multisig, and + becomes the sole multisig with full responsibilities. + +### Latency and Fees + +Slightly before the end of step 3, the new multisig should start receiving new +external outputs. These won't be confirmed for another `CONFIRMATIONS` blocks, +and the new multisig won't start handling `Burn`s for another `CONFIRMATIONS` +blocks plus 10 minutes. Accordingly, the new multisig should only become +responsible for `Burn`s shortly after it has taken ownership of the stream of +newly received coins. + +Before it takes responsibility, it also should've been transferred all internal +outputs under the standard scheduling flow. Any delayed outputs will be +immediately forwarded, and external stragglers are only reported to Serai once +sufficiently confirmed in the new multisig. Accordingly, liquidity should avoid +fragmentation during rotation. The only latency should be on the 10 minutes +present, and on delayed outputs, which should've been immediately usable, having +to wait another `CONFIRMATIONS` blocks to be confirmed once forwarded. + +Immediate forwarding does unfortunately prevent batching inputs to reduce fees. +Given immediate forwarding only applies to latent outputs, considered +exceptional, and the protocol's fee handling ensures solvency, this is accepted. diff --git a/docs/processor/Scanning.md b/docs/processor/Scanning.md new file mode 100644 index 000000000..f03e36058 --- /dev/null +++ b/docs/processor/Scanning.md @@ -0,0 +1,31 @@ +# Scanning + +Only blocks with finality, either actual or sufficiently probabilistic, are +operated upon. This is referred to as a block with `CONFIRMATIONS` +confirmations, the block itself being the first confirmation. + +For chains which promise finality on a known schedule, `CONFIRMATIONS` is set to +`1` and each group of finalized blocks is treated as a single block, with the +tail block's hash representing the entire group. + +For chains which offer finality on an unknown schedule, `CONFIRMATIONS` is +still set to `1` yet blocks aren't aggregated into a group. They're handled +individually, yet only once finalized. This allows networks which form +finalization erratically to not have to agree on when finalizations were formed, +solely that the blocks contained have a finalized descendant.
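As a minimal sketch of the confirmation rule above (the constant, function names, and parameters here are illustrative assumptions, not the processor's actual API):

```rust
// Hypothetical sketch of the confirmation rule described above; not the
// processor's actual API.
const CONFIRMATIONS: u64 = 6;

// The block itself counts as its first confirmation.
fn confirmations(block_number: u64, latest_block_number: u64) -> u64 {
  (latest_block_number + 1).saturating_sub(block_number)
}

// A block is only operated upon once it has `CONFIRMATIONS` confirmations.
fn eligible_for_scanning(block_number: u64, latest_block_number: u64) -> bool {
  confirmations(block_number, latest_block_number) >= CONFIRMATIONS
}
```

For the finality cases described above, `CONFIRMATIONS` would be `1`, so a block becomes eligible as soon as it's part of the finalized chain.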
+ +### Notability, causing a `Batch` + +`Batch`s are only created for blocks which it is beneficial to achieve ordering on. +These are: + +- Blocks which contain transactions relevant to Serai +- Blocks in which a new multisig activates +- Blocks in which a prior multisig retires + +### Waiting for `Batch` inclusion + +Once a `Batch` is created, it is expected to eventually be included on Serai. +If the `Batch` isn't included within `CONFIRMATIONS` blocks of its creation, the +scanner will wait until its inclusion before scanning +`batch_block + CONFIRMATIONS`. diff --git a/mini/Cargo.toml b/mini/Cargo.toml new file mode 100644 index 000000000..fc0abf8a0 --- /dev/null +++ b/mini/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "mini-serai" +version = "0.1.0" +description = "A miniature version of Serai used to test for race conditions" +license = "AGPL-3.0-only" +repository = "https://github.com/serai-dex/serai/tree/develop/mini" +authors = ["Luke Parker "] +keywords = [] +edition = "2021" +publish = false + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +loom = "0.7" diff --git a/mini/LICENSE b/mini/LICENSE new file mode 100644 index 000000000..f684d0271 --- /dev/null +++ b/mini/LICENSE @@ -0,0 +1,15 @@ +AGPL-3.0-only license + +Copyright (c) 2023 Luke Parker + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License Version 3 as +published by the Free Software Foundation. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. diff --git a/mini/README.md b/mini/README.md new file mode 100644 index 000000000..8e8daf71a --- /dev/null +++ b/mini/README.md @@ -0,0 +1,27 @@ +# Mini Serai + +A miniature version of the Serai stack, intended to demonstrate a lack of +system-wide race conditions in the officially stated flows. + +### Why + +When working on multiple multisigs, a race condition was noted. Originally, the +documentation stated that the activation block of the new multisig would be the +block after the next `Batch`'s block. This introduced a race condition, where +since multiple `Batch`s can be signed at the same time, multiple `Batch`s can +exist in the mempool at the same time. This could cause `Batch`s [1, 2] to +exist in the mempool, 1 to be published (causing 2 to be the activation block of +the new multisig), yet then the already signed 2 to be published (despite +no longer being accurate as it only had events for a subset of keys). + +This effort initially modeled and tested this single race condition, yet aims to +grow to the entire system. Then we just have to prove the actual Serai stack's +flow reduces to the miniature flow modeled here. While further efforts are +needed to prove Serai's implementation of the flow is itself free of race +conditions, this is a layer of defense over the theory. + +### How + +[loom](https://docs.rs/loom) is a library which will execute a block of code +with every possible combination of orders in order to test results aren't +invalidated by order of execution.
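To give a feel for the tool, here is a standalone sketch (not code from this PR) of how a loom test is structured; `loom::model` re-executes the closure under every permitted interleaving of the spawned threads:

```rust
use loom::{
  sync::{Arc, RwLock},
  thread,
};

#[test]
fn loom_example() {
  // loom::model re-runs this closure for every possible interleaving of the
  // two spawned threads, checking the assertion holds in each one.
  loom::model(|| {
    let shared = Arc::new(RwLock::new(Vec::new()));

    let handles: Vec<_> = (0 .. 2)
      .map(|i| {
        let shared = shared.clone();
        thread::spawn(move || shared.write().unwrap().push(i))
      })
      .collect();
    for handle in handles {
      handle.join().unwrap();
    }

    // Both writes always land; only their order differs between executions
    assert_eq!(shared.read().unwrap().len(), 2);
  });
}
```

The tests in `mini/src/tests` follow the same pattern, with the shared state being the modeled Serai chain and the threads being the modeled processor(s).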
diff --git a/mini/src/lib.rs b/mini/src/lib.rs new file mode 100644 index 000000000..57cd8693e --- /dev/null +++ b/mini/src/lib.rs @@ -0,0 +1,150 @@ +use std::sync::{Arc as StdArc, RwLock as StdRwLock}; + +use loom::{ + thread::{self, JoinHandle}, + sync::{Arc, RwLock, mpsc}, +}; + +#[cfg(test)] +mod tests; + +#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub struct Batch { + block: u64, + keys: Vec, +} + +#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub enum Event { + IncludedBatch(Batch), + // Allows if let else on this without clippy believing it's redundant + __Ignore, +} + +// The amount of blocks to scan after we publish a batch, before confirming the batch was +// included. +// Prevents race conditions on rotation regarding when the new keys activate. +const BATCH_FTL: u64 = 3; + +#[derive(Debug)] +pub struct Serai { + handle: JoinHandle<()>, + remaining_ticks: Arc>, + // Activation block, ID + pub active_keys: Arc>>, + pub mempool_batches: Arc>>, + pub events: mpsc::Receiver, + all_events_unsafe: StdArc>>, +} + +impl Serai { + #[allow(clippy::new_without_default)] + pub fn new(ticks: usize, mut queued_key: bool) -> Serai { + let remaining_ticks = Arc::new(RwLock::new(ticks)); + + let active_keys = Arc::new(RwLock::new(vec![(0, 0)])); + let mempool_batches = Arc::new(RwLock::new(vec![])); + let (events_sender, events_receiver) = mpsc::channel(); + let all_events_unsafe = StdArc::new(StdRwLock::new(vec![])); + + let handle = thread::spawn({ + let remaining_ticks = remaining_ticks.clone(); + + let active_keys = active_keys.clone(); + let mempool_batches = mempool_batches.clone(); + let all_events_unsafe = all_events_unsafe.clone(); + + move || { + while { + let mut remaining_ticks = remaining_ticks.write().unwrap(); + let ticking = *remaining_ticks != 0; + *remaining_ticks = remaining_ticks.saturating_sub(1); + ticking + } { + let mut batches = mempool_batches.write().unwrap(); + if !batches.is_empty() { + let batch: Batch = batches.remove(0); + + // Activate keys after the FTL + if queued_key { + let mut active_keys = active_keys.write().unwrap(); + let len = active_keys.len().try_into().unwrap(); + // TODO: active_keys is under Serai, yet the processor is the one actually with the + // context on when it activates + // This should be re-modeled as an event + active_keys.push((batch.block + BATCH_FTL, len)); + } + queued_key = false; + + let event = Event::IncludedBatch(batch); + events_sender.send(event.clone()).unwrap(); + all_events_unsafe.write().unwrap().push(event); + } + } + } + }); + + Serai { + handle, + remaining_ticks, + mempool_batches, + active_keys, + events: events_receiver, + all_events_unsafe, + } + } + + pub fn exhausted(&self) -> bool { + *self.remaining_ticks.read().unwrap() == 0 + } + + pub fn join(self) -> Vec { + self.handle.join().unwrap(); + + self.all_events_unsafe.read().unwrap().clone() + } +} + +#[derive(Debug)] +pub struct Processor { + handle: JoinHandle, +} + +impl Processor { + pub fn new(serai: Serai, blocks: u64) -> Processor { + let handle = thread::spawn(move || { + let mut last_finalized_block = 0; + for b in 0 .. 
blocks { + // If this block is too far ahead of Serai's last block, wait for Serai to process + // Note this wait only has to occur if we have a Batch which has yet to be included + // mini just publishes a Batch for every Block at this point in time, meaning it always has + // to wait + while b >= (last_finalized_block + BATCH_FTL) { + if serai.exhausted() { + return serai; + } + let Ok(event) = serai.events.recv() else { return serai }; + if let Event::IncludedBatch(Batch { block, .. }) = event { + last_finalized_block = block; + } + } + serai.mempool_batches.write().unwrap().push(Batch { + block: b, + keys: serai + .active_keys + .read() + .unwrap() + .iter() + .filter_map(|(activation_block, id)| Some(*id).filter(|_| b >= *activation_block)) + .collect(), + }); + } + serai + }); + Processor { handle } + } + + pub fn join(self) -> Serai { + self.handle.join().unwrap() + } +} diff --git a/mini/src/tests/activation_race/mod.rs b/mini/src/tests/activation_race/mod.rs new file mode 100644 index 000000000..846664125 --- /dev/null +++ b/mini/src/tests/activation_race/mod.rs @@ -0,0 +1,174 @@ +use std::{ + collections::HashSet, + sync::{Arc as StdArc, RwLock as StdRwLock}, +}; + +use crate::*; + +#[test] +fn activation_race() { + #[derive(Debug)] + struct EagerProcessor { + handle: JoinHandle, + } + + impl EagerProcessor { + fn new(serai: Serai, batches: u64) -> EagerProcessor { + let handle = thread::spawn(move || { + for b in 0 .. batches { + serai.mempool_batches.write().unwrap().push(Batch { + block: b, + keys: serai + .active_keys + .read() + .unwrap() + .iter() + .filter_map(|(activation_block, id)| Some(*id).filter(|_| b >= *activation_block)) + .collect(), + }); + } + serai + }); + EagerProcessor { handle } + } + + fn join(self) -> Serai { + self.handle.join().unwrap() + } + } + + let results = StdArc::new(StdRwLock::new(HashSet::new())); + + loom::model({ + let results = results.clone(); + move || { + let serai = Serai::new(4, true); + let processor = EagerProcessor::new(serai, 4); + let serai = processor.join(); + let events = serai.join(); + + results.write().unwrap().insert(events); + } + }); + + let results: HashSet<_> = results.read().unwrap().clone(); + assert_eq!(results.len(), 6); + for result in results { + for (b, batch) in result.into_iter().enumerate() { + if b < 3 { + assert_eq!( + batch, + Event::IncludedBatch(Batch { block: b.try_into().unwrap(), keys: vec![0] }) + ); + } else { + let Event::IncludedBatch(batch) = batch else { panic!("unexpected event") }; + assert_eq!(batch.block, b.try_into().unwrap()); + assert!((batch.keys == vec![0]) || (batch.keys == vec![0, 1])); + } + } + } +} + +#[test] +fn sequential_solves_activation_race() { + #[derive(Debug)] + struct DelayedProcessor { + handle: JoinHandle, + } + + impl DelayedProcessor { + fn new(serai: Serai, batches: u64) -> DelayedProcessor { + let handle = thread::spawn(move || { + for b in 0 .. 
batches { + let batch = { + let mut batches = serai.mempool_batches.write().unwrap(); + let batch = Batch { + block: b, + keys: serai + .active_keys + .read() + .unwrap() + .iter() + .filter_map(|(activation_block, id)| Some(*id).filter(|_| b >= *activation_block)) + .collect(), + }; + batches.push(batch.clone()); + batch + }; + + while (!serai.exhausted()) && + (serai.events.recv().unwrap() != Event::IncludedBatch(batch.clone())) + { + loom::thread::yield_now(); + } + } + serai + }); + DelayedProcessor { handle } + } + + fn join(self) -> Serai { + self.handle.join().unwrap() + } + } + + let results = StdArc::new(StdRwLock::new(HashSet::new())); + + loom::model({ + let results = results.clone(); + move || { + let serai = Serai::new(4, true); + let processor = DelayedProcessor::new(serai, 4); + let serai = processor.join(); + let events = serai.join(); + + results.write().unwrap().insert(events); + } + }); + + let results: HashSet<_> = results.read().unwrap().clone(); + assert_eq!(results.len(), 5); + for result in results { + for (b, batch) in result.into_iter().enumerate() { + assert_eq!( + batch, + Event::IncludedBatch(Batch { + block: b.try_into().unwrap(), + keys: if b < 3 { vec![0] } else { vec![0, 1] } + }), + ); + } + } +} + +#[test] +fn ftl_solves_activation_race() { + let results = StdArc::new(StdRwLock::new(HashSet::new())); + + loom::model({ + let results = results.clone(); + move || { + let serai = Serai::new(4, true); + // Uses Processor since this Processor has this algorithm implemented + let processor = Processor::new(serai, 4); + let serai = processor.join(); + let events = serai.join(); + + results.write().unwrap().insert(events); + } + }); + + let results: HashSet<_> = results.read().unwrap().clone(); + assert_eq!(results.len(), 5); + for result in results { + for (b, batch) in result.into_iter().enumerate() { + assert_eq!( + batch, + Event::IncludedBatch(Batch { + block: b.try_into().unwrap(), + keys: if b < 3 { vec![0] } else { vec![0, 1] } + }), + ); + } + } +} diff --git a/mini/src/tests/mod.rs b/mini/src/tests/mod.rs new file mode 100644 index 000000000..76fce26c3 --- /dev/null +++ b/mini/src/tests/mod.rs @@ -0,0 +1 @@ +mod activation_race; diff --git a/orchestration/coordinator/Dockerfile b/orchestration/coordinator/Dockerfile index 6a7664a17..92dc1115b 100644 --- a/orchestration/coordinator/Dockerfile +++ b/orchestration/coordinator/Dockerfile @@ -15,6 +15,7 @@ ADD message-queue /serai/message-queue ADD processor /serai/processor ADD coordinator /serai/coordinator ADD substrate /serai/substrate +ADD mini /serai/mini ADD tests /serai/tests ADD Cargo.toml /serai ADD Cargo.lock /serai diff --git a/orchestration/message-queue/Dockerfile b/orchestration/message-queue/Dockerfile index 2f5a3250b..1571f4a36 100644 --- a/orchestration/message-queue/Dockerfile +++ b/orchestration/message-queue/Dockerfile @@ -12,6 +12,7 @@ ADD message-queue /serai/message-queue ADD processor /serai/processor ADD coordinator /serai/coordinator ADD substrate /serai/substrate +ADD mini /serai/mini ADD tests /serai/tests ADD Cargo.toml /serai ADD Cargo.lock /serai diff --git a/orchestration/processor/Dockerfile b/orchestration/processor/Dockerfile index c4b48a89c..a6c2a2666 100644 --- a/orchestration/processor/Dockerfile +++ b/orchestration/processor/Dockerfile @@ -12,6 +12,7 @@ ADD message-queue /serai/message-queue ADD processor /serai/processor ADD coordinator /serai/coordinator ADD substrate /serai/substrate +ADD mini /serai/mini ADD tests /serai/tests ADD Cargo.toml /serai ADD Cargo.lock 
/serai diff --git a/orchestration/runtime/Dockerfile b/orchestration/runtime/Dockerfile index 4d10530b7..b8b13ac63 100644 --- a/orchestration/runtime/Dockerfile +++ b/orchestration/runtime/Dockerfile @@ -20,6 +20,7 @@ ADD message-queue /serai/message-queue ADD processor /serai/processor ADD coordinator /serai/coordinator ADD substrate /serai/substrate +ADD mini /serai/mini ADD tests /serai/tests ADD Cargo.toml /serai ADD Cargo.lock /serai diff --git a/orchestration/serai/Dockerfile b/orchestration/serai/Dockerfile index 32c4f274e..21d0f8a41 100644 --- a/orchestration/serai/Dockerfile +++ b/orchestration/serai/Dockerfile @@ -12,6 +12,7 @@ ADD message-queue /serai/message-queue ADD processor /serai/processor ADD coordinator /serai/coordinator ADD substrate /serai/substrate +ADD mini /serai/mini ADD tests /serai/tests ADD Cargo.toml /serai ADD Cargo.lock /serai diff --git a/processor/README.md b/processor/README.md index 78eeb092a..37d11e0d4 100644 --- a/processor/README.md +++ b/processor/README.md @@ -1,56 +1,5 @@ # Processor -The Serai processor scans a specified chain, communicating with the coordinator. - -### Key Generation - -The coordinator will tell the processor if it's been included in managing a -coin. If so, the processor is to begin the key generation protocol, relying on -the coordinator to provided authenticated communication with the remote parties. - -When the key generation protocol successfully completes, the processor is -expected to inform the coordinator so it may vote on it on the Substrate chain. -Once the key is voted in, it'll become active. - -### Scanning - -Sufficiently confirmed block become finalized in the eyes of the procesor. -Finalized blocks are scanned and have their outputs emitted, though not acted -on. - -### Reporting - -The processor reports finalized blocks to the coordinator. Once the group -acknowledges the block as finalized, they begin a threshold signing protocol -to sign the block's outputs as a `Batch`. - -Once the `Batch` is signed, the processor emits an `Update` with the signed -batch. Serai includes it, definitively ordering its outputs within the context -of Serai. - -### Confirmed Outputs - -With the outputs' ordering, validators are able to act on them. - -Actions are triggered by passing the outputs to the scheduler. The scheduler -will do one of two things: - -1) Use the output -2) Accumulate it for later usage - -### Burn Events - -When the Serai chain issues a `Burn` event, the processor should send coins -accordingly. This is done by scheduling the payments out. - -# TODO - -- Items marked TODO -- Items marked TODO2, yet those only need to be done after protonet -- Test the implementors of Coin against the trait API -- Test the databases -- Test eventuality handling - -- Coordinator communication - -Kafka? RPC ping to them, which we don't count as 'sent' until we get a pong? +The Serai processor scans a specified external network, communicating with the +coordinator. For details on its exact messaging flow, and overall policies, +please view `docs/processor`. 
diff --git a/processor/messages/src/lib.rs b/processor/messages/src/lib.rs index 5ca16544f..7d611b0e2 100644 --- a/processor/messages/src/lib.rs +++ b/processor/messages/src/lib.rs @@ -161,7 +161,6 @@ pub mod substrate { context: SubstrateContext, network: NetworkId, block: u64, - key: Vec, burns: Vec, batches: Vec, }, diff --git a/processor/src/coordinator.rs b/processor/src/coordinator.rs index 056cf6467..b9c10a16a 100644 --- a/processor/src/coordinator.rs +++ b/processor/src/coordinator.rs @@ -25,7 +25,7 @@ impl Coordinator for MessageQueue { } async fn recv(&mut self) -> Message { - // TODO: Use a proper expected next ID + // TODO2: Use a proper expected next ID let msg = self.next(0).await; let id = msg.id; diff --git a/processor/src/db.rs b/processor/src/db.rs index 916c35dda..212341d0c 100644 --- a/processor/src/db.rs +++ b/processor/src/db.rs @@ -1,8 +1,12 @@ use core::marker::PhantomData; +use std::io::Read; + +use scale::{Encode, Decode}; +use serai_client::validator_sets::primitives::{ValidatorSet, KeyPair}; pub use serai_db::*; -use crate::{Plan, networks::Network}; +use crate::networks::{Block, Network}; #[derive(Debug)] pub struct MainDb(D, PhantomData); @@ -25,74 +29,35 @@ impl MainDb { txn.put(Self::handled_key(id), []) } - fn plan_key(id: &[u8]) -> Vec { - Self::main_key(b"plan", id) - } - fn signing_key(key: &[u8]) -> Vec { - Self::main_key(b"signing", key) - } - pub fn save_signing(txn: &mut D::Transaction<'_>, key: &[u8], block_number: u64, plan: &Plan) { - let id = plan.id(); - - { - let mut signing = txn.get(Self::signing_key(key)).unwrap_or(vec![]); - - // If we've already noted we're signing this, return - assert_eq!(signing.len() % 32, 0); - for i in 0 .. (signing.len() / 32) { - if signing[(i * 32) .. ((i + 1) * 32)] == id { - return; - } + fn pending_activation_key() -> Vec { + Self::main_key(b"pending_activation", []) + } + pub fn set_pending_activation( + txn: &mut D::Transaction<'_>, + block_before_queue_block: >::Id, + set: ValidatorSet, + key_pair: KeyPair, + ) { + let mut buf = (set, key_pair).encode(); + buf.extend(block_before_queue_block.as_ref()); + txn.put(Self::pending_activation_key(), buf); + } + pub fn pending_activation( + getter: &G, + ) -> Option<(>::Id, ValidatorSet, KeyPair)> { + if let Some(bytes) = getter.get(Self::pending_activation_key()) { + if !bytes.is_empty() { + let mut slice = bytes.as_slice(); + let (set, key_pair) = <(ValidatorSet, KeyPair)>::decode(&mut slice).unwrap(); + let mut block_before_queue_block = >::Id::default(); + slice.read_exact(block_before_queue_block.as_mut()).unwrap(); + assert!(slice.is_empty()); + return Some((block_before_queue_block, set, key_pair)); } - - signing.extend(&id); - txn.put(Self::signing_key(key), id); - } - - { - let mut buf = block_number.to_le_bytes().to_vec(); - plan.write(&mut buf).unwrap(); - txn.put(Self::plan_key(&id), &buf); } + None } - - pub fn signing(&self, key: &[u8]) -> Vec<(u64, Plan)> { - let signing = self.0.get(Self::signing_key(key)).unwrap_or(vec![]); - let mut res = vec![]; - - assert_eq!(signing.len() % 32, 0); - for i in 0 .. (signing.len() / 32) { - let id = &signing[(i * 32) .. ((i + 1) * 32)]; - let buf = self.0.get(Self::plan_key(id)).unwrap(); - - let block_number = u64::from_le_bytes(buf[.. 
8].try_into().unwrap()); - let plan = Plan::::read::<&[u8]>(&mut &buf[16 ..]).unwrap(); - assert_eq!(id, &plan.id()); - res.push((block_number, plan)); - } - - res - } - - pub fn finish_signing(&mut self, txn: &mut D::Transaction<'_>, key: &[u8], id: [u8; 32]) { - let mut signing = self.0.get(Self::signing_key(key)).unwrap_or(vec![]); - assert_eq!(signing.len() % 32, 0); - - let mut found = false; - for i in 0 .. (signing.len() / 32) { - let start = i * 32; - let end = i + 32; - if signing[start .. end] == id { - found = true; - signing = [&signing[.. start], &signing[end ..]].concat().to_vec(); - break; - } - } - - if !found { - log::warn!("told to finish signing {} yet wasn't actively signing it", hex::encode(id)); - } - - txn.put(Self::signing_key(key), signing); + pub fn clear_pending_activation(txn: &mut D::Transaction<'_>) { + txn.put(Self::pending_activation_key(), []); } } diff --git a/processor/src/key_gen.rs b/processor/src/key_gen.rs index 15be33db9..fe6905da1 100644 --- a/processor/src/key_gen.rs +++ b/processor/src/key_gen.rs @@ -40,9 +40,8 @@ impl KeyGenDb { fn save_params(txn: &mut D::Transaction<'_>, set: &ValidatorSet, params: &ThresholdParams) { txn.put(Self::params_key(set), bincode::serialize(params).unwrap()); } - fn params(getter: &G, set: &ValidatorSet) -> ThresholdParams { - // Directly unwraps the .get() as this will only be called after being set - bincode::deserialize(&getter.get(Self::params_key(set)).unwrap()).unwrap() + fn params(getter: &G, set: &ValidatorSet) -> Option { + getter.get(Self::params_key(set)).map(|bytes| bincode::deserialize(&bytes).unwrap()) } // Not scoped to the set since that'd have latter attempts overwrite former @@ -92,13 +91,13 @@ impl KeyGenDb { fn read_keys( getter: &G, key: &[u8], - ) -> (Vec, (ThresholdKeys, ThresholdKeys)) { - let keys_vec = getter.get(key).unwrap(); + ) -> Option<(Vec, (ThresholdKeys, ThresholdKeys))> { + let keys_vec = getter.get(key)?; let mut keys_ref: &[u8] = keys_vec.as_ref(); let substrate_keys = ThresholdKeys::new(ThresholdCore::read(&mut keys_ref).unwrap()); let mut network_keys = ThresholdKeys::new(ThresholdCore::read(&mut keys_ref).unwrap()); N::tweak_keys(&mut network_keys); - (keys_vec, (substrate_keys, network_keys)) + Some((keys_vec, (substrate_keys, network_keys))) } fn confirm_keys( txn: &mut D::Transaction<'_>, @@ -106,7 +105,8 @@ impl KeyGenDb { key_pair: KeyPair, ) -> (ThresholdKeys, ThresholdKeys) { let (keys_vec, keys) = - Self::read_keys(txn, &Self::generated_keys_key(set, (&key_pair.0 .0, key_pair.1.as_ref()))); + Self::read_keys(txn, &Self::generated_keys_key(set, (&key_pair.0 .0, key_pair.1.as_ref()))) + .unwrap(); assert_eq!(key_pair.0 .0, keys.0.group_key().to_bytes()); assert_eq!( { @@ -121,10 +121,10 @@ impl KeyGenDb { fn keys( getter: &G, key: &::G, - ) -> (ThresholdKeys, ThresholdKeys) { - let res = Self::read_keys(getter, &Self::keys_key(key)).1; + ) -> Option<(ThresholdKeys, ThresholdKeys)> { + let res = Self::read_keys(getter, &Self::keys_key(key))?.1; assert_eq!(&res.1.group_key(), key); - res + Some(res) } } @@ -147,13 +147,21 @@ impl KeyGen { KeyGen { db, entropy, active_commit: HashMap::new(), active_share: HashMap::new() } } + pub fn in_set(&self, set: &ValidatorSet) -> bool { + // We determine if we're in set using if we have the parameters for a set's key generation + KeyGenDb::::params(&self.db, set).is_some() + } + pub fn keys( &self, key: &::G, - ) -> (ThresholdKeys, ThresholdKeys) { + ) -> Option<(ThresholdKeys, ThresholdKeys)> { // This is safe, despite not having a 
txn, since it's a static value // The only concern is it may not be set when expected, or it may be set unexpectedly - // Since this unwraps, it being unset when expected to be set will cause a panic + // + // They're only expected to be set on boot, if confirmed. If they were confirmed yet the + // transaction wasn't committed, their confirmation will be re-handled + // // The only other concern is if it's set when it's not safe to use // The keys are only written on confirmation, and the transaction writing them is atomic to // every associated operation @@ -220,7 +228,7 @@ impl KeyGen { panic!("commitments when already handled commitments"); } - let params = KeyGenDb::::params(txn, &id.set); + let params = KeyGenDb::::params(txn, &id.set).unwrap(); // Unwrap the machines, rebuilding them if we didn't have them in our cache // We won't if the processor rebooted @@ -288,7 +296,7 @@ impl KeyGen { CoordinatorMessage::Shares { id, shares } => { info!("Received shares for {:?}", id); - let params = KeyGenDb::::params(txn, &id.set); + let params = KeyGenDb::::params(txn, &id.set).unwrap(); // Same commentary on inconsistency as above exists let machines = self.active_share.remove(&id.set).unwrap_or_else(|| { diff --git a/processor/src/main.rs b/processor/src/main.rs index f364b2aa7..523fa3b17 100644 --- a/processor/src/main.rs +++ b/processor/src/main.rs @@ -1,28 +1,19 @@ -use std::{ - time::Duration, - collections::{VecDeque, HashMap}, -}; +use std::{sync::RwLock, time::Duration, collections::HashMap}; use zeroize::{Zeroize, Zeroizing}; use transcript::{Transcript, RecommendedTranscript}; -use ciphersuite::group::GroupEncoding; -use frost::{curve::Ciphersuite, ThresholdKeys}; +use ciphersuite::{group::GroupEncoding, Ciphersuite}; -use log::{info, warn, error}; +use log::{info, warn}; use tokio::time::sleep; -use scale::{Encode, Decode}; - use serai_client::{ - primitives::{MAX_DATA_LEN, BlockHash, NetworkId}, - tokens::primitives::{OutInstruction, OutInstructionWithBalance}, - in_instructions::primitives::{ - Shorthand, RefundableInInstruction, InInstructionWithBalance, Batch, MAX_BATCH_SIZE, - }, + primitives::{BlockHash, NetworkId}, + validator_sets::primitives::{ValidatorSet, KeyPair}, }; -use messages::{SubstrateContext, CoordinatorMessage, ProcessorMessage}; +use messages::{CoordinatorMessage, ProcessorMessage}; use serai_env as env; @@ -32,7 +23,7 @@ mod plan; pub use plan::*; mod networks; -use networks::{OutputType, Output, PostFeeBranch, Block, Network}; +use networks::{PostFeeBranch, Block, Network, get_latest_block_number, get_block}; #[cfg(feature = "bitcoin")] use networks::Bitcoin; #[cfg(feature = "monero")] @@ -56,76 +47,12 @@ use signer::{SignerEvent, Signer}; mod substrate_signer; use substrate_signer::{SubstrateSignerEvent, SubstrateSigner}; -mod scanner; -use scanner::{ScannerEvent, Scanner, ScannerHandle}; - -mod scheduler; -use scheduler::Scheduler; +mod multisigs; +use multisigs::{MultisigEvent, MultisigManager}; #[cfg(test)] mod tests; -async fn get_latest_block_number(network: &N) -> usize { - loop { - match network.get_latest_block_number().await { - Ok(number) => { - return number; - } - Err(e) => { - error!( - "couldn't get the latest block number in main's error-free get_block. {} {}", - "this should only happen if the node is offline. 
error: ", e - ); - sleep(Duration::from_secs(10)).await; - } - } - } -} - -async fn get_block(network: &N, block_number: usize) -> N::Block { - loop { - match network.get_block(block_number).await { - Ok(block) => { - return block; - } - Err(e) => { - error!("couldn't get block {block_number} in main's error-free get_block. error: {}", e); - sleep(Duration::from_secs(10)).await; - } - } - } -} - -async fn get_fee(network: &N, block_number: usize) -> N::Fee { - // TODO2: Use an fee representative of several blocks - get_block(network, block_number).await.median_fee() -} - -async fn prepare_send( - network: &N, - keys: ThresholdKeys, - block_number: usize, - fee: N::Fee, - plan: Plan, -) -> (Option<(N::SignableTransaction, N::Eventuality)>, Vec) { - loop { - match network.prepare_send(keys.clone(), block_number, plan.clone(), fee).await { - Ok(prepared) => { - return prepared; - } - Err(e) => { - error!("couldn't prepare a send for plan {}: {e}", hex::encode(plan.id())); - // The processor is either trying to create an invalid TX (fatal) or the node went - // offline - // The former requires a patch, the latter is a connection issue - // If the latter, this is an appropriate sleep. If the former, we should panic, yet - // this won't flood the console ad infinitum - sleep(Duration::from_secs(60)).await; - } - } - } -} - // Items which are mutably borrowed by Tributary. // Any exceptions to this have to be carefully monitored in order to ensure consistency isn't // violated. @@ -164,71 +91,29 @@ struct TributaryMutable { // Items which are mutably borrowed by Substrate. // Any exceptions to this have to be carefully monitored in order to ensure consistency isn't // violated. -struct SubstrateMutable { - // The scanner is expected to autonomously operate, scanning blocks as they appear. - // When a block is sufficiently confirmed, the scanner mutates the signer to try and get a Batch - // signed. - // The scanner itself only mutates its list of finalized blocks and in-memory state though. - // Disk mutations to the scan-state only happen when Substrate says to. - - // This can't be mutated as soon as a Batch is signed since the mutation which occurs then is - // paired with the mutations caused by Burn events. Substrate's ordering determines if such a - // pairing exists. - scanner: ScannerHandle, - - // Schedulers take in new outputs, from the scanner, and payments, from Burn events on Substrate. - // These are paired when possible, in the name of efficiency. Accordingly, both mutations must - // happen by Substrate. 
- schedulers: HashMap, Scheduler>, -} -async fn sign_plans( - txn: &mut D::Transaction<'_>, - network: &N, - substrate_mutable: &mut SubstrateMutable, - signers: &mut HashMap, Signer>, - context: SubstrateContext, - plans: Vec>, -) { - let mut plans = VecDeque::from(plans); - - let mut block_hash = >::Id::default(); - block_hash.as_mut().copy_from_slice(&context.network_latest_finalized_block.0); - // block_number call is safe since it unwraps - let block_number = substrate_mutable - .scanner - .block_number(&block_hash) - .await - .expect("told to sign_plans on a context we're not synced to"); - - let fee = get_fee(network, block_number).await; - - while let Some(plan) = plans.pop_front() { - let id = plan.id(); - info!("preparing plan {}: {:?}", hex::encode(id), plan); - - let key = plan.key.to_bytes(); - MainDb::::save_signing(txn, key.as_ref(), block_number.try_into().unwrap(), &plan); - let (tx, branches) = - prepare_send(network, signers.get_mut(key.as_ref()).unwrap().keys(), block_number, fee, plan) - .await; +/* + The MultisigManager contains the Scanner and Schedulers. - for branch in branches { - substrate_mutable - .schedulers - .get_mut(key.as_ref()) - .expect("didn't have a scheduler for a key we have a plan for") - .created_output::(txn, branch.expected, branch.actual); - } + The scanner is expected to autonomously operate, scanning blocks as they appear. When a block is + sufficiently confirmed, the scanner causes the Substrate signer to sign a batch. It itself only + mutates its list of finalized blocks, to protect against re-orgs, and its in-memory state though. - if let Some((tx, eventuality)) = tx { - substrate_mutable.scanner.register_eventuality(block_number, id, eventuality.clone()).await; - signers.get_mut(key.as_ref()).unwrap().sign_transaction(txn, id, tx, eventuality).await; - } + Disk mutations to the scan-state only happens once the relevant `Batch` is included on Substrate. + It can't be mutated as soon as the `Batch` is signed as we need to know the order of `Batch`s + relevant to `Burn`s. - // TODO: If the TX is None, should we restore its inputs to the scheduler? - } -} + Schedulers take in new outputs, confirmed in `Batch`s, and outbound payments, triggered by + `Burn`s. + + Substrate also decides when to move to a new multisig, hence why this entire object is + Substate-mutable. + + Since MultisigManager should always be verifiable, and the Tributary is temporal, MultisigManager + being entirely SubstrateMutable shows proper data pipe-lining. +*/ + +type SubstrateMutable = MultisigManager; async fn handle_coordinator_msg( txn: &mut D::Transaction<'_>, @@ -239,15 +124,18 @@ async fn handle_coordinator_msg( msg: &Message, ) { // If this message expects a higher block number than we have, halt until synced - async fn wait(scanner: &ScannerHandle, block_hash: &BlockHash) { + async fn wait( + txn: &D::Transaction<'_>, + substrate_mutable: &SubstrateMutable, + block_hash: &BlockHash, + ) { let mut needed_hash = >::Id::default(); needed_hash.as_mut().copy_from_slice(&block_hash.0); - let block_number = loop { + loop { // Ensure our scanner has scanned this block, which means our daemon has this block at // a sufficient depth - // The block_number may be set even if scanning isn't complete - let Some(block_number) = scanner.block_number(&needed_hash).await else { + if substrate_mutable.block_number(txn, &needed_hash).await.is_none() { warn!( "node is desynced. 
we haven't scanned {} which should happen after {} confirms", hex::encode(&needed_hash), @@ -256,19 +144,10 @@ async fn handle_coordinator_msg( sleep(Duration::from_secs(10)).await; continue; }; - break block_number; - }; - - // While the scanner has cemented this block, that doesn't mean it's been scanned for all - // keys - // ram_scanned will return the lowest scanned block number out of all keys - // This is a safe call which fulfills the unfulfilled safety requirements from the prior call - while scanner.ram_scanned().await < block_number { - sleep(Duration::from_secs(1)).await; + break; } - // TODO: Sanity check we got an AckBlock (or this is the AckBlock) for the block in - // question + // TODO2: Sanity check we got an AckBlock (or this is the AckBlock) for the block in question /* let synced = |context: &SubstrateContext, key| -> Result<(), ()> { @@ -288,13 +167,38 @@ async fn handle_coordinator_msg( } if let Some(required) = msg.msg.required_block() { - // wait only reads from, it doesn't mutate, the scanner - wait(&substrate_mutable.scanner, &required).await; + // wait only reads from, it doesn't mutate, substrate_mutable + wait(txn, substrate_mutable, &required).await; } - // TODO: Shouldn't we create a txn here and pass it around as needed? - // The txn would ack this message ID. If we detect this message ID as handled in the DB, - // we'd move on here. Only after committing the TX would we report it as acked. + async fn activate_key( + network: &N, + substrate_mutable: &mut SubstrateMutable, + tributary_mutable: &mut TributaryMutable, + txn: &mut D::Transaction<'_>, + set: ValidatorSet, + key_pair: KeyPair, + activation_number: usize, + ) { + info!("activating {set:?}'s keys at {activation_number}"); + + let network_key = ::Curve::read_G::<&[u8]>(&mut key_pair.1.as_ref()) + .expect("Substrate finalized invalid point as a network's key"); + + if tributary_mutable.key_gen.in_set(&set) { + // See TributaryMutable's struct definition for why this block is safe + let KeyConfirmed { substrate_keys, network_keys } = + tributary_mutable.key_gen.confirm(txn, set, key_pair.clone()).await; + if set.session.0 == 0 { + tributary_mutable.substrate_signer = Some(SubstrateSigner::new(N::NETWORK, substrate_keys)); + } + tributary_mutable + .signers + .insert(key_pair.1.into(), Signer::new(network.clone(), network_keys)); + } + + substrate_mutable.add_key(txn, activation_number, network_key).await; + } match msg.msg.clone() { CoordinatorMessage::KeyGen(msg) => { @@ -304,38 +208,47 @@ async fn handle_coordinator_msg( } CoordinatorMessage::Sign(msg) => { - tributary_mutable.signers.get_mut(msg.key()).unwrap().handle(txn, msg).await; + tributary_mutable + .signers + .get_mut(msg.key()) + .expect("coordinator told us to sign with a signer we don't have") + .handle(txn, msg) + .await; } CoordinatorMessage::Coordinator(msg) => { - if let Some(substrate_signer) = tributary_mutable.substrate_signer.as_mut() { - substrate_signer.handle(txn, msg).await; - } + tributary_mutable + .substrate_signer + .as_mut() + .expect( + "coordinator told us to sign a batch when we don't have a Substrate signer at this time", + ) + .handle(txn, msg) + .await; } CoordinatorMessage::Substrate(msg) => { match msg { messages::substrate::CoordinatorMessage::ConfirmKeyPair { context, set, key_pair } => { // This is the first key pair for this network so no block has been finalized yet - let activation_number = if context.network_latest_finalized_block.0 == [0; 32] { + // TODO: Write documentation for this in docs/ + // TODO: 
Use an Option instead of a magic? + if context.network_latest_finalized_block.0 == [0; 32] { assert!(tributary_mutable.signers.is_empty()); assert!(tributary_mutable.substrate_signer.is_none()); - assert!(substrate_mutable.schedulers.is_empty()); + // We can't check this as existing is no longer pub + // assert!(substrate_mutable.existing.as_ref().is_none()); // Wait until a network's block's time exceeds Serai's time - // TODO: This assumes the network has a monotonic clock for its blocks' times, which - // isn't a viable assumption // If the latest block number is 10, then the block indexed by 1 has 10 confirms // 10 + 1 - 10 = 1 - while get_block( - network, - (get_latest_block_number(network).await + 1).saturating_sub(N::CONFIRMATIONS), - ) - .await - .time() < - context.serai_time - { + let mut block_i; + while { + block_i = + (get_latest_block_number(network).await + 1).saturating_sub(N::CONFIRMATIONS); + get_block(network, block_i).await.time() < context.serai_time + } { info!( "serai confirmed the first key pair for a set. {} {}", "we're waiting for a network's finalized block's time to exceed unix time ", @@ -345,9 +258,7 @@ async fn handle_coordinator_msg( } // Find the first block to do so - let mut earliest = - (get_latest_block_number(network).await + 1).saturating_sub(N::CONFIRMATIONS); - assert!(get_block(network, earliest).await.time() >= context.serai_time); + let mut earliest = block_i; // earliest > 0 prevents a panic if Serai creates keys before the genesis block // which... should be impossible // Yet a prevented panic is a prevented panic @@ -358,107 +269,103 @@ async fn handle_coordinator_msg( } // Use this as the activation block - earliest + let activation_number = earliest; + + activate_key( + network, + substrate_mutable, + tributary_mutable, + txn, + set, + key_pair, + activation_number, + ) + .await; } else { - let mut activation_block = >::Id::default(); - activation_block.as_mut().copy_from_slice(&context.network_latest_finalized_block.0); - // This block_number call is safe since it unwraps - substrate_mutable - .scanner - .block_number(&activation_block) - .await - .expect("KeyConfirmed from context we haven't synced") - }; - - info!("activating {set:?}'s keys at {activation_number}"); - - // See TributaryMutable's struct definition for why this block is safe - let KeyConfirmed { substrate_keys, network_keys } = - tributary_mutable.key_gen.confirm(txn, set, key_pair).await; - // TODO2: Don't immediately set this, set it once it's active - tributary_mutable.substrate_signer = - Some(SubstrateSigner::new(N::NETWORK, substrate_keys)); - - let key = network_keys.group_key(); - - substrate_mutable.scanner.rotate_key(txn, activation_number, key).await; - substrate_mutable - .schedulers - .insert(key.to_bytes().as_ref().to_vec(), Scheduler::::new::(txn, key)); - - tributary_mutable - .signers - .insert(key.to_bytes().as_ref().to_vec(), Signer::new(network.clone(), network_keys)); + let mut block_before_queue_block = >::Id::default(); + block_before_queue_block + .as_mut() + .copy_from_slice(&context.network_latest_finalized_block.0); + // We can't set these keys for activation until we know their queue block, which we + // won't until the next Batch is confirmed + // Set this variable so when we get the next Batch event, we can handle it + MainDb::::set_pending_activation(txn, block_before_queue_block, set, key_pair); + } } messages::substrate::CoordinatorMessage::SubstrateBlock { context, network: network_id, - block, - key: key_vec, + block: substrate_block, 
burns, batches, } => { assert_eq!(network_id, N::NETWORK, "coordinator sent us data for another network"); - let mut block_id = >::Id::default(); - block_id.as_mut().copy_from_slice(&context.network_latest_finalized_block.0); + if let Some((block, set, key_pair)) = MainDb::::pending_activation(txn) { + // Only run if this is a Batch belonging to a distinct block + if context.network_latest_finalized_block.as_ref() != block.as_ref() { + let mut queue_block = >::Id::default(); + queue_block.as_mut().copy_from_slice(context.network_latest_finalized_block.as_ref()); + + let activation_number = substrate_mutable + .block_number(txn, &queue_block) + .await + .expect("KeyConfirmed from context we haven't synced") + + N::CONFIRMATIONS; + + activate_key( + network, + substrate_mutable, + tributary_mutable, + txn, + set, + key_pair, + activation_number, + ) + .await; - let key = ::read_G::<&[u8]>(&mut key_vec.as_ref()).unwrap(); + MainDb::::clear_pending_activation(txn); + } + } - // We now have to acknowledge every block for this key up to the acknowledged block - let outputs = substrate_mutable.scanner.ack_up_to_block(txn, key, block_id).await; - // Since this block was acknowledged, we no longer have to sign the batch for it + // Since this block was acknowledged, we no longer have to sign the batches for it if let Some(substrate_signer) = tributary_mutable.substrate_signer.as_mut() { for batch_id in batches { substrate_signer.batch_signed(txn, batch_id); } } - let mut payments = vec![]; - for out in burns { - let OutInstructionWithBalance { - instruction: OutInstruction { address, data }, - balance, - } = out; - assert_eq!(balance.coin.network(), N::NETWORK); - - if let Ok(address) = N::Address::try_from(address.consume()) { - // TODO: Add coin to payment - payments.push(Payment { - address, - data: data.map(|data| data.consume()), - amount: balance.amount.0, - }); - } + let (acquired_lock, to_sign) = + substrate_mutable.substrate_block(txn, network, context, burns).await; + + // Send SubstrateBlockAck, with relevant plan IDs, before we trigger the signing of these + // plans + if !tributary_mutable.signers.is_empty() { + coordinator + .send(messages::ProcessorMessage::Coordinator( + messages::coordinator::ProcessorMessage::SubstrateBlockAck { + network: N::NETWORK, + block: substrate_block, + plans: to_sign.iter().map(|signable| signable.1).collect(), + }, + )) + .await; } - let plans = substrate_mutable - .schedulers - .get_mut(&key_vec) - .expect("key we don't have a scheduler for acknowledged a block") - .schedule::(txn, outputs, payments); - - coordinator - .send(ProcessorMessage::Coordinator( - messages::coordinator::ProcessorMessage::SubstrateBlockAck { - network: N::NETWORK, - block, - plans: plans.iter().map(|plan| plan.id()).collect(), - }, - )) - .await; + // See commentary in TributaryMutable for why this is safe + let signers = &mut tributary_mutable.signers; + for (key, id, tx, eventuality) in to_sign { + if let Some(signer) = signers.get_mut(key.to_bytes().as_ref()) { + signer.sign_transaction(txn, id, tx, eventuality).await; + } + } - sign_plans( - txn, - network, - substrate_mutable, - // See commentary in TributaryMutable for why this is safe - &mut tributary_mutable.signers, - context, - plans, - ) - .await; + // This is not premature, even if this block had multiple `Batch`s created, as the first + // `Batch` alone will trigger all Plans/Eventualities/Signs + if acquired_lock { + substrate_mutable.release_scanner_lock().await; + } } } } @@ -502,63 +409,58 @@ async fn boot( // 
We don't need to re-issue GenerateKey orders because the coordinator is expected to // schedule/notify us of new attempts + // TODO: Is this above comment still true? Not at all due to the planned lack of DKG timeouts? let key_gen = KeyGen::::new(raw_db.clone(), entropy(b"key-gen_entropy")); - // The scanner has no long-standing orders to re-issue - let (mut scanner, active_keys) = Scanner::new(network.clone(), raw_db.clone()); - let mut schedulers = HashMap::, Scheduler>::new(); + let (multisig_manager, current_keys, actively_signing) = + MultisigManager::new(raw_db, network).await; + let mut substrate_signer = None; let mut signers = HashMap::new(); - let main_db = MainDb::new(raw_db.clone()); - - for key in &active_keys { - schedulers.insert(key.to_bytes().as_ref().to_vec(), Scheduler::from_db(raw_db, *key).unwrap()); + let main_db = MainDb::::new(raw_db.clone()); - let (substrate_keys, network_keys) = key_gen.keys(key); + for (i, key) in current_keys.iter().enumerate() { + let Some((substrate_keys, network_keys)) = key_gen.keys(key) else { continue }; + let network_key = network_keys.group_key(); + // If this is the oldest key, load the SubstrateSigner for it as the active SubstrateSigner + // The new key only takes responsibility once the old key is fully deprecated + // // We don't have to load any state for this since the Scanner will re-fire any events - // necessary - // TODO2: This uses most recent as signer, use the active one - substrate_signer = Some(SubstrateSigner::new(N::NETWORK, substrate_keys)); + // necessary, only no longer scanning old blocks once Substrate acks them + if i == 0 { + substrate_signer = Some(SubstrateSigner::new(N::NETWORK, substrate_keys)); + } + // The Scanner re-fires events as needed for substrate_signer yet not signer + // This is due to the transactions which we start signing from due to a block not being + // guaranteed to be signed before we stop scanning the block on reboot + // We could simplify the Signer flow by delaying when it acks a block, yet that'd: + // 1) Increase the startup time + // 2) Cause re-emission of Batch events, which we'd need to check the safety of + // (TODO: Do anyways?) + // 3) Violate the attempt counter (TODO: Is this already being violated?) 
let mut signer = Signer::new(network.clone(), network_keys); - // Load any TXs being actively signed + // Sign any TXs being actively signed let key = key.to_bytes(); - for (block_number, plan) in main_db.signing(key.as_ref()) { - let block_number = block_number.try_into().unwrap(); - - let fee = get_fee(network, block_number).await; - - let id = plan.id(); - info!("reloading plan {}: {:?}", hex::encode(id), plan); - - let (Some((tx, eventuality)), _) = - prepare_send(network, signer.keys(), block_number, fee, plan).await - else { - panic!("previously created transaction is no longer being created") - }; - - scanner.register_eventuality(block_number, id, eventuality.clone()).await; - // TODO: Reconsider if the Signer should have the eventuality, or if just the network/scanner - // should - let mut txn = raw_db.txn(); - signer.sign_transaction(&mut txn, id, tx, eventuality).await; - // This should only have re-writes of existing data - drop(txn); + for (plan, tx, eventuality) in &actively_signing { + if plan.key == network_key { + let mut txn = raw_db.txn(); + signer.sign_transaction(&mut txn, plan.id(), tx.clone(), eventuality.clone()).await; + // This should only have re-writes of existing data + drop(txn); + } } signers.insert(key.as_ref().to_vec(), signer); } - ( - main_db, - TributaryMutable { key_gen, substrate_signer, signers }, - SubstrateMutable { scanner, schedulers }, - ) + (main_db, TributaryMutable { key_gen, substrate_signer, signers }, multisig_manager) } +#[allow(clippy::await_holding_lock)] // Needed for txn, unfortunately can't be down-scoped async fn run(mut raw_db: D, network: N, mut coordinator: Co) { // We currently expect a contextless bidirectional mapping between these two values // (which is that any value of A can be interpreted as B and vice versa) @@ -566,59 +468,17 @@ async fn run(mut raw_db: D, network: N, mut // This check ensures no network which doesn't have a bidirectional mapping is defined assert_eq!(>::Id::default().as_ref().len(), BlockHash([0u8; 32]).0.len()); - let (mut main_db, mut tributary_mutable, mut substrate_mutable) = - boot(&mut raw_db, &network).await; + let (main_db, mut tributary_mutable, mut substrate_mutable) = boot(&mut raw_db, &network).await; // We can't load this from the DB as we can't guarantee atomic increments with the ack function let mut last_coordinator_msg = None; loop { - // Check if the signers have events - // The signers will only have events after the following select executes, which will then - // trigger the loop again, hence why having the code here with no timer is fine - for (key, signer) in tributary_mutable.signers.iter_mut() { - while let Some(msg) = signer.events.pop_front() { - match msg { - SignerEvent::ProcessorMessage(msg) => { - coordinator.send(ProcessorMessage::Sign(msg)).await; - } + // The following select uses this txn in both branches, hence why needing a RwLock to pass it + // around is needed + let txn = RwLock::new(raw_db.txn()); - SignerEvent::SignedTransaction { id, tx } => { - coordinator - .send(ProcessorMessage::Sign(messages::sign::ProcessorMessage::Completed { - key: key.clone(), - id, - tx: tx.as_ref().to_vec(), - })) - .await; - - let mut txn = raw_db.txn(); - // This does mutate the Scanner, yet the eventuality protocol is only run to mutate - // the signer, which is Tributary mutable (and what's currently being mutated) - substrate_mutable.scanner.drop_eventuality(id).await; - main_db.finish_signing(&mut txn, key, id); - txn.commit(); - } - } - } - } - - if let Some(signer) = 
tributary_mutable.substrate_signer.as_mut() { - while let Some(msg) = signer.events.pop_front() { - match msg { - SubstrateSignerEvent::ProcessorMessage(msg) => { - coordinator.send(ProcessorMessage::Coordinator(msg)).await; - } - SubstrateSignerEvent::SignedBatch(batch) => { - coordinator - .send(ProcessorMessage::Substrate(messages::substrate::ProcessorMessage::Update { - batch, - })) - .await; - } - } - } - } + let mut outer_msg = None; tokio::select! { // This blocks the entire processor until it finishes handling this message @@ -627,13 +487,15 @@ async fn run(mut raw_db: D, network: N, mut // the other messages in the queue, it may be beneficial to parallelize these // They could likely be parallelized by type (KeyGen, Sign, Substrate) without issue msg = coordinator.recv() => { + let mut txn = txn.write().unwrap(); + let txn = &mut txn; + assert_eq!(msg.id, (last_coordinator_msg.unwrap_or(msg.id - 1) + 1)); last_coordinator_msg = Some(msg.id); // Only handle this if we haven't already if !main_db.handled_message(msg.id) { - let mut txn = raw_db.txn(); - MainDb::::handle_message(&mut txn, msg.id); + MainDb::::handle_message(txn, msg.id); // This is isolated to better think about how its ordered, or rather, about how the other // cases aren't ordered @@ -646,111 +508,96 @@ async fn run(mut raw_db: D, network: N, mut // This is safe so long as Tributary and Substrate messages don't both expect mutable // references over the same data handle_coordinator_msg( - &mut txn, + &mut **txn, &network, &mut coordinator, &mut tributary_mutable, &mut substrate_mutable, &msg, ).await; - - txn.commit(); } - coordinator.ack(msg).await; + outer_msg = Some(msg); }, - msg = substrate_mutable.scanner.events.recv() => { - let mut txn = raw_db.txn(); - - match msg.unwrap() { - ScannerEvent::Block { block, outputs } => { - let mut block_hash = [0; 32]; - block_hash.copy_from_slice(block.as_ref()); - // TODO: Move this out from Scanner now that the Scanner no longer handles batches - let mut batch_id = substrate_mutable.scanner.next_batch_id(&txn); - - // start with empty batch - let mut batches = vec![Batch { - network: N::NETWORK, - id: batch_id, - block: BlockHash(block_hash), - instructions: vec![], - }]; - for output in outputs { - // If these aren't externally received funds, don't handle it as an instruction - if output.kind() != OutputType::External { - continue; - } - - let mut data = output.data(); - let max_data_len = usize::try_from(MAX_DATA_LEN).unwrap(); - // TODO: Refund if we hit one of the following continues - if data.len() > max_data_len { - error!( - "data in output {} exceeded MAX_DATA_LEN ({MAX_DATA_LEN}): {}. 
skipping", - hex::encode(output.id()), - data.len(), - ); - continue; - } - - let Ok(shorthand) = Shorthand::decode(&mut data) else { continue }; - let Ok(instruction) = RefundableInInstruction::try_from(shorthand) else { continue }; - - // TODO2: Set instruction.origin if not set (and handle refunds in general) - let instruction = InInstructionWithBalance { - instruction: instruction.instruction, - balance: output.balance(), - }; - - let batch = batches.last_mut().unwrap(); - batch.instructions.push(instruction); - - // check if batch is over-size - if batch.encode().len() > MAX_BATCH_SIZE { - // pop the last instruction so it's back in size - let instruction = batch.instructions.pop().unwrap(); - - // bump the id for the new batch - batch_id += 1; - - // make a new batch with this instruction included - batches.push(Batch { - network: N::NETWORK, - id: batch_id, - block: BlockHash(block_hash), - instructions: vec![instruction], - }); - } - } - - // Save the next batch ID - substrate_mutable.scanner.set_next_batch_id(&mut txn, batch_id + 1); - + msg = substrate_mutable.next_event(&txn) => { + let mut txn = txn.write().unwrap(); + let txn = &mut txn; + match msg { + MultisigEvent::Batches(retired_key_new_key, batches) => { // Start signing this batch for batch in batches { info!("created batch {} ({} instructions)", batch.id, batch.instructions.len()); if let Some(substrate_signer) = tributary_mutable.substrate_signer.as_mut() { - substrate_signer - .sign(&mut txn, batch) - .await; + substrate_signer.sign(txn, batch).await; } } - }, - ScannerEvent::Completed(id, tx) => { - // We don't know which signer had this plan, so inform all of them - for (_, signer) in tributary_mutable.signers.iter_mut() { - signer.eventuality_completion(&mut txn, id, &tx).await; + if let Some((retired_key, new_key)) = retired_key_new_key { + // Safe to mutate since all signing operations are done and no more will be added + tributary_mutable.signers.remove(retired_key.to_bytes().as_ref()); + tributary_mutable.substrate_signer.take(); + if let Some((substrate_keys, _)) = tributary_mutable.key_gen.keys(&new_key) { + tributary_mutable.substrate_signer = + Some(SubstrateSigner::new(N::NETWORK, substrate_keys)); + } } }, + MultisigEvent::Completed(key, id, tx) => { + if let Some(signer) = tributary_mutable.signers.get_mut(&key) { + signer.completed(txn, id, tx); + } + } } - - txn.commit(); }, } + + // Check if the signers have events + // The signers will only have events after the above select executes, so having no timeout on + // the above is fine + // TODO: Have the Signers return these events, allowing removing these channels? 
+ for (key, signer) in tributary_mutable.signers.iter_mut() { + while let Some(msg) = signer.events.pop_front() { + match msg { + SignerEvent::ProcessorMessage(msg) => { + coordinator.send(ProcessorMessage::Sign(msg)).await; + } + + SignerEvent::SignedTransaction { id, tx } => { + coordinator + .send(ProcessorMessage::Sign(messages::sign::ProcessorMessage::Completed { + key: key.clone(), + id, + tx: tx.as_ref().to_vec(), + })) + .await; + } + } + } + } + + if let Some(signer) = tributary_mutable.substrate_signer.as_mut() { + while let Some(msg) = signer.events.pop_front() { + match msg { + SubstrateSignerEvent::ProcessorMessage(msg) => { + coordinator.send(ProcessorMessage::Coordinator(msg)).await; + } + SubstrateSignerEvent::SignedBatch(batch) => { + coordinator + .send(ProcessorMessage::Substrate(messages::substrate::ProcessorMessage::Update { + batch, + })) + .await; + } + } + } + } + + txn.into_inner().unwrap().commit(); + if let Some(msg) = outer_msg { + coordinator.ack(msg).await; + } } } diff --git a/processor/src/multisigs/db.rs b/processor/src/multisigs/db.rs new file mode 100644 index 000000000..353bc4f81 --- /dev/null +++ b/processor/src/multisigs/db.rs @@ -0,0 +1,189 @@ +use core::marker::PhantomData; + +use ciphersuite::Ciphersuite; + +pub use serai_db::*; + +use scale::{Encode, Decode}; +use serai_client::in_instructions::primitives::InInstructionWithBalance; + +use crate::{ + Get, Db, Plan, + networks::{Transaction, Network}, +}; + +#[derive(Debug)] +pub struct MultisigsDb(PhantomData, PhantomData); +impl MultisigsDb { + fn multisigs_key(dst: &'static [u8], key: impl AsRef<[u8]>) -> Vec { + D::key(b"MULTISIGS", dst, key) + } + + fn next_batch_key() -> Vec { + Self::multisigs_key(b"next_batch", []) + } + // Set the next batch ID to use + pub fn set_next_batch_id(txn: &mut D::Transaction<'_>, batch: u32) { + txn.put(Self::next_batch_key(), batch.to_le_bytes()); + } + // Get the next batch ID + pub fn next_batch_id(getter: &G) -> u32 { + getter.get(Self::next_batch_key()).map_or(0, |v| u32::from_le_bytes(v.try_into().unwrap())) + } + + fn plan_key(id: &[u8]) -> Vec { + Self::multisigs_key(b"plan", id) + } + fn resolved_key(tx: &[u8]) -> Vec { + Self::multisigs_key(b"resolved", tx) + } + fn signing_key(key: &[u8]) -> Vec { + Self::multisigs_key(b"signing", key) + } + pub fn save_active_plan( + txn: &mut D::Transaction<'_>, + key: &[u8], + block_number: u64, + plan: &Plan, + ) { + let id = plan.id(); + + { + let mut signing = txn.get(Self::signing_key(key)).unwrap_or(vec![]); + + // If we've already noted we're signing this, return + assert_eq!(signing.len() % 32, 0); + for i in 0 .. (signing.len() / 32) { + if signing[(i * 32) .. ((i + 1) * 32)] == id { + return; + } + } + + signing.extend(&id); + txn.put(Self::signing_key(key), id); + } + + { + let mut buf = block_number.to_le_bytes().to_vec(); + plan.write(&mut buf).unwrap(); + txn.put(Self::plan_key(&id), &buf); + } + } + + pub fn active_plans(getter: &G, key: &[u8]) -> Vec<(u64, Plan)> { + let signing = getter.get(Self::signing_key(key)).unwrap_or(vec![]); + let mut res = vec![]; + + assert_eq!(signing.len() % 32, 0); + for i in 0 .. (signing.len() / 32) { + let id = &signing[(i * 32) .. ((i + 1) * 32)]; + let buf = getter.get(Self::plan_key(id)).unwrap(); + + let block_number = u64::from_le_bytes(buf[.. 
8].try_into().unwrap()); + let plan = Plan::::read::<&[u8]>(&mut &buf[8 ..]).unwrap(); + assert_eq!(id, &plan.id()); + res.push((block_number, plan)); + } + + res + } + + pub fn resolved_plan( + getter: &G, + tx: >::Id, + ) -> Option<[u8; 32]> { + getter.get(tx.as_ref()).map(|id| id.try_into().unwrap()) + } + pub fn plan_by_key_with_self_change( + getter: &G, + key: ::G, + id: [u8; 32], + ) -> bool { + let plan = + Plan::::read::<&[u8]>(&mut &getter.get(Self::plan_key(&id)).unwrap()[8 ..]).unwrap(); + assert_eq!(plan.id(), id); + (key == plan.key) && (Some(N::change_address(plan.key)) == plan.change) + } + pub fn resolve_plan( + txn: &mut D::Transaction<'_>, + key: &[u8], + plan: [u8; 32], + resolution: >::Id, + ) { + let mut signing = txn.get(Self::signing_key(key)).unwrap_or(vec![]); + assert_eq!(signing.len() % 32, 0); + + let mut found = false; + for i in 0 .. (signing.len() / 32) { + let start = i * 32; + let end = i + 32; + if signing[start .. end] == plan { + found = true; + signing = [&signing[.. start], &signing[end ..]].concat().to_vec(); + break; + } + } + + if !found { + log::warn!("told to finish signing {} yet wasn't actively signing it", hex::encode(plan)); + } + + txn.put(Self::signing_key(key), signing); + + txn.put(Self::resolved_key(resolution.as_ref()), plan); + } + + fn forwarded_output_key(amount: u64) -> Vec { + Self::multisigs_key(b"forwarded_output", amount.to_le_bytes()) + } + pub fn save_forwarded_output( + txn: &mut D::Transaction<'_>, + instruction: InInstructionWithBalance, + ) { + let key = Self::forwarded_output_key(instruction.balance.amount.0); + let mut existing = txn.get(&key).unwrap_or(vec![]); + existing.extend(instruction.encode()); + txn.put(key, existing); + } + pub fn take_forwarded_output( + txn: &mut D::Transaction<'_>, + amount: u64, + ) -> Option { + let key = Self::forwarded_output_key(amount); + + let outputs = txn.get(&key)?; + let mut outputs_ref = outputs.as_slice(); + + let res = InInstructionWithBalance::decode(&mut outputs_ref).unwrap(); + assert!(outputs_ref.len() < outputs.len()); + if outputs_ref.is_empty() { + txn.del(&key); + } else { + txn.put(&key, outputs_ref); + } + Some(res) + } + + fn delayed_output_keys() -> Vec { + Self::multisigs_key(b"delayed_outputs", []) + } + pub fn save_delayed_output(txn: &mut D::Transaction<'_>, instruction: InInstructionWithBalance) { + let key = Self::delayed_output_keys(); + let mut existing = txn.get(&key).unwrap_or(vec![]); + existing.extend(instruction.encode()); + txn.put(key, existing); + } + pub fn take_delayed_outputs(txn: &mut D::Transaction<'_>) -> Vec { + let key = Self::delayed_output_keys(); + + let Some(outputs) = txn.get(&key) else { return vec![] }; + txn.del(key); + + let mut outputs_ref = outputs.as_slice(); + let mut res = vec![]; + while !outputs_ref.is_empty() { + res.push(InInstructionWithBalance::decode(&mut outputs_ref).unwrap()); + } + res + } +} diff --git a/processor/src/multisigs/mod.rs b/processor/src/multisigs/mod.rs new file mode 100644 index 000000000..968e094c9 --- /dev/null +++ b/processor/src/multisigs/mod.rs @@ -0,0 +1,927 @@ +use core::time::Duration; +use std::{sync::RwLock, collections::HashMap}; + +use ciphersuite::{group::GroupEncoding, Ciphersuite}; + +use scale::{Encode, Decode}; +use messages::SubstrateContext; + +use serai_client::{ + primitives::{BlockHash, MAX_DATA_LEN}, + in_instructions::primitives::{ + InInstructionWithBalance, Batch, RefundableInInstruction, Shorthand, MAX_BATCH_SIZE, + }, + tokens::primitives::{OutInstruction, 
OutInstructionWithBalance}, +}; + +use log::{info, error}; + +use tokio::time::sleep; + +#[cfg(not(test))] +mod scanner; +#[cfg(test)] +pub mod scanner; + +use scanner::{ScannerEvent, ScannerHandle, Scanner}; + +mod db; +use db::MultisigsDb; + +#[cfg(not(test))] +mod scheduler; +#[cfg(test)] +pub mod scheduler; +use scheduler::Scheduler; + +use crate::{ + Get, Db, Payment, PostFeeBranch, Plan, + networks::{OutputType, Output, Transaction, SignableTransaction, Block, Network, get_block}, +}; + +// InInstructionWithBalance from an external output +fn instruction_from_output(output: &N::Output) -> Option { + assert_eq!(output.kind(), OutputType::External); + + let mut data = output.data(); + let max_data_len = usize::try_from(MAX_DATA_LEN).unwrap(); + if data.len() > max_data_len { + error!( + "data in output {} exceeded MAX_DATA_LEN ({MAX_DATA_LEN}): {}. skipping", + hex::encode(output.id()), + data.len(), + ); + None?; + } + + let Ok(shorthand) = Shorthand::decode(&mut data) else { None? }; + let Ok(instruction) = RefundableInInstruction::try_from(shorthand) else { None? }; + + // TODO2: Set instruction.origin if not set (and handle refunds in general) + Some(InInstructionWithBalance { instruction: instruction.instruction, balance: output.balance() }) +} + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +enum RotationStep { + // Use the existing multisig for all actions (steps 1-3) + UseExisting, + // Use the new multisig as change (step 4) + NewAsChange, + // The existing multisig is expected to solely forward transactions at this point (step 5) + ForwardFromExisting, + // The existing multisig is expected to finish its own transactions and do nothing more + // (step 6) + ClosingExisting, +} + +async fn get_fee(network: &N, block_number: usize) -> N::Fee { + // TODO2: Use an fee representative of several blocks + get_block(network, block_number).await.median_fee() +} + +async fn prepare_send( + network: &N, + block_number: usize, + fee: N::Fee, + plan: Plan, +) -> (Option<(N::SignableTransaction, N::Eventuality)>, Vec) { + loop { + match network.prepare_send(block_number, plan.clone(), fee).await { + Ok(prepared) => { + return prepared; + } + Err(e) => { + error!("couldn't prepare a send for plan {}: {e}", hex::encode(plan.id())); + // The processor is either trying to create an invalid TX (fatal) or the node went + // offline + // The former requires a patch, the latter is a connection issue + // If the latter, this is an appropriate sleep. 
If the former, we should panic, yet + // this won't flood the console ad infinitum + sleep(Duration::from_secs(60)).await; + } + } + } +} + +pub struct MultisigViewer { + activation_block: usize, + key: ::G, + scheduler: Scheduler, +} + +#[allow(clippy::type_complexity)] +#[derive(Clone, Debug)] +pub enum MultisigEvent { + // Batches to publish + Batches(Option<(::G, ::G)>, Vec), + // Eventuality completion found on-chain + Completed(Vec, [u8; 32], N::Transaction), +} + +pub struct MultisigManager { + scanner: ScannerHandle, + existing: Option>, + new: Option>, +} + +impl MultisigManager { + pub async fn new( + raw_db: &D, + network: &N, + ) -> ( + Self, + Vec<::G>, + Vec<(Plan, N::SignableTransaction, N::Eventuality)>, + ) { + // The scanner has no long-standing orders to re-issue + let (mut scanner, current_keys) = Scanner::new(network.clone(), raw_db.clone()); + + let mut schedulers = vec![]; + + assert!(current_keys.len() <= 2); + let mut actively_signing = vec![]; + for (_, key) in ¤t_keys { + schedulers.push(Scheduler::from_db(raw_db, *key).unwrap()); + + // Load any TXs being actively signed + let key = key.to_bytes(); + for (block_number, plan) in MultisigsDb::::active_plans(raw_db, key.as_ref()) { + let block_number = block_number.try_into().unwrap(); + + let fee = get_fee(network, block_number).await; + + let id = plan.id(); + info!("reloading plan {}: {:?}", hex::encode(id), plan); + + let key_bytes = plan.key.to_bytes(); + + let (Some((tx, eventuality)), _) = + prepare_send(network, block_number, fee, plan.clone()).await + else { + panic!("previously created transaction is no longer being created") + }; + + scanner + .register_eventuality(key_bytes.as_ref(), block_number, id, eventuality.clone()) + .await; + actively_signing.push((plan, tx, eventuality)); + } + } + + ( + MultisigManager { + scanner, + existing: current_keys.get(0).cloned().map(|(activation_block, key)| MultisigViewer { + activation_block, + key, + scheduler: schedulers.remove(0), + }), + new: current_keys.get(1).cloned().map(|(activation_block, key)| MultisigViewer { + activation_block, + key, + scheduler: schedulers.remove(0), + }), + }, + current_keys.into_iter().map(|(_, key)| key).collect(), + actively_signing, + ) + } + + /// Returns the block number for a block hash, if it's known and all keys have scanned the block. + // This is guaranteed to atomically increment so long as no new keys are added to the scanner + // which activate at a block before the currently highest scanned block. This is prevented by + // the processor waiting for `Batch` inclusion before scanning too far ahead, and activation only + // happening after the "too far ahead" window. 
+ pub async fn block_number( + &self, + getter: &G, + hash: &>::Id, + ) -> Option { + let latest = ScannerHandle::::block_number(getter, hash)?; + + // While the scanner has cemented this block, that doesn't mean it's been scanned for all + // keys + // ram_scanned will return the lowest scanned block number out of all keys + if latest > self.scanner.ram_scanned().await { + return None; + } + Some(latest) + } + + pub async fn add_key( + &mut self, + txn: &mut D::Transaction<'_>, + activation_block: usize, + external_key: ::G, + ) { + self.scanner.register_key(txn, activation_block, external_key).await; + let viewer = Some(MultisigViewer { + activation_block, + key: external_key, + scheduler: Scheduler::::new::(txn, external_key), + }); + + if self.existing.is_none() { + self.existing = viewer; + return; + } + self.new = viewer; + } + + fn current_rotation_step(&self, block_number: usize) -> RotationStep { + fn ceil_div(num: usize, denom: usize) -> usize { + let res = num / denom; + if (res * denom) == num { + return res; + } + res + 1 + } + + let Some(new) = self.new.as_ref() else { return RotationStep::UseExisting }; + + // Period numbering here has no meaning other than these the time values useful here, and the + // order they're built in. They have no reference/shared marker with anything else + + // ESTIMATED_BLOCK_TIME_IN_SECONDS is fine to use here. While inaccurate, it shouldn't be + // drastically off, and even if it is, it's a hiccup to latency handling only possible when + // rotating. The error rate wouldn't be acceptable if it was allowed to accumulate over time, + // yet rotation occurs on Serai's clock, disconnecting any errors here from any prior. + + // N::CONFIRMATIONS + 10 minutes + let period_1_start = new.activation_block + + N::CONFIRMATIONS + + ceil_div(10 * 60, N::ESTIMATED_BLOCK_TIME_IN_SECONDS); + + // N::CONFIRMATIONS + let period_2_start = period_1_start + N::CONFIRMATIONS; + + // 6 hours after period 2 + // Also ensure 6 hours is greater than the amount of CONFIRMATIONS, for sanity purposes + let period_3_start = + period_2_start + ((6 * 60 * 60) / N::ESTIMATED_BLOCK_TIME_IN_SECONDS).max(N::CONFIRMATIONS); + + if block_number < period_1_start { + RotationStep::UseExisting + } else if block_number < period_2_start { + RotationStep::NewAsChange + } else if block_number < period_3_start { + RotationStep::ForwardFromExisting + } else { + RotationStep::ClosingExisting + } + } + + // Convert new Burns to Payments. + // + // Also moves payments from the old Scheduler to the new multisig if the step calls for it. 
+ fn burns_to_payments( + &mut self, + txn: &mut D::Transaction<'_>, + step: RotationStep, + burns: Vec, + ) -> (Vec>, Vec>) { + let mut payments = vec![]; + for out in burns { + let OutInstructionWithBalance { instruction: OutInstruction { address, data }, balance } = + out; + assert_eq!(balance.coin.network(), N::NETWORK); + + if let Ok(address) = N::Address::try_from(address.consume()) { + // TODO: Add coin to payment + payments.push(Payment { + address, + data: data.map(|data| data.consume()), + amount: balance.amount.0, + }); + } + } + + let payments = payments; + match step { + RotationStep::UseExisting | RotationStep::NewAsChange => (payments, vec![]), + RotationStep::ForwardFromExisting | RotationStep::ClosingExisting => { + // Consume any payments the prior scheduler was unable to complete + // This should only actually matter once + let mut new_payments = self.existing.as_mut().unwrap().scheduler.consume_payments::(txn); + // Add the new payments + new_payments.extend(payments); + (vec![], new_payments) + } + } + } + + fn split_outputs_by_key(&self, outputs: Vec) -> (Vec, Vec) { + let mut existing_outputs = Vec::with_capacity(outputs.len()); + let mut new_outputs = vec![]; + + let existing_key = self.existing.as_ref().unwrap().key; + let new_key = self.new.as_ref().map(|new| new.key); + for output in outputs { + if output.key() == existing_key { + existing_outputs.push(output); + } else { + assert_eq!(Some(output.key()), new_key); + new_outputs.push(output); + } + } + + (existing_outputs, new_outputs) + } + + // Manually creates Plans for all External outputs needing forwarding/refunding. + // + // Returns created Plans and a map of forwarded output IDs to their associated InInstructions. + fn filter_outputs_due_to_forwarding( + &self, + existing_outputs: &mut Vec, + ) -> (Vec>, HashMap, InInstructionWithBalance>) { + // Manually create a Plan for all External outputs needing forwarding/refunding + + /* + Sending a Plan, with arbitrary data proxying the InInstruction, would require adding + a flow for networks which drop their data to still embed arbitrary data. It'd also have + edge cases causing failures. + + Instead, we save the InInstruction as we scan this output. Then, when the output is + successfully forwarded, we simply read it from the local database. This also saves the + costs of embedding arbitrary data. + + Since we can't rely on the Eventuality system to detect if it's a forwarded transaction, + due to the asynchonicity of the Eventuality system, we instead interpret an External + output with no InInstruction, which has an amount associated with an InInstruction + being forwarded, as having been forwarded. This does create a specific edge case where + a user who doesn't include an InInstruction may not be refunded however, if they share + an exact amount with an expected-to-be-forwarded transaction. This is deemed acceptable. + + TODO: Add a fourth address, forwarded_address, to prevent this. 
+ */ + + let mut plans = vec![]; + let mut forwarding = HashMap::new(); + existing_outputs.retain(|output| { + if output.kind() == OutputType::External { + if let Some(instruction) = instruction_from_output::(output) { + // Build a dedicated Plan forwarding this + plans.push(Plan { + key: self.existing.as_ref().unwrap().key, + inputs: vec![output.clone()], + payments: vec![], + change: Some(N::address(self.new.as_ref().unwrap().key)), + }); + + // Set the instruction for this output to be returned + forwarding.insert(output.id().as_ref().to_vec(), instruction); + } + + // TODO: Refund here + false + } else { + true + } + }); + (plans, forwarding) + } + + // Filter newly received outputs due to the step being RotationStep::ClosingExisting. + fn filter_outputs_due_to_closing( + &mut self, + txn: &mut D::Transaction<'_>, + existing_outputs: &mut Vec, + ) -> Vec> { + /* + The document says to only handle outputs we created. We don't know what outputs we + created. We do have an ordered view of equivalent outputs however, and can assume the + first (and likely only) ones are the ones we created. + + Accordingly, only handling outputs we created should be definable as only handling + outputs from the resolution of Eventualities. + + This isn't feasible. It requires knowing what Eventualities were completed in this block, + when we handle this block, which we don't know without fully serialized scanning + Batch + publication. + + Take the following scenario: + 1) A network uses 10 confirmations. Block x is scanned, meaning x+9a exists. + 2) 67% of nodes process x, create, sign, and publish a TX, creating an Eventuality. + 3) A reorganization to a shorter chain occurs, including the published TX in x+1b. + 4) The 33% of nodes which are latent will be allowed to scan x+1b as soon as x+10b + exists. They won't wait for Serai to include the Batch for x until they try to scan + x+10b. + 5) These latent nodes will handle x+1b, post-create an Eventuality, post-learn x+1b + contained resolutions, changing how x+1b should've been interpreted. + + We either have to: + A) Fully serialize scanning (removing the ability to utilize throughput to allow higher + latency, at least while the step is `ClosingExisting`). + B) Create Eventualities immediately, which we can't do as then both the external + network's clock AND Serai's clock can trigger Eventualities, removing ordering. + We'd need to shift entirely to the external network's clock, only handling Burns + outside the parallelization window (which would be extremely latent). + C) Use a different mechanism to determine if we created an output. + D) Re-define which outputs are still to be handled after the 6 hour period expires, such + that the multisig's lifetime cannot be further extended yet it does fulfill its + responsibility. + + External outputs to the existing multisig will be: + - Scanned before the rotation and unused (as used External outputs become Change) + - Forwarded immediately upon scanning + - Not scanned before the cut off time (and accordingly dropped) + + For the first case, since they're scanned before the rotation and unused, they'll be + forwarded with all other available outputs (since they'll be available when scanned). 
+ + Change outputs will be: + - Scanned before the rotation and forwarded with all other available outputs + - Forwarded immediately upon scanning + - Not scanned before the cut off time, requiring an extension exclusive to these outputs + + The important thing to note about honest Change outputs to the existing multisig is that + they'll only be created within `CONFIRMATIONS+1` blocks of the activation block. Also + important to note is that there's another explicit window of `CONFIRMATIONS` before the + 6 hour window. + + Eventualities are not guaranteed to be known before we scan the block containing their + resolution. They are guaranteed to be known within `CONFIRMATIONS-1` blocks however, due + to the limitation on how far we'll scan ahead. + + This means we will know of all Eventualities related to Change outputs we need to forward + before the 6 hour period begins (as forwarding outputs will not create any Change outputs + to the existing multisig). + + This means a definition of complete can be defined as: + 1) Handled all Branch outputs + 2) Forwarded all External outputs received before the end of 6 hour window + 3) Forwarded the results of all Eventualities with Change, which will have been created + before the 6 hour window + + How can we track and ensure this without needing to check if an output is from the + resolution of an Eventuality? + + 1) We only create Branch outputs before the 6 hour window starts. These are guaranteed + to appear within `CONFIRMATIONS` blocks. They will exist with arbitrary depth however, + meaning that upon completion they will spawn several more Eventualities. The further + created Eventualities re-risk being present after the 6 hour period ends. + + We can: + 1) Build a queue for Branch outputs, delaying their handling until relevant + Eventualities are guaranteed to be present. + + This solution would theoretically work for all outputs and allow collapsing this + problem to simply: + + > Accordingly, only handling outputs we created should be definable as only + handling outputs from the resolution of Eventualities. + + 2) Create all Eventualities under a Branch at time of Branch creation. + This idea fails as Plans are tightly bound to outputs. + + 3) Don't track Branch outputs by Eventualities, yet by the amount of Branch outputs + remaining. Any Branch output received, of a useful amount, is assumed to be our + own and handled. All other Branch outputs, even if they're the completion of some + Eventuality, are dropped. + + This avoids needing any additional queue, avoiding additional pipelining/latency. + + 2) External outputs are self-evident. We simply stop handling them at the cut-off point, + and only start checking after `CONFIRMATIONS` blocks if all Eventualities are + complete. + + 3) Since all Change Eventualities will be known prior to the 6 hour window's beginning, + we can safely check if a received Change output is the resolution of an Eventuality. + We only need to forward it if so. Forwarding it simply requires only checking if + Eventualities are complete after `CONFIRMATIONS` blocks, same as for straggling + External outputs. 
+ */ + + let mut plans = vec![]; + existing_outputs.retain(|output| { + match output.kind() { + OutputType::External => false, + OutputType::Branch => { + let scheduler = &mut self.existing.as_mut().unwrap().scheduler; + // There *would* be a race condition here due to the fact we only mark a `Branch` output + // as needed when we process the block (and handle scheduling), yet actual `Branch` + // outputs may appear as soon as the next block (and we scan the next block before we + // process the prior block) + // + // Unlike Eventuality checking, which happens on scanning and is therefore asynchronous, + // all scheduling (and this check against the scheduler) happens on processing, which is + // synchronous + // + // While we could move Eventuality checking into the block processing, removing its + // asynchonicity, we could only check data the Scanner deems important. The Scanner won't + // deem important Eventuality resolutions which don't create an output to Serai unless + // it knows of the Eventuality. Accordingly, at best we could have a split role (the + // Scanner noting completion of Eventualities which don't have relevant outputs, the + // processing noting completion of ones which do) + // + // This is unnecessary, due to the current flow around Eventuality resolutions and the + // current bounds naturally found being sufficiently amenable, yet notable for the future + if scheduler.can_use_branch(output.amount()) { + // We could simply call can_use_branch, yet it'd have an edge case where if we receive + // two outputs for 100, and we could use one such output, we'd handle both. + // + // Individually schedule each output once confirming they're usable in order to avoid + // this. + let mut plan = scheduler.schedule::( + txn, + vec![output.clone()], + vec![], + self.new.as_ref().unwrap().key, + false, + ); + assert_eq!(plan.len(), 1); + let plan = plan.remove(0); + plans.push(plan); + } + false + } + OutputType::Change => { + // If the TX containing this output resolved an Eventuality... + if let Some(plan) = MultisigsDb::::resolved_plan(txn, output.tx_id()) { + // And the Eventuality had change... + // We need this check as Eventualities have a race condition and can't be relied + // on, as extensively detailed above. Eventualities explicitly with change do have + // a safe timing window however + if MultisigsDb::::plan_by_key_with_self_change( + txn, + // Pass the key so the DB checks the Plan's key is this multisig's, preventing a + // potential issue where the new multisig creates a Plan with change *and a + // payment to the existing multisig's change address* + self.existing.as_ref().unwrap().key, + plan, + ) { + // Then this is an honest change output we need to forward + // (or it's a payment to the change address in the same transaction as an honest + // change output, which is fine to let slip in) + return true; + } + } + false + } + } + }); + plans + } + + // Returns the Plans caused from a block being acknowledged. + // + // Will rotate keys if the block acknowledged is the retirement block. 
+ async fn plans_from_block( + &mut self, + txn: &mut D::Transaction<'_>, + block_number: usize, + block_id: >::Id, + step: &mut RotationStep, + burns: Vec, + ) -> (bool, Vec>, HashMap, InInstructionWithBalance>) { + let (mut existing_payments, mut new_payments) = self.burns_to_payments(txn, *step, burns); + + // We now have to acknowledge the acknowledged block, if it's new + // It won't be if this block's `InInstruction`s were split into multiple `Batch`s + let (acquired_lock, (mut existing_outputs, new_outputs)) = { + let (acquired_lock, outputs) = if ScannerHandle::::db_scanned(txn) + .expect("published a Batch despite never scanning a block") < + block_number + { + let (is_retirement_block, outputs) = self.scanner.ack_block(txn, block_id.clone()).await; + if is_retirement_block { + let existing = self.existing.take().unwrap(); + assert!(existing.scheduler.empty()); + self.existing = self.new.take(); + *step = RotationStep::UseExisting; + assert!(existing_payments.is_empty()); + existing_payments = new_payments; + new_payments = vec![]; + } + (true, outputs) + } else { + (false, vec![]) + }; + (acquired_lock, self.split_outputs_by_key(outputs)) + }; + + let (mut plans, forwarded_external_outputs) = match *step { + RotationStep::UseExisting | RotationStep::NewAsChange => (vec![], HashMap::new()), + RotationStep::ForwardFromExisting => { + self.filter_outputs_due_to_forwarding(&mut existing_outputs) + } + RotationStep::ClosingExisting => { + (self.filter_outputs_due_to_closing(txn, &mut existing_outputs), HashMap::new()) + } + }; + + plans.extend({ + let existing = self.existing.as_mut().unwrap(); + let existing_key = existing.key; + self.existing.as_mut().unwrap().scheduler.schedule::( + txn, + existing_outputs, + existing_payments, + match *step { + RotationStep::UseExisting => existing_key, + RotationStep::NewAsChange | + RotationStep::ForwardFromExisting | + RotationStep::ClosingExisting => self.new.as_ref().unwrap().key, + }, + match *step { + RotationStep::UseExisting | RotationStep::NewAsChange => false, + RotationStep::ForwardFromExisting | RotationStep::ClosingExisting => true, + }, + ) + }); + + for plan in &plans { + assert_eq!(plan.key, self.existing.as_ref().unwrap().key); + if plan.change == Some(N::change_address(plan.key)) { + // Assert these are only created during the expected step + match *step { + RotationStep::UseExisting => {} + RotationStep::NewAsChange | + RotationStep::ForwardFromExisting | + RotationStep::ClosingExisting => panic!("change was set to self despite rotating"), + } + } + } + + if let Some(new) = self.new.as_mut() { + plans.extend(new.scheduler.schedule::(txn, new_outputs, new_payments, new.key, false)); + } + + (acquired_lock, plans, forwarded_external_outputs) + } + + /// Handle a SubstrateBlock event, building the relevant Plans. 
+ pub async fn substrate_block( + &mut self, + txn: &mut D::Transaction<'_>, + network: &N, + context: SubstrateContext, + burns: Vec, + ) -> (bool, Vec<(::G, [u8; 32], N::SignableTransaction, N::Eventuality)>) + { + let mut block_id = >::Id::default(); + block_id.as_mut().copy_from_slice(context.network_latest_finalized_block.as_ref()); + let block_number = ScannerHandle::::block_number(txn, &block_id) + .expect("SubstrateBlock with context we haven't synced"); + + // Determine what step of rotation we're currently in + let mut step = self.current_rotation_step(block_number); + + // Get the Plans from this block + let (acquired_lock, plans, mut forwarded_external_outputs) = + self.plans_from_block(txn, block_number, block_id, &mut step, burns).await; + + let res = { + let mut res = Vec::with_capacity(plans.len()); + let fee = get_fee(network, block_number).await; + + for plan in plans { + let id = plan.id(); + info!("preparing plan {}: {:?}", hex::encode(id), plan); + + let key = plan.key; + let key_bytes = key.to_bytes(); + MultisigsDb::::save_active_plan( + txn, + key_bytes.as_ref(), + block_number.try_into().unwrap(), + &plan, + ); + + let to_be_forwarded = forwarded_external_outputs.remove(plan.inputs[0].id().as_ref()); + if to_be_forwarded.is_some() { + assert_eq!(plan.inputs.len(), 1); + } + let (tx, branches) = prepare_send(network, block_number, fee, plan).await; + + // If this is a Plan for an output we're forwarding, we need to save the InInstruction for + // its output under the amount successfully forwarded + if let Some(mut instruction) = to_be_forwarded { + // If we can't successfully create a forwarding TX, simply drop this + if let Some(tx) = &tx { + instruction.balance.amount.0 -= tx.0.fee(); + MultisigsDb::::save_forwarded_output(txn, instruction); + } + } + + for branch in branches { + let existing = self.existing.as_mut().unwrap(); + let to_use = if key == existing.key { + existing + } else { + let new = self + .new + .as_mut() + .expect("plan wasn't for existing multisig yet there wasn't a new multisig"); + assert_eq!(key, new.key); + new + }; + + to_use.scheduler.created_output::(txn, branch.expected, branch.actual); + } + + if let Some((tx, eventuality)) = tx { + // The main function we return to will send an event to the coordinator which must be + // fired before these registered Eventualities have their Completions fired + // Safety is derived from a mutable lock on the Scanner being preserved, preventing + // scanning (and detection of Eventuality resolutions) before it's released + // It's only released by the main function after it does what it will + self + .scanner + .register_eventuality(key_bytes.as_ref(), block_number, id, eventuality.clone()) + .await; + + res.push((key, id, tx, eventuality)); + } + + // TODO: If the TX is None, restore its inputs to the scheduler + // Otherwise, if the TX had a change output, dropping its inputs would burn funds + // Are there exceptional cases upon rotation? 
+ } + res + }; + (acquired_lock, res) + } + + pub async fn release_scanner_lock(&mut self) { + self.scanner.release_lock().await; + } + + fn scanner_event_to_multisig_event( + &self, + txn: &mut D::Transaction<'_>, + msg: ScannerEvent, + ) -> MultisigEvent { + let (block_number, event) = match msg { + ScannerEvent::Block { is_retirement_block, block, outputs } => { + // Since the Scanner is asynchronous, the following is a concern for race conditions + // We safely know the step of a block since keys are declared, and the Scanner is safe + // with respect to the declaration of keys + // Accordingly, the following calls regarding new keys and step should be safe + let block_number = ScannerHandle::::block_number(txn, &block) + .expect("didn't have the block number for a block we just scanned"); + let step = self.current_rotation_step(block_number); + + let mut instructions = vec![]; + for output in outputs { + // If these aren't externally received funds, don't handle it as an instruction + if output.kind() != OutputType::External { + continue; + } + + // If this is an External transaction to the existing multisig, and we're either solely + // forwarding or closing the existing multisig, drop it + // In the case of the forwarding case, we'll report it once it hits the new multisig + if (match step { + RotationStep::UseExisting | RotationStep::NewAsChange => false, + RotationStep::ForwardFromExisting | RotationStep::ClosingExisting => true, + }) && (output.key() == self.existing.as_ref().unwrap().key) + { + continue; + } + + let instruction = if let Some(instruction) = instruction_from_output::(&output) { + instruction + } else { + if !output.data().is_empty() { + // TODO2: Refund + continue; + } + + if let Some(instruction) = + MultisigsDb::::take_forwarded_output(txn, output.amount()) + { + instruction + } else { + // TODO2: Refund + continue; + } + }; + + // Delay External outputs received to new multisig earlier than expected + if Some(output.key()) == self.new.as_ref().map(|new| new.key) { + match step { + RotationStep::UseExisting => { + MultisigsDb::::save_delayed_output(txn, instruction); + continue; + } + RotationStep::NewAsChange | + RotationStep::ForwardFromExisting | + RotationStep::ClosingExisting => {} + } + } + + instructions.push(instruction); + } + + // If any outputs were delayed, append them into this block + match step { + RotationStep::UseExisting => {} + RotationStep::NewAsChange | + RotationStep::ForwardFromExisting | + RotationStep::ClosingExisting => { + instructions.extend(MultisigsDb::::take_delayed_outputs(txn)); + } + } + + let mut block_hash = [0; 32]; + block_hash.copy_from_slice(block.as_ref()); + let mut batch_id = MultisigsDb::::next_batch_id(txn); + + // start with empty batch + let mut batches = vec![Batch { + network: N::NETWORK, + id: batch_id, + block: BlockHash(block_hash), + instructions: vec![], + }]; + + for instruction in instructions { + let batch = batches.last_mut().unwrap(); + batch.instructions.push(instruction); + + // check if batch is over-size + if batch.encode().len() > MAX_BATCH_SIZE { + // pop the last instruction so it's back in size + let instruction = batch.instructions.pop().unwrap(); + + // bump the id for the new batch + batch_id += 1; + + // make a new batch with this instruction included + batches.push(Batch { + network: N::NETWORK, + id: batch_id, + block: BlockHash(block_hash), + instructions: vec![instruction], + }); + } + } + + // Save the next batch ID + MultisigsDb::::set_next_batch_id(txn, batch_id + 1); + + ( + 
block_number, + MultisigEvent::Batches( + if is_retirement_block { + Some((self.existing.as_ref().unwrap().key, self.new.as_ref().unwrap().key)) + } else { + None + }, + batches, + ), + ) + } + + // This must be emitted before ScannerEvent::Block for all completions of known Eventualities + // within the block. Unknown Eventualities may have their Completed events emitted after + // ScannerEvent::Block however. + ScannerEvent::Completed(key, block_number, id, tx) => { + MultisigsDb::::resolve_plan(txn, &key, id, tx.id()); + (block_number, MultisigEvent::Completed(key, id, tx)) + } + }; + + // If we either received a Block event (which will be the trigger when we have no + // Plans/Eventualities leading into ClosingExisting), or we received the last Completed for + // this multisig, set its retirement block + let existing = self.existing.as_ref().unwrap(); + + // This multisig is closing + let closing = self.current_rotation_step(block_number) == RotationStep::ClosingExisting; + // There's nothing left in its Scheduler. This call is safe as: + // 1) When ClosingExisting, all outputs should've been already forwarded, preventing + // new UTXOs from accumulating. + // 2) No new payments should be issued. + // 3) While there may be plans, they'll be dropped to create Eventualities. + // If this Eventuality is resolved, the Plan has already been dropped. + // 4) If this Eventuality will trigger a Plan, it'll still be in the plans HashMap. + let scheduler_is_empty = closing && existing.scheduler.empty(); + // Nothing is still being signed + let no_active_plans = scheduler_is_empty && + MultisigsDb::::active_plans(txn, existing.key.to_bytes().as_ref()).is_empty(); + + self + .scanner + .multisig_completed + // The above explicitly included their predecessor to ensure short-circuiting, yet their + // names aren't defined as an aggregate check. 
Still including all three here ensures all are + // used in the final value + .send(closing && scheduler_is_empty && no_active_plans) + .unwrap(); + + event + } + + // async fn where dropping the Future causes no state changes + // This property is derived from recv having this property, and recv being the only async call + pub async fn next_event(&mut self, txn: &RwLock>) -> MultisigEvent { + let event = self.scanner.events.recv().await.unwrap(); + + // No further code is async + + self.scanner_event_to_multisig_event(&mut *txn.write().unwrap(), event) + } +} diff --git a/processor/src/multisigs/scanner.rs b/processor/src/multisigs/scanner.rs new file mode 100644 index 000000000..414b3c491 --- /dev/null +++ b/processor/src/multisigs/scanner.rs @@ -0,0 +1,727 @@ +use core::marker::PhantomData; +use std::{ + sync::Arc, + io::Read, + time::Duration, + collections::{VecDeque, HashSet, HashMap}, +}; + +use ciphersuite::group::GroupEncoding; +use frost::curve::Ciphersuite; + +use log::{info, debug, warn}; +use tokio::{ + sync::{RwLockReadGuard, RwLockWriteGuard, RwLock, mpsc}, + time::sleep, +}; + +use crate::{ + Get, DbTxn, Db, + networks::{Output, Transaction, EventualitiesTracker, Block, Network}, +}; + +#[derive(Clone, Debug)] +pub enum ScannerEvent { + // Block scanned + Block { is_retirement_block: bool, block: >::Id, outputs: Vec }, + // Eventuality completion found on-chain + Completed(Vec, usize, [u8; 32], N::Transaction), +} + +pub type ScannerEventChannel = mpsc::UnboundedReceiver>; + +#[derive(Clone, Debug)] +struct ScannerDb(PhantomData, PhantomData); +impl ScannerDb { + fn scanner_key(dst: &'static [u8], key: impl AsRef<[u8]>) -> Vec { + D::key(b"SCANNER", dst, key) + } + + fn block_key(number: usize) -> Vec { + Self::scanner_key(b"block_id", u64::try_from(number).unwrap().to_le_bytes()) + } + fn block_number_key(id: &>::Id) -> Vec { + Self::scanner_key(b"block_number", id) + } + fn save_block(txn: &mut D::Transaction<'_>, number: usize, id: &>::Id) { + txn.put(Self::block_number_key(id), u64::try_from(number).unwrap().to_le_bytes()); + txn.put(Self::block_key(number), id); + } + fn block(getter: &G, number: usize) -> Option<>::Id> { + getter.get(Self::block_key(number)).map(|id| { + let mut res = >::Id::default(); + res.as_mut().copy_from_slice(&id); + res + }) + } + fn block_number(getter: &G, id: &>::Id) -> Option { + getter + .get(Self::block_number_key(id)) + .map(|number| u64::from_le_bytes(number.try_into().unwrap()).try_into().unwrap()) + } + + fn keys_key() -> Vec { + Self::scanner_key(b"keys", b"") + } + fn register_key( + txn: &mut D::Transaction<'_>, + activation_number: usize, + key: ::G, + ) { + let mut keys = txn.get(Self::keys_key()).unwrap_or(vec![]); + + let key_bytes = key.to_bytes(); + + let key_len = key_bytes.as_ref().len(); + assert_eq!(keys.len() % (8 + key_len), 0); + + // Sanity check this key isn't already present + let mut i = 0; + while i < keys.len() { + if &keys[(i + 8) .. 
((i + 8) + key_len)] == key_bytes.as_ref() { + panic!("adding {} as a key yet it was already present", hex::encode(key_bytes)); + } + i += 8 + key_len; + } + + keys.extend(u64::try_from(activation_number).unwrap().to_le_bytes()); + keys.extend(key_bytes.as_ref()); + txn.put(Self::keys_key(), keys); + } + fn keys(getter: &G) -> Vec<(usize, ::G)> { + let bytes_vec = getter.get(Self::keys_key()).unwrap_or(vec![]); + let mut bytes: &[u8] = bytes_vec.as_ref(); + + // Assumes keys will be 32 bytes when calculating the capacity + // If keys are larger, this may allocate more memory than needed + // If keys are smaller, this may require additional allocations + // Either are fine + let mut res = Vec::with_capacity(bytes.len() / (8 + 32)); + while !bytes.is_empty() { + let mut activation_number = [0; 8]; + bytes.read_exact(&mut activation_number).unwrap(); + let activation_number = u64::from_le_bytes(activation_number).try_into().unwrap(); + + res.push((activation_number, N::Curve::read_G(&mut bytes).unwrap())); + } + res + } + fn retire_key(txn: &mut D::Transaction<'_>) { + let keys = Self::keys(txn); + assert_eq!(keys.len(), 2); + txn.del(Self::keys_key()); + Self::register_key(txn, keys[1].0, keys[1].1); + } + + fn seen_key(id: &>::Id) -> Vec { + Self::scanner_key(b"seen", id) + } + fn seen(getter: &G, id: &>::Id) -> bool { + getter.get(Self::seen_key(id)).is_some() + } + + fn outputs_key(block: &>::Id) -> Vec { + Self::scanner_key(b"outputs", block.as_ref()) + } + fn save_outputs( + txn: &mut D::Transaction<'_>, + block: &>::Id, + outputs: &[N::Output], + ) { + let mut bytes = Vec::with_capacity(outputs.len() * 64); + for output in outputs { + output.write(&mut bytes).unwrap(); + } + txn.put(Self::outputs_key(block), bytes); + } + fn outputs( + txn: &D::Transaction<'_>, + block: &>::Id, + ) -> Option> { + let bytes_vec = txn.get(Self::outputs_key(block))?; + let mut bytes: &[u8] = bytes_vec.as_ref(); + + let mut res = vec![]; + while !bytes.is_empty() { + res.push(N::Output::read(&mut bytes).unwrap()); + } + Some(res) + } + + fn scanned_block_key() -> Vec { + Self::scanner_key(b"scanned_block", []) + } + + fn save_scanned_block(txn: &mut D::Transaction<'_>, block: usize) -> Vec { + let id = Self::block(txn, block); // It may be None for the first key rotated to + let outputs = + if let Some(id) = id.as_ref() { Self::outputs(txn, id).unwrap_or(vec![]) } else { vec![] }; + + // Mark all the outputs from this block as seen + for output in &outputs { + txn.put(Self::seen_key(&output.id()), b""); + } + + txn.put(Self::scanned_block_key(), u64::try_from(block).unwrap().to_le_bytes()); + + // Return this block's outputs so they can be pruned from the RAM cache + outputs + } + fn latest_scanned_block(getter: &G) -> Option { + getter + .get(Self::scanned_block_key()) + .map(|bytes| u64::from_le_bytes(bytes.try_into().unwrap()).try_into().unwrap()) + } + + fn retirement_block_key(key: &::G) -> Vec { + Self::scanner_key(b"retirement_block", key.to_bytes()) + } + fn save_retirement_block( + txn: &mut D::Transaction<'_>, + key: &::G, + block: usize, + ) { + txn.put(Self::retirement_block_key(key), u64::try_from(block).unwrap().to_le_bytes()); + } + fn retirement_block(getter: &G, key: &::G) -> Option { + getter + .get(Self::retirement_block_key(key)) + .map(|bytes| usize::try_from(u64::from_le_bytes(bytes.try_into().unwrap())).unwrap()) + } +} + +/// The Scanner emits events relating to the blockchain, notably received outputs. 
+/// +/// It WILL NOT fail to emit an event, even if it reboots at selected moments. +/// +/// It MAY fire the same event multiple times. +#[derive(Debug)] +pub struct Scanner { + _db: PhantomData, + + keys: Vec<(usize, ::G)>, + + eventualities: HashMap, EventualitiesTracker>, + + ram_scanned: Option, + ram_outputs: HashSet>, + + need_ack: VecDeque, + + events: mpsc::UnboundedSender>, +} + +#[derive(Clone, Debug)] +struct ScannerHold { + scanner: Arc>>>, +} +impl ScannerHold { + async fn read(&self) -> RwLockReadGuard<'_, Option>> { + loop { + let lock = self.scanner.read().await; + if lock.is_none() { + drop(lock); + tokio::task::yield_now().await; + continue; + } + return lock; + } + } + async fn write(&self) -> RwLockWriteGuard<'_, Option>> { + loop { + let lock = self.scanner.write().await; + if lock.is_none() { + drop(lock); + tokio::task::yield_now().await; + continue; + } + return lock; + } + } + // This is safe to not check if something else already acquired the Scanner as the only caller is + // sequential. + async fn long_term_acquire(&self) -> Scanner { + self.scanner.write().await.take().unwrap() + } + async fn restore(&self, scanner: Scanner) { + let _ = self.scanner.write().await.insert(scanner); + } +} + +#[derive(Debug)] +pub struct ScannerHandle { + scanner: ScannerHold, + held_scanner: Option>, + pub events: ScannerEventChannel, + pub multisig_completed: mpsc::UnboundedSender, +} + +impl ScannerHandle { + pub async fn ram_scanned(&self) -> usize { + self.scanner.read().await.as_ref().unwrap().ram_scanned.unwrap_or(0) + } + + /// Register a key to scan for. + pub async fn register_key( + &mut self, + txn: &mut D::Transaction<'_>, + activation_number: usize, + key: ::G, + ) { + let mut scanner_lock = self.scanner.write().await; + let scanner = scanner_lock.as_mut().unwrap(); + assert!( + activation_number > scanner.ram_scanned.unwrap_or(0), + "activation block of new keys was already scanned", + ); + + info!("Registering key {} in scanner at {activation_number}", hex::encode(key.to_bytes())); + + if scanner.keys.is_empty() { + assert!(scanner.ram_scanned.is_none()); + scanner.ram_scanned = Some(activation_number); + assert!(ScannerDb::::save_scanned_block(txn, activation_number).is_empty()); + } + + ScannerDb::::register_key(txn, activation_number, key); + scanner.keys.push((activation_number, key)); + #[cfg(not(test))] // TODO: A test violates this. Improve the test with a better flow + assert!(scanner.keys.len() <= 2); + + scanner.eventualities.insert(key.to_bytes().as_ref().to_vec(), EventualitiesTracker::new()); + } + + pub fn db_scanned(getter: &G) -> Option { + ScannerDb::::latest_scanned_block(getter) + } + + // This perform a database read which isn't safe with regards to if the value is set or not + // It may be set, when it isn't expected to be set, or not set, when it is expected to be set + // Since the value is static, if it's set, it's correctly set + pub fn block_number(getter: &G, id: &>::Id) -> Option { + ScannerDb::::block_number(getter, id) + } + + /// Acknowledge having handled a block. + /// + /// Creates a lock over the Scanner, preventing its independent scanning operations until + /// released. + /// + /// This must only be called on blocks which have been scanned in-memory. 
+ pub async fn ack_block( + &mut self, + txn: &mut D::Transaction<'_>, + id: >::Id, + ) -> (bool, Vec) { + debug!("block {} acknowledged", hex::encode(&id)); + + let mut scanner = self.scanner.long_term_acquire().await; + + // Get the number for this block + let number = ScannerDb::::block_number(txn, &id) + .expect("main loop trying to operate on data we haven't scanned"); + log::trace!("block {} was {number}", hex::encode(&id)); + + let outputs = ScannerDb::::save_scanned_block(txn, number); + // This has a race condition if we try to ack a block we scanned on a prior boot, and we have + // yet to scan it on this boot + assert!(number <= scanner.ram_scanned.unwrap()); + for output in &outputs { + assert!(scanner.ram_outputs.remove(output.id().as_ref())); + } + + assert_eq!(scanner.need_ack.pop_front().unwrap(), number); + + self.held_scanner = Some(scanner); + + // Load the key from the DB, as it will have already been removed from RAM if retired + let key = ScannerDb::::keys(txn)[0].1; + let is_retirement_block = ScannerDb::::retirement_block(txn, &key) == Some(number); + if is_retirement_block { + ScannerDb::::retire_key(txn); + } + (is_retirement_block, outputs) + } + + pub async fn register_eventuality( + &mut self, + key: &[u8], + block_number: usize, + id: [u8; 32], + eventuality: N::Eventuality, + ) { + let mut lock; + // We won't use held_scanner if we're re-registering on boot + (if let Some(scanner) = self.held_scanner.as_mut() { + scanner + } else { + lock = Some(self.scanner.write().await); + lock.as_mut().unwrap().as_mut().unwrap() + }) + .eventualities + .get_mut(key) + .unwrap() + .register(block_number, id, eventuality) + } + + pub async fn release_lock(&mut self) { + self.scanner.restore(self.held_scanner.take().unwrap()).await + } +} + +impl Scanner { + #[allow(clippy::type_complexity, clippy::new_ret_no_self)] + pub fn new( + network: N, + db: D, + ) -> (ScannerHandle, Vec<(usize, ::G)>) { + let (events_send, events_recv) = mpsc::unbounded_channel(); + let (multisig_completed_send, multisig_completed_recv) = mpsc::unbounded_channel(); + + let keys = ScannerDb::::keys(&db); + let mut eventualities = HashMap::new(); + for key in &keys { + eventualities.insert(key.1.to_bytes().as_ref().to_vec(), EventualitiesTracker::new()); + } + + let ram_scanned = ScannerDb::::latest_scanned_block(&db); + + let scanner = ScannerHold { + scanner: Arc::new(RwLock::new(Some(Scanner { + _db: PhantomData, + + keys: keys.clone(), + + eventualities, + + ram_scanned, + ram_outputs: HashSet::new(), + + need_ack: VecDeque::new(), + + events: events_send, + }))), + }; + tokio::spawn(Scanner::run(db, network, scanner.clone(), multisig_completed_recv)); + + ( + ScannerHandle { + scanner, + held_scanner: None, + events: events_recv, + multisig_completed: multisig_completed_send, + }, + keys, + ) + } + + async fn emit(&mut self, event: ScannerEvent) -> bool { + if self.events.send(event).is_err() { + info!("Scanner handler was dropped. 
Shutting down?"); + return false; + } + true + } + + // An async function, to be spawned on a task, to discover and report outputs + async fn run( + mut db: D, + network: N, + scanner_hold: ScannerHold, + mut multisig_completed: mpsc::UnboundedReceiver, + ) { + loop { + let (ram_scanned, latest_block_to_scan) = { + // Sleep 5 seconds to prevent hammering the node/scanner lock + sleep(Duration::from_secs(5)).await; + + let ram_scanned = { + let scanner_lock = scanner_hold.read().await; + let scanner = scanner_lock.as_ref().unwrap(); + + // If we're not scanning for keys yet, wait until we are + if scanner.keys.is_empty() { + continue; + } + + let ram_scanned = scanner.ram_scanned.unwrap(); + // If a Batch has taken too long to be published, start waiting until it is before + // continuing scanning + // Solves a race condition around multisig rotation, documented in the relevant doc + // and demonstrated with mini + if let Some(needing_ack) = scanner.need_ack.front() { + let next = ram_scanned + 1; + let limit = needing_ack + N::CONFIRMATIONS; + assert!(next <= limit); + if next == limit { + continue; + } + }; + + ram_scanned + }; + + ( + ram_scanned, + loop { + break match network.get_latest_block_number().await { + // Only scan confirmed blocks, which we consider effectively finalized + // CONFIRMATIONS - 1 as whatever's in the latest block already has 1 confirm + Ok(latest) => latest.saturating_sub(N::CONFIRMATIONS.saturating_sub(1)), + Err(_) => { + warn!("couldn't get latest block number"); + sleep(Duration::from_secs(60)).await; + continue; + } + }; + }, + ) + }; + + for block_being_scanned in (ram_scanned + 1) ..= latest_block_to_scan { + // Redo the checks for if we're too far ahead + { + let needing_ack = { + let scanner_lock = scanner_hold.read().await; + let scanner = scanner_lock.as_ref().unwrap(); + scanner.need_ack.front().cloned() + }; + + if let Some(needing_ack) = needing_ack { + let limit = needing_ack + N::CONFIRMATIONS; + assert!(block_being_scanned <= limit); + if block_being_scanned == limit { + break; + } + } + } + + let block = match network.get_block(block_being_scanned).await { + Ok(block) => block, + Err(_) => { + warn!("couldn't get block {block_being_scanned}"); + break; + } + }; + let block_id = block.id(); + + info!("scanning block: {} ({block_being_scanned})", hex::encode(&block_id)); + + // These DB calls are safe, despite not having a txn, since they're static values + // There's no issue if they're written in advance of expected (such as on reboot) + // They're also only expected here + if let Some(id) = ScannerDb::::block(&db, block_being_scanned) { + if id != block_id { + panic!("reorg'd from finalized {} to {}", hex::encode(id), hex::encode(block_id)); + } + } else { + // TODO: Move this to an unwrap + if let Some(id) = ScannerDb::::block(&db, block_being_scanned.saturating_sub(1)) { + if id != block.parent() { + panic!( + "block {} doesn't build off expected parent {}", + hex::encode(block_id), + hex::encode(id), + ); + } + } + + let mut txn = db.txn(); + ScannerDb::::save_block(&mut txn, block_being_scanned, &block_id); + txn.commit(); + } + + // Scan new blocks + // TODO: This lock acquisition may be long-lived... 
+      let mut scanner_lock = scanner_hold.write().await;
+      let scanner = scanner_lock.as_mut().unwrap();
+
+      let mut has_activation = false;
+      let mut outputs = vec![];
+      let mut completion_block_numbers = vec![];
+      for (activation_number, key) in scanner.keys.clone() {
+        if activation_number > block_being_scanned {
+          continue;
+        }
+
+        if activation_number == block_being_scanned {
+          has_activation = true;
+        }
+
+        let key_vec = key.to_bytes().as_ref().to_vec();
+
+        // TODO: These lines are the ones which will cause a really long-lived lock acquisition
+        for output in network.get_outputs(&block, key).await {
+          assert_eq!(output.key(), key);
+          outputs.push(output);
+        }
+
+        for (id, (block_number, tx)) in network
+          .get_eventuality_completions(scanner.eventualities.get_mut(&key_vec).unwrap(), &block)
+          .await
+        {
+          info!(
+            "eventuality {} resolved by {}, as found on chain",
+            hex::encode(id),
+            hex::encode(&tx.id())
+          );
+
+          completion_block_numbers.push(block_number);
+          // This must be before the emission of ScannerEvent::Block, per commentary in mod.rs
+          if !scanner.emit(ScannerEvent::Completed(key_vec.clone(), block_number, id, tx)).await {
+            return;
+          }
+        }
+      }
+
+      // Panic if we've already seen these outputs
+      for output in &outputs {
+        let id = output.id();
+        info!(
+          "block {} had output {} worth {}",
+          hex::encode(&block_id),
+          hex::encode(&id),
+          output.amount(),
+        );
+
+        // On Bitcoin, the output ID should be unique for a given chain
+        // On Monero, it's trivial to make an output sharing an ID with another
+        // We should only scan outputs with valid IDs however, which will be unique
+
+        /*
+          The safety of this code must satisfy the following conditions:
+            1) seen is not set for the first occurrence
+            2) seen is set for any future occurrence
+
+          seen is only written to after this code completes. Accordingly, it cannot be set
+          before the first occurrence UNLESS it's set, yet the last scanned block isn't.
+          They are both written in the same database transaction, preventing this.
+
+          As for future occurrences, the RAM entry ensures they're handled properly even if
+          the database has yet to be set.
+
+          On reboot, which will clear the RAM, if seen wasn't set, neither was latest scanned
+          block. Accordingly, this will scan from some prior block, re-populating the RAM.
+
+          If seen was set, then this will be successfully read.
+
+          There's also no concern ram_outputs was pruned, yet seen wasn't set, as pruning
+          from ram_outputs will acquire a write lock (preventing this code from acquiring
+          its own write lock and running), and during its holding of the write lock, it
+          commits the transaction setting seen and the latest scanned block.
+
+          This last case isn't true. Committing seen/latest_scanned_block happens after
+          relinquishing the write lock.
+
+          TODO2: Only update ram_outputs after committing the TXN in question.
+        */
+        let seen = ScannerDb::<N, D>::seen(&db, &id);
+        let id = id.as_ref().to_vec();
+        if seen || scanner.ram_outputs.contains(&id) {
+          panic!("scanned an output multiple times");
+        }
+        scanner.ram_outputs.insert(id);
+      }
+
+      // We could remove this, if instead of doing the first block which passed
+      // requirements + CONFIRMATIONS, we simply emitted an event for every block where
+      // `number % CONFIRMATIONS == 0` (once at the final stage for the existing multisig)
+      // There's no need at this point, yet the latter may be more suitable for modeling...
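+      // As a rough sketch of that alternative (purely illustrative, using the variables already
+      // in this scope; it is not what this code does), the cadence would be:
+      //   if block_being_scanned % N::CONFIRMATIONS == 0 {
+      //     // emit a ScannerEvent::Block to obtain synchrony, even without outputs/activation
+      //   }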
+ async fn check_multisig_completed( + db: &mut D, + multisig_completed: &mut mpsc::UnboundedReceiver, + block_number: usize, + ) -> bool { + match multisig_completed.recv().await { + None => { + info!("Scanner handler was dropped. Shutting down?"); + false + } + Some(completed) => { + // Set the retirement block as block_number + CONFIRMATIONS + if completed { + let mut txn = db.txn(); + // The retiring key is the earliest one still around + let retiring_key = ScannerDb::::keys(&txn)[0].1; + // This value is static w.r.t. the key + ScannerDb::::save_retirement_block( + &mut txn, + &retiring_key, + block_number + N::CONFIRMATIONS, + ); + txn.commit(); + } + true + } + } + } + + drop(scanner_lock); + // Now that we've dropped the Scanner lock, we need to handle the multisig_completed + // channel before we decide if this block should be fired or not + // (holding the Scanner risks a deadlock) + for block_number in completion_block_numbers { + if !check_multisig_completed::(&mut db, &mut multisig_completed, block_number).await + { + return; + }; + } + + // Reacquire the scanner + let mut scanner_lock = scanner_hold.write().await; + let scanner = scanner_lock.as_mut().unwrap(); + + // Only emit an event if any of the following is true: + // - This is an activation block + // - This is a retirement block + // - There's outputs + // as only those are blocks are meaningful and warrant obtaining synchrony over + // TODO: Consider not obtaining synchrony over the retirement block depending on how the + // hand-off is implemented on the Substrate side of things + let is_retirement_block = + ScannerDb::::retirement_block(&db, &scanner.keys[0].1) == Some(block_being_scanned); + let sent_block = if has_activation || is_retirement_block || (!outputs.is_empty()) { + // Save the outputs to disk + let mut txn = db.txn(); + ScannerDb::::save_outputs(&mut txn, &block_id, &outputs); + txn.commit(); + + // Send all outputs + if !scanner + .emit(ScannerEvent::Block { is_retirement_block, block: block_id, outputs }) + .await + { + return; + } + + scanner.need_ack.push_back(block_being_scanned); + true + } else { + false + }; + + // Remove it from memory + if is_retirement_block { + let retired = scanner.keys.remove(0).1; + scanner.eventualities.remove(retired.to_bytes().as_ref()); + } + + // Update ram_scanned/need_ack + scanner.ram_scanned = Some(block_being_scanned); + + drop(scanner_lock); + // If we sent a Block event, once again check multisig_completed + if sent_block && + (!check_multisig_completed::( + &mut db, + &mut multisig_completed, + block_being_scanned, + ) + .await) + { + return; + } + } + } + } +} diff --git a/processor/src/scheduler.rs b/processor/src/multisigs/scheduler.rs similarity index 77% rename from processor/src/scheduler.rs rename to processor/src/multisigs/scheduler.rs index db0890438..4d0e42ff7 100644 --- a/processor/src/scheduler.rs +++ b/processor/src/multisigs/scheduler.rs @@ -6,7 +6,7 @@ use std::{ use ciphersuite::{group::GroupEncoding, Ciphersuite}; use crate::{ - networks::{Output, Network}, + networks::{OutputType, Output, Network}, DbTxn, Db, Payment, Plan, }; @@ -29,8 +29,6 @@ pub struct Scheduler { // queued_plans are for outputs which we will create, yet when created, will have their amount // reduced by the fee it cost to be created. 
The Scheduler will then be told how what amount the // output actually has, and it'll be moved into plans - // - // TODO2: Consider edge case where branch/change isn't mined yet keys are deprecated queued_plans: HashMap>>>, plans: HashMap>>>, @@ -46,6 +44,13 @@ fn scheduler_key(key: &G) -> Vec { } impl Scheduler { + pub fn empty(&self) -> bool { + self.queued_plans.is_empty() && + self.plans.is_empty() && + self.utxos.is_empty() && + self.payments.is_empty() + } + fn read(key: ::G, reader: &mut R) -> io::Result { let mut read_plans = || -> io::Result<_> { let mut all_plans = HashMap::new(); @@ -93,7 +98,7 @@ impl Scheduler { Ok(Scheduler { key, queued_plans, plans, utxos, payments }) } - // TODO: Get rid of this + // TODO2: Get rid of this // We reserialize the entire scheduler on any mutation to save it to the DB which is horrible // We should have an incremental solution fn serialize(&self) -> Vec { @@ -152,19 +157,16 @@ impl Scheduler { Self::read(key, reader) } - fn execute(&mut self, inputs: Vec, mut payments: Vec>) -> Plan { - // This must be equal to plan.key due to how networks detect they created outputs which are to - // the branch address - let branch_address = N::branch_address(self.key); - // created_output will be called any time we send to a branch address - // If it's called, and it wasn't expecting to be called, that's almost certainly an error - // The only way it wouldn't be is if someone on Serai triggered a burn to a branch, which is - // pointless anyways - // If we allow such behavior, we lose the ability to detect the aforementioned class of errors - // Ignore these payments so we can safely assert there - let mut payments = - payments.drain(..).filter(|payment| payment.address != branch_address).collect::>(); + pub fn can_use_branch(&self, amount: u64) -> bool { + self.plans.contains_key(&amount) + } + fn execute( + &mut self, + inputs: Vec, + mut payments: Vec>, + key_for_any_change: ::G, + ) -> Plan { let mut change = false; let mut max = N::MAX_OUTPUTS; @@ -184,6 +186,8 @@ impl Scheduler { amount }; + let branch_address = N::branch_address(self.key); + // If we have more payments than we can handle in a single TX, create plans for them // TODO2: This isn't perfect. For 258 outputs, and a MAX_OUTPUTS of 16, this will create: // 15 branches of 16 leaves @@ -207,37 +211,44 @@ impl Scheduler { payments.insert(0, Payment { address: branch_address.clone(), data: None, amount }); } - // TODO2: Use the latest key for change - // TODO2: Update rotation documentation - Plan { key: self.key, inputs, payments, change: Some(self.key).filter(|_| change) } + Plan { + key: self.key, + inputs, + payments, + change: Some(N::change_address(key_for_any_change)).filter(|_| change), + } } - fn add_outputs(&mut self, mut utxos: Vec) -> Vec> { + fn add_outputs( + &mut self, + mut utxos: Vec, + key_for_any_change: ::G, + ) -> Vec> { log::info!("adding {} outputs", utxos.len()); let mut txs = vec![]; for utxo in utxos.drain(..) 
{ - // If we can fulfill planned TXs with this output, do so - // We could limit this to UTXOs where `utxo.kind() == OutputType::Branch`, yet there's no - // practical benefit in doing so - let amount = utxo.amount(); - if let Some(plans) = self.plans.get_mut(&amount) { - // Execute the first set of payments possible with an output of this amount - let payments = plans.pop_front().unwrap(); - // They won't be equal if we dropped payments due to being dust - assert!(amount >= payments.iter().map(|payment| payment.amount).sum::()); - - // If we've grabbed the last plan for this output amount, remove it from the map - if plans.is_empty() { - self.plans.remove(&amount); - } + if utxo.kind() == OutputType::Branch { + let amount = utxo.amount(); + if let Some(plans) = self.plans.get_mut(&amount) { + // Execute the first set of payments possible with an output of this amount + let payments = plans.pop_front().unwrap(); + // They won't be equal if we dropped payments due to being dust + assert!(amount >= payments.iter().map(|payment| payment.amount).sum::()); + + // If we've grabbed the last plan for this output amount, remove it from the map + if plans.is_empty() { + self.plans.remove(&amount); + } - // Create a TX for these payments - txs.push(self.execute(vec![utxo], payments)); - } else { - self.utxos.push(utxo); + // Create a TX for these payments + txs.push(self.execute(vec![utxo], payments, key_for_any_change)); + continue; + } } + + self.utxos.push(utxo); } log::info!("{} planned TXs have had their required inputs confirmed", txs.len()); @@ -249,9 +260,28 @@ impl Scheduler { &mut self, txn: &mut D::Transaction<'_>, utxos: Vec, - payments: Vec>, + mut payments: Vec>, + key_for_any_change: ::G, + force_spend: bool, ) -> Vec> { - let mut plans = self.add_outputs(utxos); + // Drop payments to our own branch address + /* + created_output will be called any time we send to a branch address. If it's called, and it + wasn't expecting to be called, that's almost certainly an error. The only way to guarantee + this however is to only have us send to a branch address when creating a branch, hence the + dropping of pointless payments. + + This is not comprehensive as a payment may still be made to another active multisig's branch + address, depending on timing. This is safe as the issue only occurs when a multisig sends to + its *own* branch address, since created_output is called on the signer's Scheduler. + */ + { + let branch_address = N::branch_address(self.key); + payments = + payments.drain(..).filter(|payment| payment.address != branch_address).collect::>(); + } + + let mut plans = self.add_outputs(utxos, key_for_any_change); log::info!("scheduling {} new payments", payments.len()); @@ -293,10 +323,14 @@ impl Scheduler { for chunk in utxo_chunks.drain(..) 
{ // TODO: While payments have their TXs' fees deducted from themselves, that doesn't hold here - // We need to charge a fee before reporting incoming UTXOs to Substrate to cover aggregation - // TXs + // We need the documented, but not yet implemented, virtual amount scheme to solve this log::debug!("aggregating a chunk of {} inputs", N::MAX_INPUTS); - plans.push(Plan { key: self.key, inputs: chunk, payments: vec![], change: Some(self.key) }) + plans.push(Plan { + key: self.key, + inputs: chunk, + payments: vec![], + change: Some(N::change_address(key_for_any_change)), + }) } // We want to use all possible UTXOs for all possible payments @@ -326,12 +360,25 @@ impl Scheduler { // Now that we have the list of payments we can successfully handle right now, create the TX // for them if !executing.is_empty() { - plans.push(self.execute(utxos, executing)); + plans.push(self.execute(utxos, executing, key_for_any_change)); } else { // If we don't have any payments to execute, save these UTXOs for later self.utxos.extend(utxos); } + // If we're instructed to force a spend, do so + // This is used when an old multisig is retiring and we want to always transfer outputs to the + // new one, regardless if we currently have payments + if force_spend && (!self.utxos.is_empty()) { + assert!(self.utxos.len() <= N::MAX_INPUTS); + plans.push(Plan { + key: self.key, + inputs: self.utxos.drain(..).collect::>(), + payments: vec![], + change: Some(N::change_address(key_for_any_change)), + }); + } + txn.put(scheduler_key::(&self.key), self.serialize()); log::info!( @@ -342,6 +389,14 @@ impl Scheduler { plans } + pub fn consume_payments(&mut self, txn: &mut D::Transaction<'_>) -> Vec> { + let res: Vec<_> = self.payments.drain(..).collect(); + if !res.is_empty() { + txn.put(scheduler_key::(&self.key), self.serialize()); + } + res + } + // Note a branch output as having been created, with the amount it was actually created with, // or not having been created due to being too small // This can be called whenever, so long as it's properly ordered @@ -399,7 +454,7 @@ impl Scheduler { #[allow(clippy::unwrap_or_default)] self.plans.entry(actual).or_insert(VecDeque::new()).push_back(payments); - // TODO: This shows how ridiculous the serialize function is + // TODO2: This shows how ridiculous the serialize function is txn.put(scheduler_key::(&self.key), self.serialize()); } } diff --git a/processor/src/networks/bitcoin.rs b/processor/src/networks/bitcoin.rs index 9bcc70db6..823b45ce2 100644 --- a/processor/src/networks/bitcoin.rs +++ b/processor/src/networks/bitcoin.rs @@ -15,6 +15,7 @@ use tokio::time::sleep; use bitcoin_serai::{ bitcoin::{ hashes::Hash as HashTrait, + key::{Parity, XOnlyPublicKey}, consensus::{Encodable, Decodable}, script::Instruction, address::{NetworkChecked, Address as BAddress}, @@ -45,8 +46,9 @@ use serai_client::{ use crate::{ networks::{ NetworkError, Block as BlockTrait, OutputType, Output as OutputTrait, - Transaction as TransactionTrait, Eventuality as EventualityTrait, EventualitiesTracker, - PostFeeBranch, Network, drop_branches, amortize_fee, + Transaction as TransactionTrait, SignableTransaction as SignableTransactionTrait, + Eventuality as EventualityTrait, EventualitiesTracker, PostFeeBranch, Network, drop_branches, + amortize_fee, }, Plan, }; @@ -76,7 +78,7 @@ pub struct Output { data: Vec, } -impl OutputTrait for Output { +impl OutputTrait for Output { type Id = OutputId; fn kind(&self) -> OutputType { @@ -97,6 +99,24 @@ impl OutputTrait for Output { res } + fn tx_id(&self) -> [u8; 
32] { + let mut hash = *self.output.outpoint().txid.as_raw_hash().as_byte_array(); + hash.reverse(); + hash + } + + fn key(&self) -> ProjectivePoint { + let script = &self.output.output().script_pubkey; + assert!(script.is_v1_p2tr()); + let Instruction::PushBytes(key) = script.instructions_minimal().last().unwrap().unwrap() else { + panic!("last item in v1 Taproot script wasn't bytes") + }; + let key = XOnlyPublicKey::from_slice(key.as_ref()) + .expect("last item in v1 Taproot script wasn't x-only public key"); + Secp256k1::read_G(&mut key.public_key(Parity::Even).serialize().as_slice()).unwrap() - + (ProjectivePoint::GENERATOR * self.output.offset()) + } + fn balance(&self) -> Balance { Balance { coin: SeraiCoin::Bitcoin, amount: Amount(self.output.value()) } } @@ -196,7 +216,6 @@ impl EventualityTrait for Eventuality { #[derive(Clone, Debug)] pub struct SignableTransaction { - keys: ThresholdKeys, transcript: RecommendedTranscript, actual: BSignableTransaction, } @@ -206,6 +225,11 @@ impl PartialEq for SignableTransaction { } } impl Eq for SignableTransaction {} +impl SignableTransactionTrait for SignableTransaction { + fn fee(&self) -> u64 { + self.actual.fee() + } +} impl BlockTrait for Block { type Id = [u8; 32]; @@ -221,6 +245,8 @@ impl BlockTrait for Block { hash } + // TODO: Don't use this block's time, use the network time at this block + // TODO: Confirm network time is monotonic, enabling its usage here fn time(&self) -> u64 { self.header.time.into() } @@ -231,7 +257,7 @@ impl BlockTrait for Block { } } -const KEY_DST: &[u8] = b"Bitcoin Key"; +const KEY_DST: &[u8] = b"Serai Bitcoin Output Offset"; lazy_static::lazy_static! { static ref BRANCH_OFFSET: Scalar = Secp256k1::hash_to_F(KEY_DST, b"branch"); static ref CHANGE_OFFSET: Scalar = Secp256k1::hash_to_F(KEY_DST, b"change"); @@ -313,6 +339,7 @@ impl Network for Bitcoin { const NETWORK: NetworkId = NetworkId::Bitcoin; const ID: &'static str = "Bitcoin"; + const ESTIMATED_BLOCK_TIME_IN_SECONDS: usize = 600; const CONFIRMATIONS: usize = 6; // 0.0001 BTC, 10,000 satoshis @@ -348,6 +375,11 @@ impl Network for Bitcoin { Self::address(key + (ProjectivePoint::GENERATOR * offsets[&OutputType::Branch])) } + fn change_address(key: ProjectivePoint) -> Self::Address { + let (_, offsets, _) = scanner(key); + Self::address(key + (ProjectivePoint::GENERATOR * offsets[&OutputType::Change])) + } + async fn get_latest_block_number(&self) -> Result { self.rpc.get_latest_block_number().await.map_err(|_| NetworkError::ConnectionError) } @@ -358,11 +390,7 @@ impl Network for Bitcoin { self.rpc.get_block(&block_hash).await.map_err(|_| NetworkError::ConnectionError) } - async fn get_outputs( - &self, - block: &Self::Block, - key: ProjectivePoint, - ) -> Result, NetworkError> { + async fn get_outputs(&self, block: &Self::Block, key: ProjectivePoint) -> Vec { let (scanner, _, kinds) = scanner(key); let mut outputs = vec![]; @@ -390,18 +418,20 @@ impl Network for Bitcoin { }; data.truncate(MAX_DATA_LEN.try_into().unwrap()); - outputs.push(Output { kind, output, data }) + let output = Output { kind, output, data }; + assert_eq!(output.tx_id(), tx.id()); + outputs.push(output); } } - Ok(outputs) + outputs } async fn get_eventuality_completions( &self, eventualities: &mut EventualitiesTracker, block: &Self::Block, - ) -> HashMap<[u8; 32], [u8; 32]> { + ) -> HashMap<[u8; 32], (usize, Transaction)> { let mut res = HashMap::new(); if eventualities.map.is_empty() { return res; @@ -410,7 +440,7 @@ impl Network for Bitcoin { async fn check_block( eventualities: 
&mut EventualitiesTracker, block: &Block, - res: &mut HashMap<[u8; 32], [u8; 32]>, + res: &mut HashMap<[u8; 32], (usize, Transaction)>, ) { for tx in &block.txdata[1 ..] { let input = &tx.input[0].previous_output; @@ -430,7 +460,7 @@ impl Network for Bitcoin { "dishonest multisig spent input on distinct set of outputs" ); - res.insert(plan, tx.id()); + res.insert(plan, (eventualities.block_number, tx.clone())); } } @@ -476,7 +506,6 @@ impl Network for Bitcoin { async fn prepare_send( &self, - keys: ThresholdKeys, _: usize, mut plan: Plan, fee: Fee, @@ -497,10 +526,7 @@ impl Network for Bitcoin { match BSignableTransaction::new( plan.inputs.iter().map(|input| input.output.clone()).collect(), &payments, - plan.change.map(|key| { - let (_, offsets, _) = scanner(key); - Self::address(key + (ProjectivePoint::GENERATOR * offsets[&OutputType::Change])).0 - }), + plan.change.as_ref().map(|change| change.0.clone()), None, fee.0, ) { @@ -544,7 +570,7 @@ impl Network for Bitcoin { Ok(( Some(( - SignableTransaction { keys, transcript: plan.transcript(), actual: signable }, + SignableTransaction { transcript: plan.transcript(), actual: signable }, Eventuality { plan_binding_input, outputs }, )), branch_outputs, @@ -553,13 +579,14 @@ impl Network for Bitcoin { async fn attempt_send( &self, + keys: ThresholdKeys, transaction: Self::SignableTransaction, ) -> Result { Ok( transaction .actual .clone() - .multisig(transaction.keys.clone(), transaction.transcript) + .multisig(keys.clone(), transaction.transcript) .expect("used the wrong keys"), ) } diff --git a/processor/src/networks/mod.rs b/processor/src/networks/mod.rs index 7e0666aa5..113cebb0b 100644 --- a/processor/src/networks/mod.rs +++ b/processor/src/networks/mod.rs @@ -1,4 +1,4 @@ -use core::fmt::Debug; +use core::{fmt::Debug, time::Duration}; use std::{io, collections::HashMap}; use async_trait::async_trait; @@ -12,6 +12,10 @@ use frost::{ use serai_client::primitives::{NetworkId, Balance}; +use log::error; + +use tokio::time::sleep; + #[cfg(feature = "bitcoin")] pub mod bitcoin; #[cfg(feature = "bitcoin")] @@ -90,14 +94,17 @@ impl OutputType { } } -pub trait Output: Send + Sync + Sized + Clone + PartialEq + Eq + Debug { +pub trait Output: Send + Sync + Sized + Clone + PartialEq + Eq + Debug { type Id: 'static + Id; fn kind(&self) -> OutputType; fn id(&self) -> Self::Id; + fn tx_id(&self) -> >::Id; + fn key(&self) -> ::G; fn balance(&self) -> Balance; + // TODO: Remove this? fn amount(&self) -> u64 { self.balance().amount.0 } @@ -117,6 +124,10 @@ pub trait Transaction: Send + Sync + Sized + Clone + Debug { async fn fee(&self, network: &N) -> u64; } +pub trait SignableTransaction: Send + Sync + Clone + Debug { + fn fee(&self) -> u64; +} + pub trait Eventuality: Send + Sync + Clone + Debug { fn lookup(&self) -> Vec; @@ -172,10 +183,11 @@ impl Default for EventualitiesTracker { } pub trait Block: Send + Sync + Sized + Clone + Debug { - // This is currently bounded to being 32-bytes. + // This is currently bounded to being 32 bytes. type Id: 'static + Id; fn id(&self) -> Self::Id; fn parent(&self) -> Self::Id; + // The monotonic network time at this block. fn time(&self) -> u64; fn median_fee(&self) -> N::Fee; } @@ -275,9 +287,9 @@ pub trait Network: 'static + Send + Sync + Clone + PartialEq + Eq + Debug { /// The type containing all information on a scanned output. // This is almost certainly distinct from the network's native output type. 
- type Output: Output; + type Output: Output; /// The type containing all information on a planned transaction, waiting to be signed. - type SignableTransaction: Send + Sync + Clone + Debug; + type SignableTransaction: SignableTransaction; /// The type containing all information to check if a plan was completed. /// /// This must be binding to both the outputs expected and the plan ID. @@ -302,6 +314,8 @@ pub trait Network: 'static + Send + Sync + Clone + PartialEq + Eq + Debug { const NETWORK: NetworkId; /// String ID for this network. const ID: &'static str; + /// The estimated amount of time a block will take. + const ESTIMATED_BLOCK_TIME_IN_SECONDS: usize; /// The amount of confirmations required to consider a block 'final'. const CONFIRMATIONS: usize; /// The maximum amount of inputs which will fit in a TX. @@ -322,8 +336,9 @@ pub trait Network: 'static + Send + Sync + Clone + PartialEq + Eq + Debug { /// Address for the given group key to receive external coins to. fn address(key: ::G) -> Self::Address; /// Address for the given group key to use for scheduled branches. - // This is purely used for debugging purposes. Any output may be used to execute a branch. fn branch_address(key: ::G) -> Self::Address; + /// Address for the given group key to use for change. + fn change_address(key: ::G) -> Self::Address; /// Get the latest block's number. async fn get_latest_block_number(&self) -> Result; @@ -334,24 +349,26 @@ pub trait Network: 'static + Send + Sync + Clone + PartialEq + Eq + Debug { &self, block: &Self::Block, key: ::G, - ) -> Result, NetworkError>; + ) -> Vec; /// Get the registered eventualities completed within this block, and any prior blocks which /// registered eventualities may have been completed in. /// - /// This will panic if not fed a new block. + /// This may panic if not fed a block greater than the tracker's block number. + // TODO: get_eventuality_completions_internal + provided get_eventuality_completions for common + // code async fn get_eventuality_completions( &self, eventualities: &mut EventualitiesTracker, block: &Self::Block, - ) -> HashMap<[u8; 32], >::Id>; + ) -> HashMap<[u8; 32], (usize, Self::Transaction)>; /// Prepare a SignableTransaction for a transaction. + /// /// Returns None for the transaction if the SignableTransaction was dropped due to lack of value. #[rustfmt::skip] async fn prepare_send( &self, - keys: ThresholdKeys, block_number: usize, plan: Plan, fee: Self::Fee, @@ -363,6 +380,7 @@ pub trait Network: 'static + Send + Sync + Clone + PartialEq + Eq + Debug { /// Attempt to sign a SignableTransaction. async fn attempt_send( &self, + keys: ThresholdKeys, transaction: Self::SignableTransaction, ) -> Result; @@ -396,3 +414,35 @@ pub trait Network: 'static + Send + Sync + Clone + PartialEq + Eq + Debug { #[cfg(test)] async fn test_send(&self, key: Self::Address) -> Self::Block; } + +// TODO: Move into above trait +pub async fn get_latest_block_number(network: &N) -> usize { + loop { + match network.get_latest_block_number().await { + Ok(number) => { + return number; + } + Err(e) => { + error!( + "couldn't get the latest block number in main's error-free get_block. {} {}", + "this should only happen if the node is offline. 
error: ", e + ); + sleep(Duration::from_secs(10)).await; + } + } + } +} + +pub async fn get_block(network: &N, block_number: usize) -> N::Block { + loop { + match network.get_block(block_number).await { + Ok(block) => { + return block; + } + Err(e) => { + error!("couldn't get block {block_number} in main's error-free get_block. error: {}", e); + sleep(Duration::from_secs(10)).await; + } + } + } +} diff --git a/processor/src/networks/monero.rs b/processor/src/networks/monero.rs index 32617fcde..82f04326e 100644 --- a/processor/src/networks/monero.rs +++ b/processor/src/networks/monero.rs @@ -37,8 +37,9 @@ use crate::{ Payment, Plan, additional_key, networks::{ NetworkError, Block as BlockTrait, OutputType, Output as OutputTrait, - Transaction as TransactionTrait, Eventuality as EventualityTrait, EventualitiesTracker, - PostFeeBranch, Network, drop_branches, amortize_fee, + Transaction as TransactionTrait, SignableTransaction as SignableTransactionTrait, + Eventuality as EventualityTrait, EventualitiesTracker, PostFeeBranch, Network, drop_branches, + amortize_fee, }, }; @@ -49,7 +50,7 @@ const EXTERNAL_SUBADDRESS: Option = SubaddressIndex::new(0, 0); const BRANCH_SUBADDRESS: Option = SubaddressIndex::new(1, 0); const CHANGE_SUBADDRESS: Option = SubaddressIndex::new(2, 0); -impl OutputTrait for Output { +impl OutputTrait for Output { // While we could use (tx, o), using the key ensures we won't be susceptible to the burning bug. // While we already are immune, thanks to using featured address, this doesn't hurt and is // technically more efficient. @@ -68,6 +69,14 @@ impl OutputTrait for Output { self.0.output.data.key.compress().to_bytes() } + fn tx_id(&self) -> [u8; 32] { + self.0.output.absolute.tx + } + + fn key(&self) -> EdwardsPoint { + EdwardsPoint(self.0.output.data.key - (EdwardsPoint::generator().0 * self.0.key_offset())) + } + fn balance(&self) -> Balance { Balance { coin: SeraiCoin::Monero, amount: Amount(self.0.commitment().amount) } } @@ -130,10 +139,14 @@ impl EventualityTrait for Eventuality { #[derive(Clone, Debug)] pub struct SignableTransaction { - keys: ThresholdKeys, transcript: RecommendedTranscript, actual: MSignableTransaction, } +impl SignableTransactionTrait for SignableTransaction { + fn fee(&self) -> u64 { + self.actual.fee() + } +} impl BlockTrait for Block { type Id = [u8; 32]; @@ -145,6 +158,7 @@ impl BlockTrait for Block { self.header.previous } + // TODO: Check Monero enforces this to be monotonic and sane fn time(&self) -> u64 { self.header.timestamp } @@ -227,6 +241,7 @@ impl Network for Monero { const NETWORK: NetworkId = NetworkId::Monero; const ID: &'static str = "Monero"; + const ESTIMATED_BLOCK_TIME_IN_SECONDS: usize = 120; const CONFIRMATIONS: usize = 10; // wallet2 will not create a transaction larger than 100kb, and Monero won't relay a transaction @@ -250,6 +265,10 @@ impl Network for Monero { Self::address_internal(key, BRANCH_SUBADDRESS) } + fn change_address(key: EdwardsPoint) -> Self::Address { + Self::address_internal(key, CHANGE_SUBADDRESS) + } + async fn get_latest_block_number(&self) -> Result { // Monero defines height as chain length, so subtract 1 for block number Ok(self.rpc.get_height().await.map_err(|_| NetworkError::ConnectionError)? - 1) @@ -267,15 +286,19 @@ impl Network for Monero { ) } - async fn get_outputs( - &self, - block: &Block, - key: EdwardsPoint, - ) -> Result, NetworkError> { - let mut txs = Self::scanner(key) - .scan(&self.rpc, block) - .await - .map_err(|_| NetworkError::ConnectionError)? 
+ async fn get_outputs(&self, block: &Block, key: EdwardsPoint) -> Vec { + let outputs = loop { + match Self::scanner(key).scan(&self.rpc, block).await { + Ok(outputs) => break outputs, + Err(e) => { + log::error!("couldn't scan block {}: {e:?}", hex::encode(block.id())); + sleep(Duration::from_secs(60)).await; + continue; + } + } + }; + + let mut txs = outputs .iter() .filter_map(|outputs| Some(outputs.not_locked()).filter(|outputs| !outputs.is_empty())) .collect::>(); @@ -305,14 +328,14 @@ impl Network for Monero { } } - Ok(outputs) + outputs } async fn get_eventuality_completions( &self, eventualities: &mut EventualitiesTracker, block: &Block, - ) -> HashMap<[u8; 32], [u8; 32]> { + ) -> HashMap<[u8; 32], (usize, Transaction)> { let mut res = HashMap::new(); if eventualities.map.is_empty() { return res; @@ -322,7 +345,7 @@ impl Network for Monero { network: &Monero, eventualities: &mut EventualitiesTracker, block: &Block, - res: &mut HashMap<[u8; 32], [u8; 32]>, + res: &mut HashMap<[u8; 32], (usize, Transaction)>, ) { for hash in &block.txs { let tx = { @@ -339,7 +362,7 @@ impl Network for Monero { if let Some((_, eventuality)) = eventualities.map.get(&tx.prefix.extra) { if eventuality.matches(&tx) { - res.insert(eventualities.map.remove(&tx.prefix.extra).unwrap().0, tx.hash()); + res.insert(eventualities.map.remove(&tx.prefix.extra).unwrap().0, (block.number(), tx)); } } } @@ -373,7 +396,6 @@ impl Network for Monero { async fn prepare_send( &self, - keys: ThresholdKeys, block_number: usize, mut plan: Plan, fee: Fee, @@ -457,9 +479,7 @@ impl Network for Monero { Some(Zeroizing::new(plan.id())), inputs.clone(), payments, - plan.change.map(|key| { - Change::fingerprintable(Self::address_internal(key, CHANGE_SUBADDRESS).into()) - }), + plan.change.map(|change| Change::fingerprintable(change.into())), vec![], fee, ) { @@ -509,7 +529,6 @@ impl Network for Monero { let branch_outputs = amortize_fee(&mut plan, tx_fee); let signable = SignableTransaction { - keys, transcript, actual: match signable(plan, Some(tx_fee))? 
{ Some(signable) => signable, @@ -522,9 +541,10 @@ impl Network for Monero { async fn attempt_send( &self, + keys: ThresholdKeys, transaction: SignableTransaction, ) -> Result { - match transaction.actual.clone().multisig(transaction.keys.clone(), transaction.transcript) { + match transaction.actual.clone().multisig(keys, transaction.transcript) { Ok(machine) => Ok(machine), Err(e) => panic!("failed to create a multisig machine for TX: {e}"), } diff --git a/processor/src/plan.rs b/processor/src/plan.rs index 129966046..3f005865b 100644 --- a/processor/src/plan.rs +++ b/processor/src/plan.rs @@ -24,6 +24,7 @@ impl Payment { } pub fn write(&self, writer: &mut W) -> io::Result<()> { + // TODO: Don't allow creating Payments with an Address which can't be serialized let address: Vec = self .address .clone() @@ -74,7 +75,7 @@ pub struct Plan { pub key: ::G, pub inputs: Vec, pub payments: Vec>, - pub change: Option<::G>, + pub change: Option, } impl core::fmt::Debug for Plan { fn fmt(&self, fmt: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { @@ -83,7 +84,7 @@ impl core::fmt::Debug for Plan { .field("key", &hex::encode(self.key.to_bytes())) .field("inputs", &self.inputs) .field("payments", &self.payments) - .field("change", &self.change.map(|change| hex::encode(change.to_bytes()))) + .field("change", &self.change.as_ref().map(|change| change.to_string())) .finish() } } @@ -105,8 +106,8 @@ impl Plan { payment.transcript(&mut transcript); } - if let Some(change) = self.change { - transcript.append_message(b"change", change.to_bytes()); + if let Some(change) = &self.change { + transcript.append_message(b"change", change.to_string()); } transcript @@ -132,12 +133,23 @@ impl Plan { payment.write(writer)?; } - writer.write_all(&[u8::from(self.change.is_some())])?; - if let Some(change) = &self.change { - writer.write_all(change.to_bytes().as_ref())?; - } - - Ok(()) + // TODO: Have Plan construction fail if change cannot be serialized + let change = if let Some(change) = &self.change { + change.clone().try_into().map_err(|_| { + io::Error::new( + io::ErrorKind::Other, + format!( + "an address we said to use as change couldn't be convered to a Vec: {}", + change.to_string(), + ), + ) + })? + } else { + vec![] + }; + assert!(serai_client::primitives::MAX_ADDRESS_LEN <= u8::MAX.into()); + writer.write_all(&[u8::try_from(change.len()).unwrap()])?; + writer.write_all(&change) } pub fn read(reader: &mut R) -> io::Result { @@ -156,9 +168,20 @@ impl Plan { payments.push(Payment::::read(reader)?); } - let mut buf = [0; 1]; - reader.read_exact(&mut buf)?; - let change = if buf[0] == 1 { Some(N::Curve::read_G(reader)?) } else { None }; + let mut len = [0; 1]; + reader.read_exact(&mut len)?; + let mut change = vec![0; usize::from(len[0])]; + reader.read_exact(&mut change)?; + let change = if change.is_empty() { + None + } else { + Some(N::Address::try_from(change).map_err(|_| { + io::Error::new( + io::ErrorKind::Other, + "couldn't deserialize an Address serialized into a Plan", + ) + })?) 
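
// Plan::write/read above now serialize the change address as a length-prefixed byte string,
// with an empty string standing in for None. A minimal round-trip sketch of that framing,
// using Vec<u8> in place of a real Address type; the helper names are illustrative.
use std::io::{self, Read, Write};

fn write_optional_bytes<W: Write>(writer: &mut W, bytes: Option<&[u8]>) -> io::Result<()> {
  let bytes = bytes.unwrap_or(&[]);
  // The single length byte caps entries at 255 bytes, mirroring the MAX_ADDRESS_LEN assert
  assert!(bytes.len() <= usize::from(u8::MAX));
  writer.write_all(&[u8::try_from(bytes.len()).unwrap()])?;
  writer.write_all(bytes)
}

fn read_optional_bytes<R: Read>(reader: &mut R) -> io::Result<Option<Vec<u8>>> {
  let mut len = [0; 1];
  reader.read_exact(&mut len)?;
  let mut bytes = vec![0; usize::from(len[0])];
  reader.read_exact(&mut bytes)?;
  // An empty byte string was written for None, so it decodes back to None
  Ok(if bytes.is_empty() { None } else { Some(bytes) })
}
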
+ }; Ok(Plan { key, inputs, payments, change }) } diff --git a/processor/src/scanner.rs b/processor/src/scanner.rs deleted file mode 100644 index cdf438dd0..000000000 --- a/processor/src/scanner.rs +++ /dev/null @@ -1,525 +0,0 @@ -use core::marker::PhantomData; -use std::{ - sync::Arc, - time::Duration, - collections::{HashSet, HashMap}, -}; - -use ciphersuite::group::GroupEncoding; -use frost::curve::Ciphersuite; - -use log::{info, debug, warn}; -use tokio::{ - sync::{RwLock, mpsc}, - time::sleep, -}; - -use crate::{ - Get, DbTxn, Db, - networks::{Output, Transaction, EventualitiesTracker, Block, Network}, -}; - -#[derive(Clone, Debug)] -pub enum ScannerEvent { - // Block scanned - Block { block: >::Id, outputs: Vec }, - // Eventuality completion found on-chain - Completed([u8; 32], >::Id), -} - -pub type ScannerEventChannel = mpsc::UnboundedReceiver>; - -#[derive(Clone, Debug)] -struct ScannerDb(PhantomData, PhantomData); -impl ScannerDb { - fn scanner_key(dst: &'static [u8], key: impl AsRef<[u8]>) -> Vec { - D::key(b"SCANNER", dst, key) - } - - fn block_key(number: usize) -> Vec { - Self::scanner_key(b"block_id", u64::try_from(number).unwrap().to_le_bytes()) - } - fn block_number_key(id: &>::Id) -> Vec { - Self::scanner_key(b"block_number", id) - } - fn save_block(txn: &mut D::Transaction<'_>, number: usize, id: &>::Id) { - txn.put(Self::block_number_key(id), u64::try_from(number).unwrap().to_le_bytes()); - txn.put(Self::block_key(number), id); - } - fn block(getter: &G, number: usize) -> Option<>::Id> { - getter.get(Self::block_key(number)).map(|id| { - let mut res = >::Id::default(); - res.as_mut().copy_from_slice(&id); - res - }) - } - fn block_number(getter: &G, id: &>::Id) -> Option { - getter - .get(Self::block_number_key(id)) - .map(|number| u64::from_le_bytes(number.try_into().unwrap()).try_into().unwrap()) - } - - fn active_keys_key() -> Vec { - Self::scanner_key(b"active_keys", b"") - } - fn add_active_key(txn: &mut D::Transaction<'_>, key: ::G) { - let mut keys = txn.get(Self::active_keys_key()).unwrap_or(vec![]); - - let key_bytes = key.to_bytes(); - - let key_len = key_bytes.as_ref().len(); - assert_eq!(keys.len() % key_len, 0); - - // Don't add this key if it's already present - let mut i = 0; - while i < keys.len() { - if &keys[i .. 
(i + key_len)] == key_bytes.as_ref() { - debug!("adding {} as an active key yet it was already present", hex::encode(key_bytes)); - return; - } - i += key_len; - } - - keys.extend(key_bytes.as_ref()); - txn.put(Self::active_keys_key(), keys); - } - fn active_keys(getter: &G) -> Vec<::G> { - let bytes_vec = getter.get(Self::active_keys_key()).unwrap_or(vec![]); - let mut bytes: &[u8] = bytes_vec.as_ref(); - - // Assumes keys will be 32 bytes when calculating the capacity - // If keys are larger, this may allocate more memory than needed - // If keys are smaller, this may require additional allocations - // Either are fine - let mut res = Vec::with_capacity(bytes.len() / 32); - while !bytes.is_empty() { - res.push(N::Curve::read_G(&mut bytes).unwrap()); - } - res - } - - fn seen_key(id: &::Id) -> Vec { - Self::scanner_key(b"seen", id) - } - fn seen(getter: &G, id: &::Id) -> bool { - getter.get(Self::seen_key(id)).is_some() - } - - fn next_batch_key() -> Vec { - Self::scanner_key(b"next_batch", []) - } - fn outputs_key( - key: &::G, - block: &>::Id, - ) -> Vec { - Self::scanner_key(b"outputs", [key.to_bytes().as_ref(), block.as_ref()].concat()) - } - fn save_outputs( - txn: &mut D::Transaction<'_>, - key: &::G, - block: &>::Id, - outputs: &[N::Output], - ) { - let mut bytes = Vec::with_capacity(outputs.len() * 64); - for output in outputs { - output.write(&mut bytes).unwrap(); - } - txn.put(Self::outputs_key(key, block), bytes); - - // This is a new set of outputs, which are expected to be handled in a perfectly ordered - // fashion - - // TODO2: This is not currently how this works - // There may be new blocks 0 .. 5, which A will scan, yet then B may be activated at block 4 - // This would cause - // 0a, 1a, 2a, 3a, 4a, 5a, 4b, 5b - // when it should be - // 0a, 1a, 2a, 3a, 4a, 4b, 5a, 5b - } - fn outputs( - txn: &D::Transaction<'_>, - key: &::G, - block: &>::Id, - ) -> Option> { - let bytes_vec = txn.get(Self::outputs_key(key, block))?; - let mut bytes: &[u8] = bytes_vec.as_ref(); - - let mut res = vec![]; - while !bytes.is_empty() { - res.push(N::Output::read(&mut bytes).unwrap()); - } - Some(res) - } - - fn scanned_block_key(key: &::G) -> Vec { - Self::scanner_key(b"scanned_block", key.to_bytes()) - } - - #[allow(clippy::type_complexity)] - fn save_scanned_block( - txn: &mut D::Transaction<'_>, - key: &::G, - block: usize, - ) -> Vec { - let id = Self::block(txn, block); // It may be None for the first key rotated to - let outputs = if let Some(id) = id.as_ref() { - Self::outputs(txn, key, id).unwrap_or(vec![]) - } else { - vec![] - }; - - // Mark all the outputs from this block as seen - for output in &outputs { - txn.put(Self::seen_key(&output.id()), b""); - } - - txn.put(Self::scanned_block_key(key), u64::try_from(block).unwrap().to_le_bytes()); - - // Return this block's outputs so they can be pruned from the RAM cache - outputs - } - fn latest_scanned_block(getter: &G, key: ::G) -> usize { - let bytes = getter - .get(Self::scanned_block_key(&key)) - .expect("asking for latest scanned block of key which wasn't rotated to"); - u64::from_le_bytes(bytes.try_into().unwrap()).try_into().unwrap() - } -} - -/// The Scanner emits events relating to the blockchain, notably received outputs. -/// It WILL NOT fail to emit an event, even if it reboots at selected moments. -/// It MAY fire the same event multiple times. 
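
// The removed active_keys storage above packs every key into one database value as
// concatenated fixed-width encodings, skips keys already present, and parses them back by
// walking the buffer in key-sized steps. A minimal sketch with [u8; 32] entries standing in
// for curve points; the function names are illustrative.
fn add_key(existing: &mut Vec<u8>, key: [u8; 32]) {
  assert_eq!(existing.len() % 32, 0);
  // Don't add this key if it's already present
  if existing.chunks(32).any(|chunk| chunk == key.as_slice()) {
    return;
  }
  existing.extend(key);
}

fn parse_keys(bytes: &[u8]) -> Vec<[u8; 32]> {
  assert_eq!(bytes.len() % 32, 0);
  bytes.chunks(32).map(|chunk| <[u8; 32]>::try_from(chunk).unwrap()).collect()
}
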
-#[derive(Debug)] -pub struct Scanner { - network: N, - db: D, - keys: Vec<::G>, - - eventualities: EventualitiesTracker, - - ram_scanned: HashMap, usize>, - ram_outputs: HashSet>, - - events: mpsc::UnboundedSender>, -} - -#[derive(Debug)] -pub struct ScannerHandle { - scanner: Arc>>, - pub events: ScannerEventChannel, -} - -impl ScannerHandle { - pub async fn ram_scanned(&self) -> usize { - let mut res = None; - for scanned in self.scanner.read().await.ram_scanned.values() { - if res.is_none() { - res = Some(*scanned); - } - // Returns the lowest scanned value so no matter the keys interacted with, this is - // sufficiently scanned - res = Some(res.unwrap().min(*scanned)); - } - res.unwrap_or(0) - } - - pub async fn register_eventuality( - &mut self, - block_number: usize, - id: [u8; 32], - eventuality: N::Eventuality, - ) { - self.scanner.write().await.eventualities.register(block_number, id, eventuality) - } - - pub async fn drop_eventuality(&mut self, id: [u8; 32]) { - self.scanner.write().await.eventualities.drop(id); - } - - /// Rotate the key being scanned for. - /// - /// If no key has been prior set, this will become the key with no further actions. - /// - /// If a key has been prior set, both keys will be scanned for as detailed in the Multisig - /// documentation. The old key will eventually stop being scanned for, leaving just the - /// updated-to key. - pub async fn rotate_key( - &mut self, - txn: &mut D::Transaction<'_>, - activation_number: usize, - key: ::G, - ) { - let mut scanner = self.scanner.write().await; - if !scanner.keys.is_empty() { - // Protonet will have a single, static validator set - // TODO2 - panic!("only a single key is supported at this time"); - } - - info!("Rotating scanner to key {} at {activation_number}", hex::encode(key.to_bytes())); - - let outputs = ScannerDb::::save_scanned_block(txn, &key, activation_number); - scanner.ram_scanned.insert(key.to_bytes().as_ref().to_vec(), activation_number); - assert!(outputs.is_empty()); - - ScannerDb::::add_active_key(txn, key); - scanner.keys.push(key); - } - - // This perform a database read which isn't safe with regards to if the value is set or not - // It may be set, when it isn't expected to be set, or not set, when it is expected to be set - // Since the value is static, if it's set, it's correctly set - pub async fn block_number(&self, id: &>::Id) -> Option { - ScannerDb::::block_number(&self.scanner.read().await.db, id) - } - - // Set the next batch ID to use - pub fn set_next_batch_id(&self, txn: &mut D::Transaction<'_>, batch: u32) { - txn.put(ScannerDb::::next_batch_key(), batch.to_le_bytes()); - } - - // Get the next batch ID - pub fn next_batch_id(&self, txn: &D::Transaction<'_>) -> u32 { - txn - .get(ScannerDb::::next_batch_key()) - .map_or(0, |v| u32::from_le_bytes(v.try_into().unwrap())) - } - - /// Acknowledge having handled a block for a key. 
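
// ScannerHandle::ram_scanned above reduces per-key progress to the lowest value, so the
// number it reports has been scanned no matter which key a caller cares about. A minimal
// sketch of that reduction over a plain map; the helper name is illustrative.
use std::collections::HashMap;

fn lowest_scanned(progress: &HashMap<Vec<u8>, usize>) -> usize {
  // With no keys registered yet, report 0, matching the unwrap_or(0) in the original
  progress.values().copied().min().unwrap_or(0)
}
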
- pub async fn ack_up_to_block( - &mut self, - txn: &mut D::Transaction<'_>, - key: ::G, - id: >::Id, - ) -> Vec { - let mut scanner = self.scanner.write().await; - debug!("Block {} acknowledged", hex::encode(&id)); - - // Get the number for this block - let number = ScannerDb::::block_number(txn, &id) - .expect("main loop trying to operate on data we haven't scanned"); - // Get the number of the last block we acknowledged - let prior = ScannerDb::::latest_scanned_block(txn, key); - - let mut outputs = vec![]; - for number in (prior + 1) ..= number { - outputs.extend(ScannerDb::::save_scanned_block(txn, &key, number)); - } - - for output in &outputs { - assert!(scanner.ram_outputs.remove(output.id().as_ref())); - } - - outputs - } -} - -impl Scanner { - #[allow(clippy::new_ret_no_self)] - pub fn new(network: N, db: D) -> (ScannerHandle, Vec<::G>) { - let (events_send, events_recv) = mpsc::unbounded_channel(); - - let keys = ScannerDb::::active_keys(&db); - let mut ram_scanned = HashMap::new(); - for key in keys.clone() { - ram_scanned.insert( - key.to_bytes().as_ref().to_vec(), - ScannerDb::::latest_scanned_block(&db, key), - ); - } - - let scanner = Arc::new(RwLock::new(Scanner { - network, - db, - keys: keys.clone(), - - eventualities: EventualitiesTracker::new(), - - ram_scanned, - ram_outputs: HashSet::new(), - - events: events_send, - })); - tokio::spawn(Scanner::run(scanner.clone())); - - (ScannerHandle { scanner, events: events_recv }, keys) - } - - fn emit(&mut self, event: ScannerEvent) -> bool { - if self.events.send(event).is_err() { - info!("Scanner handler was dropped. Shutting down?"); - return false; - } - true - } - - // An async function, to be spawned on a task, to discover and report outputs - async fn run(scanner: Arc>) { - loop { - // Only check every five seconds for new blocks - sleep(Duration::from_secs(5)).await; - - // Scan new blocks - { - let mut scanner = scanner.write().await; - let latest = scanner.network.get_latest_block_number().await; - let latest = match latest { - // Only scan confirmed blocks, which we consider effectively finalized - // CONFIRMATIONS - 1 as whatever's in the latest block already has 1 confirm - Ok(latest) => latest.saturating_sub(N::CONFIRMATIONS.saturating_sub(1)), - Err(_) => { - warn!("couldn't get latest block number"); - sleep(Duration::from_secs(60)).await; - continue; - } - }; - - for key in scanner.keys.clone() { - let key_vec = key.to_bytes().as_ref().to_vec(); - let latest_scanned = scanner.ram_scanned[&key_vec]; - - for i in (latest_scanned + 1) ..= latest { - // TODO2: Check for key deprecation - - let block = match scanner.network.get_block(i).await { - Ok(block) => block, - Err(_) => { - warn!("couldn't get block {i}"); - break; - } - }; - let block_id = block.id(); - - // These block calls are safe, despite not having a txn, since they're static values - // only written to/read by this thread - // There's also no error caused by them being unexpectedly written (if the commit is - // made and then the processor suddenly reboots) - // There's also no issue if this code is run multiple times (due to code after - // aborting) - if let Some(id) = ScannerDb::::block(&scanner.db, i) { - if id != block_id { - panic!("reorg'd from finalized {} to {}", hex::encode(id), hex::encode(block_id)); - } - } else { - info!("Found new block: {}", hex::encode(&block_id)); - - if let Some(id) = ScannerDb::::block(&scanner.db, i.saturating_sub(1)) { - if id != block.parent() { - panic!( - "block {} doesn't build off expected parent {}", - 
hex::encode(block_id), - hex::encode(id), - ); - } - } - - let mut txn = scanner.db.txn(); - ScannerDb::::save_block(&mut txn, i, &block_id); - txn.commit(); - } - - let outputs = match scanner.network.get_outputs(&block, key).await { - Ok(outputs) => outputs, - Err(_) => { - warn!("couldn't scan block {i}"); - break; - } - }; - - // Write this number as scanned so we won't perform any of the following mutations - // multiple times - scanner.ram_scanned.insert(key_vec.clone(), i); - - // Panic if we've already seen these outputs - for output in &outputs { - let id = output.id(); - info!( - "block {} had output {} worth {}", - hex::encode(&block_id), - hex::encode(&id), - output.amount(), - ); - - // On Bitcoin, the output ID should be unique for a given chain - // On Monero, it's trivial to make an output sharing an ID with another - // We should only scan outputs with valid IDs however, which will be unique - - /* - The safety of this code must satisfy the following conditions: - 1) seen is not set for the first occurrence - 2) seen is set for any future occurrence - - seen is only written to after this code completes. Accordingly, it cannot be set - before the first occurrence UNLESSS it's set, yet the last scanned block isn't. - They are both written in the same database transaction, preventing this. - - As for future occurrences, the RAM entry ensures they're handled properly even if - the database has yet to be set. - - On reboot, which will clear the RAM, if seen wasn't set, neither was latest scanned - block. Accordingly, this will scan from some prior block, re-populating the RAM. - - If seen was set, then this will be successfully read. - - There's also no concern ram_outputs was pruned, yet seen wasn't set, as pruning - from ram_outputs will acquire a write lock (preventing this code from acquiring - its own write lock and running), and during its holding of the write lock, it - commits the transaction setting seen and the latest scanned block. - - This last case isn't true. Committing seen/latest_scanned_block happens after - relinquishing the write lock. - - TODO: Only update ram_outputs after committing the TXN in question. - */ - let seen = ScannerDb::::seen(&scanner.db, &id); - let id = id.as_ref().to_vec(); - if seen || scanner.ram_outputs.contains(&id) { - panic!("scanned an output multiple times"); - } - scanner.ram_outputs.insert(id); - } - - // Clone network because we can't borrow it while also mutably borrowing the - // eventualities - // Thankfully, network is written to be a cheap clone - let network = scanner.network.clone(); - // TODO: This get_eventuality_completions call will panic if called multiple times over - // the same blocks (such as when checking multiple keys under the current layout), - // as get_eventuality_completions assumes it's always only fed a future block - for (id, tx) in - network.get_eventuality_completions(&mut scanner.eventualities, &block).await - { - // This should only happen if there's a P2P net desync or there's a malicious - // validator - warn!( - "eventuality {} resolved by {}, as found on chain. 
this should not happen", - hex::encode(id), - hex::encode(&tx) - ); - - if !scanner.emit(ScannerEvent::Completed(id, tx)) { - return; - } - } - - // Don't emit an event if there's not any outputs - if outputs.is_empty() { - continue; - } - - // Save the outputs to disk - let mut txn = scanner.db.txn(); - ScannerDb::::save_outputs(&mut txn, &key, &block_id, &outputs); - txn.commit(); - - // Send all outputs - // TODO2: Fire this with all outputs for all keys, not for each key - if !scanner.emit(ScannerEvent::Block { block: block_id, outputs }) { - return; - } - } - } - } - } - } -} diff --git a/processor/src/signer.rs b/processor/src/signer.rs index a9afa8dbd..e098c07c9 100644 --- a/processor/src/signer.rs +++ b/processor/src/signer.rs @@ -139,10 +139,6 @@ impl Signer { } } - pub fn keys(&self) -> ThresholdKeys { - self.keys.clone() - } - fn verify_id(&self, id: &SignId) -> Result<(), ()> { // Check the attempt lines up match self.attempt.get(&id.id) { @@ -202,28 +198,42 @@ impl Signer { self.events.push_back(SignerEvent::SignedTransaction { id, tx: tx_id }); } - pub async fn eventuality_completion( + pub fn completed(&mut self, txn: &mut D::Transaction<'_>, id: [u8; 32], tx: N::Transaction) { + let first_completion = !self.already_completed(txn, id); + + // Save this completion to the DB + SignerDb::::save_transaction(txn, &tx); + SignerDb::::complete(txn, id, &tx.id()); + + if first_completion { + self.complete(id, tx.id()); + } + } + + // Doesn't use any loops/retries since we'll eventually get this from the Scanner anyways + async fn claimed_eventuality_completion( &mut self, txn: &mut D::Transaction<'_>, id: [u8; 32], tx_id: &>::Id, - ) { + ) -> bool { if let Some(eventuality) = SignerDb::::eventuality(txn, id) { // Transaction hasn't hit our mempool/was dropped for a different signature // The latter can happen given certain latency conditions/a single malicious signer - // In the case of a single malicious signer, they can drag multiple honest - // validators down with them, so we unfortunately can't slash on this case + // In the case of a single malicious signer, they can drag multiple honest validators down + // with them, so we unfortunately can't slash on this case let Ok(tx) = self.network.get_transaction(tx_id).await else { warn!( - "a validator claimed {} completed {} yet we didn't have that TX in our mempool", + "a validator claimed {} completed {} yet we didn't have that TX in our mempool {}", hex::encode(tx_id), hex::encode(id), + "(or had another connectivity issue)", ); - return; + return false; }; if self.network.confirm_completion(&eventuality, &tx) { - info!("eventuality for {} resolved in TX {}", hex::encode(id), hex::encode(tx_id)); + info!("signer eventuality for {} resolved in TX {}", hex::encode(id), hex::encode(tx_id)); let first_completion = !self.already_completed(txn, id); @@ -233,6 +243,7 @@ impl Signer { if first_completion { self.complete(id, tx.id()); + return true; } } else { warn!( @@ -242,12 +253,17 @@ impl Signer { ); } } else { - warn!( - "signer {} informed of the completion of plan {}. that plan was not recognized", + // If we don't have this in RAM, it should be because we already finished signing it + // TODO: Will the coordinator ever send us Completed for an unknown ID? 
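
// Both completed() and claimed_eventuality_completion() above gate their event emission on
// whether this is the first completion recorded for the plan ID. A minimal sketch of that
// guard with an in-memory map standing in for SignerDb; the names here are illustrative.
use std::collections::HashMap;

struct CompletionLog {
  completions: HashMap<[u8; 32], Vec<u8>>,
}

impl CompletionLog {
  // Records the completing transaction and reports whether an event should be emitted,
  // mirroring the first_completion checks in the diff
  fn complete(&mut self, id: [u8; 32], tx_id: &[u8]) -> bool {
    let first_completion = !self.completions.contains_key(&id);
    self.completions.insert(id, tx_id.to_vec());
    first_completion
  }
}
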
+ assert!(SignerDb::::completed(txn, id).is_some()); + info!( + "signer {} informed of the eventuality completion for plan {}, {}", hex::encode(self.keys.group_key().to_bytes()), hex::encode(id), + "which we already marked as completed", ); } + false } async fn attempt(&mut self, txn: &mut D::Transaction<'_>, id: [u8; 32], attempt: u32) { @@ -311,7 +327,7 @@ impl Signer { SignerDb::::attempt(txn, &id); // Attempt to create the TX - let machine = match self.network.attempt_send(tx).await { + let machine = match self.network.attempt_send(self.keys.clone(), tx).await { Err(e) => { error!("failed to attempt {}, #{}: {:?}", hex::encode(id.id), id.attempt, e); return; @@ -481,7 +497,7 @@ impl Signer { } tx.as_mut().copy_from_slice(&tx_vec); - self.eventuality_completion(txn, id, &tx).await; + self.claimed_eventuality_completion(txn, id, &tx).await; } } } diff --git a/processor/src/substrate_signer.rs b/processor/src/substrate_signer.rs index 64ae1fbbf..11ae0d000 100644 --- a/processor/src/substrate_signer.rs +++ b/processor/src/substrate_signer.rs @@ -208,7 +208,7 @@ impl SubstrateSigner { // b"substrate" is a literal from sp-core let machine = AlgorithmMachine::new(Schnorrkel::new(b"substrate"), self.keys.clone()); - // TODO: Use a seeded RNG here so we don't produce distinct messages with the same purpose + // TODO: Use a seeded RNG here so we don't produce distinct messages with the same intent // This is also needed so we don't preprocess, send preprocess, reboot before ack'ing the // message, send distinct preprocess, and then attempt a signing session premised on the former // with the latter diff --git a/processor/src/tests/addresses.rs b/processor/src/tests/addresses.rs index 00a88805a..9d726e030 100644 --- a/processor/src/tests/addresses.rs +++ b/processor/src/tests/addresses.rs @@ -12,11 +12,12 @@ use serai_db::{DbTxn, MemDb}; use crate::{ Plan, Db, networks::{OutputType, Output, Block, Network}, - scanner::{ScannerEvent, Scanner, ScannerHandle}, + multisigs::scanner::{ScannerEvent, Scanner, ScannerHandle}, tests::sign, }; async fn spend( + db: &mut D, network: &N, keys: &HashMap>, scanner: &mut ScannerHandle, @@ -32,10 +33,14 @@ async fn spend( keys.clone(), network .prepare_send( - keys.clone(), network.get_latest_block_number().await.unwrap() - N::CONFIRMATIONS, // Send to a change output - Plan { key, inputs: outputs.clone(), payments: vec![], change: Some(key) }, + Plan { + key, + inputs: outputs.clone(), + payments: vec![], + change: Some(N::change_address(key)), + }, network.get_fee().await, ) .await @@ -51,13 +56,19 @@ async fn spend( network.mine_block().await; } match timeout(Duration::from_secs(30), scanner.events.recv()).await.unwrap().unwrap() { - ScannerEvent::Block { block: _, outputs } => { + ScannerEvent::Block { is_retirement_block, block, outputs } => { + scanner.multisig_completed.send(false).unwrap(); + assert!(!is_retirement_block); assert_eq!(outputs.len(), 1); // Make sure this is actually a change output assert_eq!(outputs[0].kind(), OutputType::Change); + let mut txn = db.txn(); + assert_eq!(scanner.ack_block(&mut txn, block).await.1, outputs); + scanner.release_lock().await; + txn.commit(); outputs } - ScannerEvent::Completed(_, _) => { + ScannerEvent::Completed(_, _, _, _) => { panic!("unexpectedly got eventuality completion"); } } @@ -76,11 +87,14 @@ pub async fn test_addresses(network: N) { } let mut db = MemDb::new(); - let (mut scanner, active_keys) = Scanner::new(network.clone(), db.clone()); - assert!(active_keys.is_empty()); + let (mut scanner, 
current_keys) = Scanner::new(network.clone(), db.clone()); + assert!(current_keys.is_empty()); let mut txn = db.txn(); - scanner.rotate_key(&mut txn, network.get_latest_block_number().await.unwrap(), key).await; + scanner.register_key(&mut txn, network.get_latest_block_number().await.unwrap(), key).await; txn.commit(); + for _ in 0 .. N::CONFIRMATIONS { + network.mine_block().await; + } // Receive funds to the branch address and make sure it's properly identified let block_id = network.test_send(N::branch_address(key)).await.id(); @@ -88,19 +102,25 @@ pub async fn test_addresses(network: N) { // Verify the Scanner picked them up let outputs = match timeout(Duration::from_secs(30), scanner.events.recv()).await.unwrap().unwrap() { - ScannerEvent::Block { block, outputs } => { + ScannerEvent::Block { is_retirement_block, block, outputs } => { + scanner.multisig_completed.send(false).unwrap(); + assert!(!is_retirement_block); assert_eq!(block, block_id); assert_eq!(outputs.len(), 1); assert_eq!(outputs[0].kind(), OutputType::Branch); + let mut txn = db.txn(); + assert_eq!(scanner.ack_block(&mut txn, block).await.1, outputs); + scanner.release_lock().await; + txn.commit(); outputs } - ScannerEvent::Completed(_, _) => { + ScannerEvent::Completed(_, _, _, _) => { panic!("unexpectedly got eventuality completion"); } }; // Spend the branch output, creating a change output and ensuring we actually get change - let outputs = spend(&network, &keys, &mut scanner, outputs).await; + let outputs = spend(&mut db, &network, &keys, &mut scanner, outputs).await; // Also test spending the change output - spend(&network, &keys, &mut scanner, outputs).await; + spend(&mut db, &network, &keys, &mut scanner, outputs).await; } diff --git a/processor/src/tests/literal/mod.rs b/processor/src/tests/literal/mod.rs index d94943616..c98913cdd 100644 --- a/processor/src/tests/literal/mod.rs +++ b/processor/src/tests/literal/mod.rs @@ -16,6 +16,7 @@ mod bitcoin { bitcoin_signer, bitcoin_wallet, bitcoin_addresses, + bitcoin_no_deadlock_in_multisig_completed, ); } @@ -39,5 +40,6 @@ mod monero { monero_signer, monero_wallet, monero_addresses, + monero_no_deadlock_in_multisig_completed, ); } diff --git a/processor/src/tests/mod.rs b/processor/src/tests/mod.rs index 084d60b39..13af66bfd 100644 --- a/processor/src/tests/mod.rs +++ b/processor/src/tests/mod.rs @@ -2,7 +2,7 @@ mod key_gen; pub(crate) use key_gen::test_key_gen; mod scanner; -pub(crate) use scanner::test_scanner; +pub(crate) use scanner::{test_scanner, test_no_deadlock_in_multisig_completed}; mod signer; pub(crate) use signer::{sign, test_signer}; @@ -59,8 +59,12 @@ macro_rules! test_network { $signer: ident, $wallet: ident, $addresses: ident, + $no_deadlock_in_multisig_completed: ident, ) => { - use $crate::tests::{test_key_gen, test_scanner, test_signer, test_wallet, test_addresses}; + use $crate::tests::{ + test_key_gen, test_scanner, test_no_deadlock_in_multisig_completed, test_signer, test_wallet, + test_addresses, + }; // This doesn't interact with a node and accordingly doesn't need to be run sequentially #[tokio::test] @@ -93,6 +97,12 @@ macro_rules! test_network { test_addresses($network().await).await; } } + + async_sequential! 
{ + async fn $no_deadlock_in_multisig_completed() { + test_no_deadlock_in_multisig_completed($network().await).await; + } + } }; } diff --git a/processor/src/tests/scanner.rs b/processor/src/tests/scanner.rs index 45d7abef2..3f81bd483 100644 --- a/processor/src/tests/scanner.rs +++ b/processor/src/tests/scanner.rs @@ -3,17 +3,15 @@ use std::sync::{Arc, Mutex}; use rand_core::OsRng; -use frost::Participant; +use frost::{Participant, tests::key_gen}; use tokio::time::timeout; -use serai_client::primitives::BlockHash; - use serai_db::{DbTxn, Db, MemDb}; use crate::{ networks::{OutputType, Output, Block, Network}, - scanner::{ScannerEvent, Scanner, ScannerHandle}, + multisigs::scanner::{ScannerEvent, Scanner, ScannerHandle}, }; pub async fn test_scanner(network: N) { @@ -32,16 +30,19 @@ pub async fn test_scanner(network: N) { let db = MemDb::new(); let new_scanner = || async { let mut db = db.clone(); - let (mut scanner, active_keys) = Scanner::new(network.clone(), db.clone()); + let (mut scanner, current_keys) = Scanner::new(network.clone(), db.clone()); let mut first = first.lock().unwrap(); if *first { - assert!(active_keys.is_empty()); + assert!(current_keys.is_empty()); let mut txn = db.txn(); - scanner.rotate_key(&mut txn, activation_number, group_key).await; + scanner.register_key(&mut txn, activation_number, group_key).await; txn.commit(); + for _ in 0 .. N::CONFIRMATIONS { + network.mine_block().await; + } *first = false; } else { - assert_eq!(active_keys.len(), 1); + assert_eq!(current_keys.len(), 1); } scanner }; @@ -55,13 +56,15 @@ pub async fn test_scanner(network: N) { let verify_event = |mut scanner: ScannerHandle| async { let outputs = match timeout(Duration::from_secs(30), scanner.events.recv()).await.unwrap().unwrap() { - ScannerEvent::Block { block, outputs } => { + ScannerEvent::Block { is_retirement_block, block, outputs } => { + scanner.multisig_completed.send(false).unwrap(); + assert!(!is_retirement_block); assert_eq!(block, block_id); assert_eq!(outputs.len(), 1); assert_eq!(outputs[0].kind(), OutputType::External); outputs } - ScannerEvent::Completed(_, _) => { + ScannerEvent::Completed(_, _, _, _) => { panic!("unexpectedly got eventuality completion"); } }; @@ -73,22 +76,10 @@ pub async fn test_scanner(network: N) { verify_event(new_scanner().await).await; // Acknowledge the block - - // Acknowledging it should yield a list of all blocks since the last acknowledged block - let mut blocks = vec![]; - let mut curr_block = activation_number + 1; - loop { - let block = network.get_block(curr_block).await.unwrap().id(); - blocks.push(BlockHash(block.as_ref().try_into().unwrap())); - if block == block_id { - break; - } - curr_block += 1; - } - let mut cloned_db = db.clone(); let mut txn = cloned_db.txn(); - assert_eq!(scanner.ack_up_to_block(&mut txn, keys.group_key(), block_id).await, outputs); + assert_eq!(scanner.ack_block(&mut txn, block_id).await.1, outputs); + scanner.release_lock().await; txn.commit(); // There should be no more events @@ -97,3 +88,67 @@ pub async fn test_scanner(network: N) { // Create a new scanner off the current DB and make sure it also does nothing assert!(timeout(Duration::from_secs(30), new_scanner().await.events.recv()).await.is_err()); } + +pub async fn test_no_deadlock_in_multisig_completed(network: N) { + // Mine blocks so there's a confirmed block + for _ in 0 .. 
N::CONFIRMATIONS { + network.mine_block().await; + } + + let mut db = MemDb::new(); + let (mut scanner, current_keys) = Scanner::new(network.clone(), db.clone()); + assert!(current_keys.is_empty()); + + let mut txn = db.txn(); + // Register keys to cause Block events at CONFIRMATIONS (dropped since first keys), + // CONFIRMATIONS + 1, and CONFIRMATIONS + 2 + for i in 0 .. 3 { + scanner + .register_key( + &mut txn, + network.get_latest_block_number().await.unwrap() + N::CONFIRMATIONS + i, + { + let mut keys = key_gen(&mut OsRng); + for (_, keys) in keys.iter_mut() { + N::tweak_keys(keys); + } + keys[&Participant::new(1).unwrap()].group_key() + }, + ) + .await; + } + txn.commit(); + + for _ in 0 .. (3 * N::CONFIRMATIONS) { + network.mine_block().await; + } + + let block_id = + match timeout(Duration::from_secs(30), scanner.events.recv()).await.unwrap().unwrap() { + ScannerEvent::Block { is_retirement_block, block, outputs: _ } => { + scanner.multisig_completed.send(false).unwrap(); + assert!(!is_retirement_block); + block + } + ScannerEvent::Completed(_, _, _, _) => { + panic!("unexpectedly got eventuality completion"); + } + }; + + match timeout(Duration::from_secs(30), scanner.events.recv()).await.unwrap().unwrap() { + ScannerEvent::Block { .. } => {} + ScannerEvent::Completed(_, _, _, _) => { + panic!("unexpectedly got eventuality completion"); + } + }; + + // The ack_block acquisiton shows the Scanner isn't maintaining the lock on its own thread after + // emitting the Block event + // TODO: This is incomplete. Also test after emitting Completed + let mut txn = db.txn(); + assert_eq!(scanner.ack_block(&mut txn, block_id).await.1, vec![]); + scanner.release_lock().await; + txn.commit(); + + scanner.multisig_completed.send(false).unwrap(); +} diff --git a/processor/src/tests/signer.rs b/processor/src/tests/signer.rs index 1e0c53b83..c5a0e6c5a 100644 --- a/processor/src/tests/signer.rs +++ b/processor/src/tests/signer.rs @@ -153,7 +153,7 @@ pub async fn test_signer(network: N) { } let key = keys[&Participant::new(1).unwrap()].group_key(); - let outputs = network.get_outputs(&network.test_send(N::address(key)).await, key).await.unwrap(); + let outputs = network.get_outputs(&network.test_send(N::address(key)).await, key).await; let sync_block = network.get_latest_block_number().await.unwrap() - N::CONFIRMATIONS; let fee = network.get_fee().await; @@ -163,13 +163,12 @@ pub async fn test_signer(network: N) { for (i, keys) in keys.drain() { let (signable, eventuality) = network .prepare_send( - keys.clone(), sync_block, Plan { key, inputs: outputs.clone(), payments: vec![Payment { address: N::address(key), data: None, amount }], - change: Some(key), + change: Some(N::change_address(key)), }, fee, ) @@ -194,8 +193,7 @@ pub async fn test_signer(network: N) { &network.get_block(network.get_latest_block_number().await.unwrap()).await.unwrap(), key, ) - .await - .unwrap(); + .await; assert_eq!(outputs.len(), 2); // Adjust the amount for the fees let amount = amount - tx.fee(&network).await; diff --git a/processor/src/tests/wallet.rs b/processor/src/tests/wallet.rs index 8b22685cf..8bb84d457 100644 --- a/processor/src/tests/wallet.rs +++ b/processor/src/tests/wallet.rs @@ -11,13 +11,20 @@ use serai_db::{DbTxn, Db, MemDb}; use crate::{ Payment, Plan, networks::{Output, Transaction, Block, Network}, - scanner::{ScannerEvent, Scanner}, - scheduler::Scheduler, + multisigs::{ + scanner::{ScannerEvent, Scanner}, + scheduler::Scheduler, + }, tests::sign, }; // Tests the Scanner, Scheduler, and Signer 
together pub async fn test_wallet(network: N) { + // Mine blocks so there's a confirmed block + for _ in 0 .. N::CONFIRMATIONS { + network.mine_block().await; + } + let mut keys = key_gen(&mut OsRng); for (_, keys) in keys.iter_mut() { N::tweak_keys(keys); @@ -25,27 +32,36 @@ pub async fn test_wallet(network: N) { let key = keys[&Participant::new(1).unwrap()].group_key(); let mut db = MemDb::new(); - let (mut scanner, active_keys) = Scanner::new(network.clone(), db.clone()); - assert!(active_keys.is_empty()); + let (mut scanner, current_keys) = Scanner::new(network.clone(), db.clone()); + assert!(current_keys.is_empty()); let (block_id, outputs) = { let mut txn = db.txn(); - scanner.rotate_key(&mut txn, network.get_latest_block_number().await.unwrap(), key).await; + scanner.register_key(&mut txn, network.get_latest_block_number().await.unwrap(), key).await; txn.commit(); + for _ in 0 .. N::CONFIRMATIONS { + network.mine_block().await; + } let block = network.test_send(N::address(key)).await; let block_id = block.id(); match timeout(Duration::from_secs(30), scanner.events.recv()).await.unwrap().unwrap() { - ScannerEvent::Block { block, outputs } => { + ScannerEvent::Block { is_retirement_block, block, outputs } => { + scanner.multisig_completed.send(false).unwrap(); + assert!(!is_retirement_block); assert_eq!(block, block_id); assert_eq!(outputs.len(), 1); (block_id, outputs) } - ScannerEvent::Completed(_, _) => { + ScannerEvent::Completed(_, _, _, _) => { panic!("unexpectedly got eventuality completion"); } } }; + let mut txn = db.txn(); + assert_eq!(scanner.ack_block(&mut txn, block_id.clone()).await.1, outputs); + scanner.release_lock().await; + txn.commit(); let mut txn = db.txn(); let mut scheduler = Scheduler::new::(&mut txn, key); @@ -54,6 +70,8 @@ pub async fn test_wallet(network: N) { &mut txn, outputs.clone(), vec![Payment { address: N::address(key), data: None, amount }], + key, + false, ); txn.commit(); assert_eq!( @@ -62,7 +80,7 @@ pub async fn test_wallet(network: N) { key, inputs: outputs.clone(), payments: vec![Payment { address: N::address(key), data: None, amount }], - change: Some(key), + change: Some(N::change_address(key)), }] ); @@ -78,7 +96,7 @@ pub async fn test_wallet(network: N) { let mut eventualities = vec![]; for (i, keys) in keys.drain() { let (signable, eventuality) = network - .prepare_send(keys.clone(), network.get_block_number(&block_id).await, plans[0].clone(), fee) + .prepare_send(network.get_block_number(&block_id).await, plans[0].clone(), fee) .await .unwrap() .0 @@ -93,8 +111,7 @@ pub async fn test_wallet(network: N) { network.mine_block().await; let block_number = network.get_latest_block_number().await.unwrap(); let block = network.get_block(block_number).await.unwrap(); - let first_outputs = outputs; - let outputs = network.get_outputs(&block, key).await.unwrap(); + let outputs = network.get_outputs(&block, key).await; assert_eq!(outputs.len(), 2); let amount = amount - tx.fee(&network).await; assert!((outputs[0].amount() == amount) || (outputs[1].amount() == amount)); @@ -108,20 +125,20 @@ pub async fn test_wallet(network: N) { } match timeout(Duration::from_secs(30), scanner.events.recv()).await.unwrap().unwrap() { - ScannerEvent::Block { block: block_id, outputs: these_outputs } => { + ScannerEvent::Block { is_retirement_block, block: block_id, outputs: these_outputs } => { + scanner.multisig_completed.send(false).unwrap(); + assert!(!is_retirement_block); assert_eq!(block_id, block.id()); assert_eq!(these_outputs, outputs); } - 
ScannerEvent::Completed(_, _) => { + ScannerEvent::Completed(_, _, _, _) => { panic!("unexpectedly got eventuality completion"); } } // Check the Scanner DB can reload the outputs let mut txn = db.txn(); - assert_eq!( - scanner.ack_up_to_block(&mut txn, key, block.id()).await, - [first_outputs, outputs].concat().to_vec() - ); + assert_eq!(scanner.ack_block(&mut txn, block.id()).await.1, outputs); + scanner.release_lock().await; txn.commit(); } diff --git a/tests/coordinator/src/tests/batch.rs b/tests/coordinator/src/tests/batch.rs index e0257dc3a..bff81c2f9 100644 --- a/tests/coordinator/src/tests/batch.rs +++ b/tests/coordinator/src/tests/batch.rs @@ -22,10 +22,9 @@ use messages::{sign::SignId, SubstrateContext, CoordinatorMessage}; use crate::{*, tests::*}; -pub async fn batch( +pub async fn batch( processors: &mut [Processor], substrate_key: &Zeroizing<::F>, - network_key: &Zeroizing, batch: Batch, ) -> u64 { let mut id = [0; 32]; @@ -213,7 +212,6 @@ pub async fn batch( }, network: batch.batch.network, block: last_serai_block, - key: (C::generator() * **network_key).to_bytes().as_ref().to_vec(), burns: vec![], batches: vec![batch.batch.id], } @@ -257,11 +255,10 @@ async fn batch_test() { } let mut processors = new_processors; - let (substrate_key, network_key) = key_gen::(&mut processors).await; - batch::( + let (substrate_key, _) = key_gen::(&mut processors).await; + batch( &mut processors, &substrate_key, - &network_key, Batch { network: NetworkId::Bitcoin, id: 0, diff --git a/tests/coordinator/src/tests/sign.rs b/tests/coordinator/src/tests/sign.rs index 816caf87c..1a835b205 100644 --- a/tests/coordinator/src/tests/sign.rs +++ b/tests/coordinator/src/tests/sign.rs @@ -232,10 +232,9 @@ async fn sign_test() { let balance = Balance { coin: Coin::Bitcoin, amount }; let coin_block = BlockHash([0x33; 32]); - let block_included_in = batch::( + let block_included_in = batch( &mut processors, &substrate_key, - &network_key, Batch { network: NetworkId::Bitcoin, id: 0, @@ -346,7 +345,6 @@ async fn sign_test() { }, network: NetworkId::Bitcoin, block: last_serai_block.number(), - key: (Secp256k1::generator() * *network_key).to_bytes().to_vec(), burns: vec![OutInstructionWithBalance { instruction: out_instruction.clone(), balance: Balance { coin: Coin::Bitcoin, amount } diff --git a/tests/message-queue/src/lib.rs b/tests/message-queue/src/lib.rs index f1a548440..34f49af79 100644 --- a/tests/message-queue/src/lib.rs +++ b/tests/message-queue/src/lib.rs @@ -47,6 +47,7 @@ pub fn instance( hex::encode((Ristretto::generator() * priv_keys[&NetworkId::Monero]).to_bytes()), ), ("DB_PATH".to_string(), "./message-queue-db".to_string()), + ("RUST_LOG".to_string(), "serai_message_queue=trace,".to_string()), ] .into(), ); diff --git a/tests/processor/src/lib.rs b/tests/processor/src/lib.rs index 8004e81df..a0112429d 100644 --- a/tests/processor/src/lib.rs +++ b/tests/processor/src/lib.rs @@ -54,6 +54,7 @@ pub fn processor_instance( ("NETWORK_RPC_LOGIN".to_string(), format!("{RPC_USER}:{RPC_PASS}")), ("NETWORK_RPC_PORT".to_string(), port.to_string()), ("DB_PATH".to_string(), "./processor-db".to_string()), + ("RUST_LOG".to_string(), "serai_processor=trace,".to_string()), ] .into(), ) diff --git a/tests/processor/src/tests/batch.rs b/tests/processor/src/tests/batch.rs index 71fd2a535..497a270cc 100644 --- a/tests/processor/src/tests/batch.rs +++ b/tests/processor/src/tests/batch.rs @@ -149,7 +149,6 @@ pub(crate) async fn substrate_block( context: _, network: sent_network, block: sent_block, - key: _, burns: _, 
           batches: _,
         } => {
@@ -280,8 +279,6 @@ fn batch_test() {
           },
           network,
           block: substrate_block_num + u64::from(i),
-          // TODO: Should we use the network key here? Or should we only use the Ristretto key?
-          key: key_pair.1.to_vec(),
           burns: vec![],
           batches: vec![batch.batch.id],
         },
diff --git a/tests/processor/src/tests/send.rs b/tests/processor/src/tests/send.rs
index 6a32a8ae4..238c28329 100644
--- a/tests/processor/src/tests/send.rs
+++ b/tests/processor/src/tests/send.rs
@@ -213,8 +213,6 @@ fn send_test() {
         },
         network,
         block: substrate_block_num,
-        // TODO: Should we use the network key here? Or should we only use the Ristretto key?
-        key: key_pair.1.to_vec(),
         burns: vec![OutInstructionWithBalance {
           instruction: OutInstruction { address: wallet.address(), data: None },
           balance: balance_sent,
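
// The test harness changes above also pass per-crate log filters through RUST_LOG
// ("serai_message_queue=trace," and "serai_processor=trace,"). A hedged sketch of consuming
// such a target=level directive with env_logger, shown only to illustrate the filter
// syntax; the processor itself may initialise logging differently.
fn main() {
  // Enables trace-level output for the serai_processor target and nothing else
  env_logger::Builder::new().parse_filters("serai_processor=trace,").init();
  log::trace!(target: "serai_processor", "trace logging enabled");
}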