feat: enhanced end-to-end tests (#10)

* feat: refactored happy case test, splitting into several reusable functions for other cases * fix: threshold requirement shall allow equality * feat: added faulty party test * fix: resolve linter issue with complex return type * fix: include own weight in threshold checks in update_state * chore: improve update state warning source with comments * feat: added rate limiting and malicious party test * feat: refactored tests - moved ballot tests to integration, created separate mock data module * feat: refactored integration tests, better results analysis * feat: added faulty leader test for clarity * chore: update cargo.toml and readme * chore: fixed clippy check with --all-features * feat: added check for own message receipt, warnings * feat: improve config stages timeouts; feature-enabling for integration tests in Cargo.toml * feat: added integration test for large number of parties, threshold computation function to config * chore: flaky tests in rust ci * fix: threshold ceiling division * chore: testing with nextest in rust workflow * fix: changed scheduling to instant * chore: added security considerations section to readme
distributed-lab · Oct 17, 2024 · a364193 · a364193
1 parent 754e246
commit a364193
Show file tree

Hide file tree

Showing 10 changed files with 588 additions and 358 deletions.
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -13,6 +13,9 @@ on:
 permissions:
   contents: read
 
+env:
+  CARGO_TERM_COLOR: always
+
 jobs:
   check:
     name: Check
@@ -83,12 +86,14 @@ jobs:
           profile: minimal
           toolchain: ${{ matrix.rust }}
 
-      - name: Run cargo test
-        uses: actions-rs/cargo@v1
-        continue-on-error: false
+      - name: Install Nextest
+        uses: taiki-e/install-action@v2
         with:
-          command: test
-          args: --all-features --verbose
+          tool: nextest
+
+      - name: Run Nextest
+        # We have non-deterministic integration tests, thus adding retries.
+        run: cargo nextest run --all-features --retries 3
 
   lints:
     name: Lints
@@ -133,4 +138,4 @@ jobs:
         continue-on-error: false
         with:
           command: clippy
-          args: --all-targets -- -D warnings
+          args: --all-targets --all-features -- -D warnings
diff --git a/Cargo.toml b/Cargo.toml
@@ -5,7 +5,7 @@ edition = "2021"
 description = "BPCon: A Byzantine Fault-Tolerant Consensus Protocol Implementation in Rust."
 license = "MIT"
 repository = "https://github.com/distributed-lab/bpcon"
-homepage = "https://github.com/distributed-lab/bpcon"
+homepage = "https://github.com/distributed-lab/bpcon#readme"
 documentation = "https://distributed-lab.github.io/bpcon/"
 keywords = ["consensus", "byzantine", "protocol", "distributed-systems", "blockchain"]
 categories = ["algorithms"]
@@ -23,8 +23,13 @@ seeded-random = "^0.6.0"
 thiserror = "^1.0.63"
 
 [features]
-default = ["full"]
-full = ["tokio/full", "rkyv/validation"]
+default = ["tokio/full", "rkyv/validation"]
+test-mocks = []
 
 [dev-dependencies]
 tokio = { version = "^1.39.2", features = ["test-util"] }
+futures = "0.3.30"
+
+[[test]]
+name = "mod"
+required-features = ["test-mocks"]
diff --git a/README.md b/README.md
@@ -25,7 +25,7 @@ bpcon = {version = "0.1.0", git = "https://github.com/distributed-lab/bpcon"}
 This is a core trait, which defines what type are you selecting in your consensus.
 It may be the next block in blockchain, or leader for some operation, or anything else you need.
 
-Below is a simple example, where we will operate on selection for `u64` type. 
+Below is a simple example, where we will operate on selection for `u64` type.
 Using it you may interpret `ID` for leader of distributed operation, for instance.
 
 ```rust
@@ -40,7 +40,7 @@ impl Value for MyValue {}
 
 ### Implement [ValueSelector](https://distributed-lab.github.io/bpcon/bpcon/value/trait.ValueSelector.html) trait
 
-`BPCon` allows you to define specific conditions how proposer (leader) will select value 
+`BPCon` allows you to define specific conditions how proposer (leader) will select value
 and how other members will verify its selection.
 
 Here is a simple example:
@@ -91,11 +91,11 @@ impl ValueSelector<MyValue> for MyValueSelector {
 
 `LeaderElector` trait allows you to define specific conditions, how to select leader for consensus.
 
-__NOTE: it is important to provide deterministic mechanism, 
-because each participant will compute leader for itself 
+__NOTE: it is important to provide deterministic mechanism,
+because each participant will compute leader for itself,
 and in case it is not deterministic, state divergence occurs.__
 
-We also provide ready-to-use 
+We also provide ready-to-use
 [DefaultLeaderElector](https://distributed-lab.github.io/bpcon/bpcon/leader/struct.DefaultLeaderElector.html)
 which is using weighted randomization.
 
@@ -115,21 +115,53 @@ use bpcon::config::BPConConfig;
 let cfg = BPConConfig::with_default_timeouts(vec![1, 1, 1, 1, 1, 1], 4);
 ```
 
-Feel free to explore [config.rs](https://distributed-lab.github.io/bpcon/bpcon/config/struct.BPConConfig.html) 
+Feel free to explore [config.rs](https://distributed-lab.github.io/bpcon/bpcon/config/struct.BPConConfig.html)
 for more information.
 
 ### Create parties
 
-Having `BPConConfig`, `ValueSelector` and `LeaderElector` defined, instantiate your parties. 
+Having `BPConConfig`, `ValueSelector` and `LeaderElector` defined, instantiate your parties.
 Check out [new](https://distributed-lab.github.io/bpcon/bpcon/party/struct.Party.html#method.new)
 method on a `Party` struct.
 
 ### Launch ballot on parties and handle messages
 
-Each party interfaces communication with external system via channels. 
+Each party interfaces communication with external system via channels.
 In a way, you shall propagate outgoing messages to other parties like:
 
 1. Listen for outgoing message using `msg_out_receiver`.
 2. Forward it to other parties using `msg_in_sender`.
 
-We welcome you to check `test_end_to_end_ballot` in `party.rs` for example.
+We welcome you to check our [integration tests](./tests) for examples.
+
+## Security Considerations 🔐
+
+### Categories of parties
+
+In real-world applications, we may categorize parties by their behavior as follows:
+
+1. Good - party sends messages to other participants based on following events,
+    and correctly receives and processes messages from other parties.
+
+2. Faulty - party has trouble receiving/sending messages.
+    Such faults are mitigated by the weighted threshold and redundancy of consensus participants.
+
+3. Malicious - party launches a DDoS attack by sending an unbounded number of messages.
+    To deal with this, we introduce a rate-limiting mechanism for accepting messages inside the `Party`;
+    however, ❗️ the integrating 'external' system ❗️, which handles `P2P`, is also required to enforce rate limiting, because otherwise the receiving channel may get flooded by malicious messages and block messages from other parties.
+    Another way to cause trouble is by sending invalid messages. For this, each party has
+    a set of checks for certain fields like current ballot number, status, etc.
+    Additionally, if the state transition caused by an incoming message fails, it does not impact the party in any way.
+
+### Note on the leader 👑
+
+If the `leader` of the ballot is faulty or malicious, the ballot deterministically fails and needs to be relaunched.
+
+### Note on the communication discrepancies 🔇
+
+Each party has a certain period in which it may accept particular messages for a certain stage
+(example: having passed 1a stage, it is open for accepting only 1b messages for 2 seconds).
+These periods are configurable using `BPConConfig`, meaning you can program certain ranges
+to allow slow parties to catch up, while others are waiting, before transitioning to the next stage.
+
+In addition, it is possible to schedule parties to launch at a specific absolute time.
diff --git a/src/config.rs b/src/config.rs
@@ -1,6 +1,7 @@
 //! Definitions central to BPCon configuration.
 
 use std::time::Duration;
+use tokio::time::Instant;
 
 /// Configuration structure for BPCon.
 ///
@@ -18,15 +19,15 @@ pub struct BPConConfig {
 
     /// Threshold weight to define BFT quorum.
     ///
-    /// This value must be greater than 2/3 of the total weight of all parties combined.
+    /// This value must be greater than or equal to 2/3 of the total weight of all parties combined.
     /// The quorum is the minimum weight required to make decisions in the BPCon protocol.
     pub threshold: u128,
 
-    /// Timeout before the ballot is launched.
+    /// Absolute time at which the party begins to work.
     ///
     /// This timeout differs from `launch1a_timeout` as it applies to a distinct status
     /// and does not involve listening to external events and messages.
-    pub launch_timeout: Duration,
+    pub launch_at: Instant,
 
     /// Timeout before the 1a stage is launched.
     ///
@@ -96,14 +97,47 @@ impl BPConConfig {
             party_weights,
             threshold,
             // TODO: deduce actually good defaults.
-            launch_timeout: Duration::from_secs(0),
-            launch1a_timeout: Duration::from_secs(5),
-            launch1b_timeout: Duration::from_secs(10),
-            launch2a_timeout: Duration::from_secs(15),
-            launch2av_timeout: Duration::from_secs(20),
-            launch2b_timeout: Duration::from_secs(25),
-            finalize_timeout: Duration::from_secs(30),
-            grace_period: Duration::from_secs(1),
+            launch_at: Instant::now(),
+            launch1a_timeout: Duration::from_millis(200),
+            launch1b_timeout: Duration::from_millis(400),
+            launch2a_timeout: Duration::from_millis(600),
+            launch2av_timeout: Duration::from_millis(800),
+            launch2b_timeout: Duration::from_millis(1000),
+            finalize_timeout: Duration::from_millis(1200),
+            grace_period: Duration::from_millis(0),
         }
     }
+
+    /// Compute the Byzantine Fault Tolerance (BFT) threshold for the consensus protocol.
+    ///
+    /// This function calculates the minimum weight required to achieve a BFT quorum.
+    /// In this protocol, a quorum is reached when at least two-thirds
+    /// of the total weight is gathered from non-faulty parties.
+    ///
+    /// # Parameters
+    ///
+    /// - `party_weights`: A vector of weights corresponding to each party involved in the consensus.
+    ///   These weights represent the voting power or influence of each party in the protocol.
+    ///
+    /// # Returns
+    ///
+    /// The BFT threshold as a `u128` value, which represents the minimum total weight
+    /// required to achieve consensus in a Byzantine Fault Tolerant system. This is calculated
+    /// as two-thirds of the total party weights, rounded up to the nearest integer.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use bpcon::config::BPConConfig;
+    ///
+    /// let party_weights = vec![10, 20, 30, 40, 50];
+    /// let threshold = BPConConfig::compute_bft_threshold(party_weights);
+    /// assert_eq!(threshold, 100);
+    /// ```
+    ///
+    /// In the example above, the total weight is 150, and the BFT threshold is calculated as `2/3 * 150 = 100`.
+    pub fn compute_bft_threshold(party_weights: Vec<u64>) -> u128 {
+        let total_weight: u128 = party_weights.iter().map(|&w| w as u128).sum();
+        (2 * total_weight + 2) / 3 // adding 2 to keep division ceiling.
+    }
 }
diff --git a/src/leader.rs b/src/leader.rs
@@ -155,42 +155,50 @@ impl<V: Value, VS: ValueSelector<V>> LeaderElector<V, VS> for DefaultLeaderElect
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::party::tests::{default_config, default_party};
+    use crate::config::BPConConfig;
+    use crate::test_mocks::MockParty;
     use rand::Rng;
     use std::thread;
     use std::time::Duration;
 
     #[test]
     fn test_default_leader_elector_determinism() {
-        let party = default_party();
+        let party = MockParty::default();
         let elector = DefaultLeaderElector::new();
 
-        let leader1 = elector.elect_leader(&party).unwrap();
+        const ITERATIONS: usize = 10;
 
-        // Test multiple iterations to ensure the leader remains the same
-        for i in 2..=10 {
-            let leader = elector.elect_leader(&party).unwrap();
-            assert_eq!(
-                leader1, leader,
-                "Leaders should be consistent on repeated calls (iteration {})",
-                i
-            );
+        // Collect multiple leaders
+        let leaders: Vec<_> = (0..ITERATIONS)
+            .map(|_| elector.elect_leader(&party).unwrap())
+            .collect();
+
+        // Match the first leader and ensure all others are the same
+        match &leaders[..] {
+            [first_leader, rest @ ..] => {
+                assert!(
+                    rest.iter().all(|leader| leader == first_leader),
+                    "All leaders should be the same across multiple iterations."
+                );
+            }
+            _ => panic!("No leaders were collected!"),
         }
     }
 
     #[test]
     fn test_default_leader_elector_fail_with_zero_weights() {
-        let mut party = default_party();
-        let mut cfg = default_config();
-        cfg.party_weights = vec![0, 0, 0];
+        let mut party = MockParty::default();
+        let cfg = BPConConfig {
+            party_weights: vec![0, 0, 0],
+            ..Default::default()
+        };
         party.cfg = cfg;
-
         let elector = DefaultLeaderElector::new();
 
-        match elector.elect_leader(&party) {
-            Err(_) => {} // This is the expected behavior
-            _ => panic!("Expected DefaultLeaderElectorError::ZeroWeightSum"),
-        }
+        assert!(
+            elector.elect_leader(&party).is_err(),
+            "Expected DefaultLeaderElectorError::ZeroWeightSum"
+        );
     }
 
     fn debug_hash_to_range_new(seed: u64, range: u64) -> u64 {
@@ -218,7 +226,7 @@ mod tests {
     }
 
     #[test]
-    #[ignore] // Ignoring since it takes a while to run
+    #[ignore = "takes too long to run, launch manually"]
     fn test_hash_range_random() {
         // Test the uniform distribution
 

diff --git a/src/lib.rs b/src/lib.rs
@@ -3,4 +3,6 @@ pub mod error;
 pub mod leader;
 pub mod message;
 pub mod party;
+#[cfg(any(test, feature = "test-mocks"))]
+pub mod test_mocks;
 pub mod value;
diff --git a/src/message.rs b/src/message.rs
@@ -30,7 +30,7 @@ pub struct MessageRouting {
 ///
 /// These message types represent the various stages of the BPCon consensus protocol,
 /// each corresponding to a specific phase in the process.
-#[derive(PartialEq, Eq, Debug, Copy, Clone)]
+#[derive(PartialEq, Eq, Debug, Copy, Clone, Hash)]
 pub enum ProtocolMessage {
     Msg1a,
     Msg1b,