diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index cd906eab..a70c6225 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -32,13 +32,11 @@ jobs: uses: actions/cache@v4 with: path: | - ~/.cargo/registry - ~/.cargo/git - ~/.cargo/bin + ~/.cargo target - key: ${{ runner.os }}-bench-${{ hashFiles('**/Cargo.lock') }} + key: ${{ runner.os }}-bench-0.1.1-${{ hashFiles('**/Cargo.lock') }} - name: Install canbench - run: cargo install canbench --force + run: cargo install canbench - name: Run perf for base branch if: false run: | diff --git a/rust/bench/bench.rs b/rust/bench/bench.rs index 6947096e..43b2a4c8 100644 --- a/rust/bench/bench.rs +++ b/rust/bench/bench.rs @@ -1,16 +1,20 @@ use canbench_rs::{bench, bench_fn, bench_scope, BenchResult}; -use candid::{CandidType, Decode, Deserialize, Encode, Int, Nat}; +use candid::{CandidType, Decode, DecoderConfig, Deserialize, Encode, Int, Nat}; use std::collections::BTreeMap; #[allow(clippy::all)] mod nns; const N: usize = 2097152; +const COST: usize = 20_000_000; +const SKIP: usize = 10_000; #[bench(raw)] fn blob() -> BenchResult { use serde_bytes::ByteBuf; let vec: Vec = vec![0x61; N]; + let mut config = DecoderConfig::new(); + config.set_decoding_quota(COST).set_skipping_quota(SKIP); bench_fn(|| { let bytes = { let _p = bench_scope("1. Encoding"); @@ -18,7 +22,7 @@ fn blob() -> BenchResult { }; { let _p = bench_scope("2. Decoding"); - Decode!(&bytes, ByteBuf).unwrap(); + Decode!([config]; &bytes, ByteBuf).unwrap(); } }) } @@ -27,6 +31,8 @@ fn blob() -> BenchResult { fn text() -> BenchResult { let vec: Vec = vec![0x61; N]; let text = String::from_utf8(vec).unwrap(); + let mut config = DecoderConfig::new(); + config.set_decoding_quota(COST).set_skipping_quota(SKIP); bench_fn(|| { let bytes = { let _p = bench_scope("1. Encoding"); @@ -34,14 +40,16 @@ fn text() -> BenchResult { }; { let _p = bench_scope("2. 
Decoding"); - Decode!(&bytes, String).unwrap(); + Decode!([config]; &bytes, String).unwrap(); } }) } #[bench(raw)] -fn vec_int64() -> BenchResult { - let vec: Vec = vec![-1; N]; +fn vec_int16() -> BenchResult { + let vec: Vec = vec![-1; N]; + let mut config = DecoderConfig::new(); + config.set_decoding_quota(COST).set_skipping_quota(SKIP); bench_fn(|| { let bytes = { let _p = bench_scope("1. Encoding"); @@ -49,13 +57,15 @@ fn vec_int64() -> BenchResult { }; { let _p = bench_scope("2. Decoding"); - Decode!(&bytes, Vec).unwrap(); + Decode!([config]; &bytes, Vec).unwrap(); } }) } #[bench(raw)] fn btreemap() -> BenchResult { + let mut config = DecoderConfig::new(); + config.set_decoding_quota(COST).set_skipping_quota(SKIP); let n = 1048576; let map: BTreeMap = (0u32..n as u32) .map(|i| (i.to_string(), Nat::from(i))) @@ -67,13 +77,15 @@ fn btreemap() -> BenchResult { }; { let _p = bench_scope("2. Decoding"); - Decode!(&bytes, BTreeMap).unwrap(); + Decode!([config]; &bytes, BTreeMap).unwrap(); } }) } #[bench(raw)] fn option_list() -> BenchResult { + let mut config = DecoderConfig::new(); + config.set_decoding_quota(COST).set_skipping_quota(SKIP); let n = 2048; #[derive(CandidType, Deserialize)] struct List { @@ -93,13 +105,15 @@ fn option_list() -> BenchResult { }; { let _p = bench_scope("2. Decoding"); - Decode!(&bytes, Option>).unwrap(); + Decode!([config]; &bytes, Option>).unwrap(); } }) } #[bench(raw)] fn variant_list() -> BenchResult { + let mut config = DecoderConfig::new(); + config.set_decoding_quota(COST).set_skipping_quota(SKIP); let n = 2048; #[derive(CandidType, Deserialize)] enum VariantList { @@ -116,7 +130,7 @@ fn variant_list() -> BenchResult { }; { let _p = bench_scope("2. 
Decoding"); - Decode!(&bytes, VariantList).unwrap(); + Decode!([config]; &bytes, VariantList).unwrap(); } }) } @@ -124,6 +138,8 @@ fn variant_list() -> BenchResult { #[bench(raw)] fn nns() -> BenchResult { use candid_parser::utils::CandidSource; + let mut config = DecoderConfig::new(); + config.set_decoding_quota(COST).set_skipping_quota(SKIP); let nns_did = CandidSource::Text(include_str!("./nns.did")); let motion_proposal = r#" ( @@ -207,16 +223,20 @@ fn nns() -> BenchResult { }; { let _p = bench_scope("2. Decoding"); - Decode!(&bytes, nns::ManageNeuron).unwrap(); + Decode!([config]; &bytes, nns::ManageNeuron).unwrap(); } }) } #[bench(raw)] fn extra_args() -> BenchResult { - let bytes = hex::decode("4449444c036c01d6fca702016d026c00010080ade204").unwrap(); + let mut config = DecoderConfig::new(); + config.set_skipping_quota(SKIP); + let vec_null = hex::decode("4449444c036c01d6fca702016d026c00010080ade204").unwrap(); + let vec_opt_record = hex::decode("4449444c176c02017f027f6c02010002006c02000101016c02000201026c02000301036c02000401046c02000501056c02000601066c02000701076c02000801086c02000901096c02000a010a6c02000b010b6c02000c010c6c02000d020d6c02000e010e6c02000f010f6c02001001106c02001101116c02001201126c02001301136e146d150116050101010101").unwrap(); bench_fn(|| { - let _ = Decode!(&bytes); + assert!(Decode!([config]; &vec_null).is_err()); + assert!(Decode!([config]; &vec_opt_record).is_err()); }) } diff --git a/rust/candid/src/de.rs b/rust/candid/src/de.rs index 6a8dd624..87c3e787 100644 --- a/rust/candid/src/de.rs +++ b/rust/candid/src/de.rs @@ -28,26 +28,19 @@ pub struct IDLDeserialize<'de> { impl<'de> IDLDeserialize<'de> { /// Create a new deserializer with IDL binary message. 
pub fn new(bytes: &'de [u8]) -> Result { - let de = Deserializer::from_bytes(bytes).with_context(|| { - if bytes.len() <= 500 { - format!("Cannot parse header {}", &hex::encode(bytes)) - } else { - "Cannot parse header".to_string() - } - })?; - Ok(IDLDeserialize { de }) + let config = DecoderConfig::new(); + Self::new_with_config(bytes, &config) } /// Create a new deserializer with IDL binary message. The config is used to adjust some parameters in the deserializer. - pub fn new_with_config(bytes: &'de [u8], config: Config) -> Result { - let mut de = Deserializer::from_bytes(bytes).with_context(|| { + pub fn new_with_config(bytes: &'de [u8], config: &DecoderConfig) -> Result { + let mut de = Deserializer::from_bytes(bytes, config).with_context(|| { if config.full_error_message || bytes.len() <= 500 { format!("Cannot parse header {}", &hex::encode(bytes)) } else { "Cannot parse header".to_string() } })?; - de.zero_sized_values = config.zero_sized_values; - de.full_error_message = config.full_error_message; + de.add_cost(de.input.position() as usize * 4)?; Ok(IDLDeserialize { de }) } /// Deserialize one value from deserializer. 
@@ -83,7 +76,8 @@ impl<'de> IDLDeserialize<'de> { self.de.expect_type = expected_type; self.de.wire_type = TypeInner::Reserved.into(); return T::deserialize(&mut self.de); - } else if self.de.full_error_message || text_size(&expected_type, MAX_TYPE_LEN).is_ok() + } else if self.de.config.full_error_message + || text_size(&expected_type, MAX_TYPE_LEN).is_ok() { return Err(Error::msg(format!( "No more values on the wire, the expected type {expected_type} is not opt, null, or reserved" @@ -103,7 +97,7 @@ impl<'de> IDLDeserialize<'de> { self.de.wire_type = ty.clone(); let mut v = T::deserialize(&mut self.de).with_context(|| { - if self.de.full_error_message + if self.de.config.full_error_message || (text_size(&ty, MAX_TYPE_LEN).is_ok() && text_size(&expected_type, MAX_TYPE_LEN).is_ok()) { @@ -112,7 +106,7 @@ impl<'de> IDLDeserialize<'de> { format!("Fail to decode argument {ind}") } }); - if self.de.full_error_message { + if self.de.config.full_error_message { v = v.with_context(|| self.de.dump_state()); } Ok(v?) @@ -122,14 +116,14 @@ impl<'de> IDLDeserialize<'de> { self.de.types.is_empty() } /// Return error if there are unprocessed bytes in the input. - pub fn done(mut self) -> Result<()> { + pub fn done(&mut self) -> Result<()> { while !self.is_done() { self.get_value::()?; } let ind = self.de.input.position() as usize; let rest = &self.de.input.get_ref()[ind..]; if !rest.is_empty() { - if !self.de.full_error_message { + if !self.de.config.full_error_message { return Err(Error::msg("Trailing value after finishing deserialization")); } else { return Err(anyhow!(self.de.dump_state())) @@ -138,29 +132,109 @@ impl<'de> IDLDeserialize<'de> { } Ok(()) } + /// Return the current DecoderConfig, mainly to extract the remaining quota. 
+ pub fn get_config(&self) -> DecoderConfig { + self.de.config.clone() + } } -pub struct Config { - zero_sized_values: usize, +#[derive(Clone)] +/// Config the deserialization quota, used to prevent spending too much time in decoding malicious payload. +pub struct DecoderConfig { + pub decoding_quota: Option, + pub skipping_quota: Option, full_error_message: bool, } -impl Config { +impl DecoderConfig { + /// Creates a config with no quota. This allows developers to handle large Candid + /// data internally, e.g., persisting states to stable memory. + /// When using Candid in canister endpoints, we recommend setting the quota to prevent malicious payload. pub fn new() -> Self { Self { - zero_sized_values: 2_000_000, + decoding_quota: None, + skipping_quota: None, + #[cfg(not(target_arch = "wasm32"))] full_error_message: true, + #[cfg(target_arch = "wasm32")] + full_error_message: false, } } - pub fn set_zero_sized_values(&mut self, n: usize) -> &mut Self { - self.zero_sized_values = n; + /// Limit the total amount of work the deserializer can perform. Deserialization errors out when the limit is reached. + /// If your canister endpoint has variable-length data types and expects that the valid data will be small, + /// you can set this limit to prevent spending too much time decoding invalid data. + /// + /// The cost of decoding a message = 4 * the byte length of the header (the bytes before the value part) + the cost of decoding each value. 
+ /// + /// The cost of decoding a value is roughly defined as follows + /// (it's not precise because the cost also depends on how Rust data types are defined), + /// ```text + /// C : <val> -> <primtype> -> nat + /// C(n : nat) = |leb128(n)| + /// C(i : int) = |sleb128(i)| + /// C(n : nat<N>) = N / 8 + /// C(i : int<N>) = N / 8 + /// C(z : float<N>) = N / 8 + /// C(b : bool) = 1 + /// C(t : text) = 1 + |t| + /// C(_ : null) = 1 + /// C(_ : reserved) = 1 + /// C(_ : empty) = undefined + /// + /// C : <val> -> <constype> -> nat + /// C(null : opt <datatype>) = 2 + /// C(?v : opt <datatype>) = 2 + C(v : <datatype>) + /// C(v^N : vec <datatype>) = 2 + 3 * N + sum_i C(v[i] : <datatype>) + /// C(kv* : record {<fieldtype>*}) = 2 + sum_i C(kv : <fieldtype>*[i]) + /// C(kv : variant {<fieldtype>*}) = 2 + C(kv : <fieldtype>*[i]) + /// + /// C : (<nat>, <val>) -> <fieldtype> -> nat + /// C((k,v) : k:<datatype>) = 7 + |k| + C(v : <datatype>) // record field + /// C((k,v) : k:<datatype>) = 5 + |k| + C(v : <datatype>) // variant field + /// + /// C : <val> -> <reftype> -> nat + /// C(id(v*) : service <actortype>) = 2 + |v*| + |type table| + /// C((id(v*),name) : func <functype>) = 4 + |v*| + |name| + |type table| + /// C(id(v*) : principal) = 1 + |v*| + /// + /// When a value `v : t` on the wire is skipped, due to being extra arguments, extra fields and mismatched option types, + /// we apply a 50x penalty on `C(v : t)` in the decoding cost. + /// ``` + pub fn set_decoding_quota(&mut self, n: usize) -> &mut Self { + self.decoding_quota = Some(n); self } + /// Limit the amount of work for skipping unneeded data on the wire. This includes extra arguments, extra fields + /// and mismatched option values. Decoding values to `IDLValue` is also counted towards this limit. + /// For the cost model, please refer to the docs in [`set_decoding_quota`](#method.set_decoding_quota). + /// Note that unlike the decoding_quota, we will not apply the 50x penalty for skipped values in this counter. + /// When using Candid in canister endpoints, it's strongly encouraged to set this quota to a small value, e.g., 10_000. 
+ pub fn set_skipping_quota(&mut self, n: usize) -> &mut Self { + self.skipping_quota = Some(n); + self + } + /// When set to false, error message only displays the concrete type when the type is small. + /// The error message also doesn't include the decoding states. + /// When set to true, error message always shows the full type and decoding states. pub fn set_full_error_message(&mut self, n: bool) -> &mut Self { self.full_error_message = n; self } + /// Given the original config, compute the decoding cost + pub fn compute_cost(&self, original: &Self) -> Self { + let decoding_quota = original + .decoding_quota + .and_then(|n| Some(n - self.decoding_quota?)); + let skipping_quota = original + .skipping_quota + .and_then(|n| Some(n - self.skipping_quota?)); + Self { + decoding_quota, + skipping_quota, + full_error_message: original.full_error_message, + } + } } -impl Default for Config { +impl Default for DecoderConfig { fn default() -> Self { Self::new() } @@ -236,14 +310,13 @@ struct Deserializer<'de> { // Indicates whether to deserialize with IDLValue. // It only affects the field id generation in enum type. 
is_untyped: bool, - zero_sized_values: usize, - full_error_message: bool, + config: DecoderConfig, #[cfg(not(target_arch = "wasm32"))] recursion_depth: u16, } impl<'de> Deserializer<'de> { - fn from_bytes(bytes: &'de [u8]) -> Result { + fn from_bytes(bytes: &'de [u8], config: &DecoderConfig) -> Result { let mut reader = Cursor::new(bytes); let header = Header::read(&mut reader)?; let (env, types) = header.to_types()?; @@ -256,14 +329,7 @@ impl<'de> Deserializer<'de> { gamma: Gamma::default(), field_name: None, is_untyped: false, - #[cfg(not(target_arch = "wasm32"))] - zero_sized_values: 2_000_000, - #[cfg(target_arch = "wasm32")] - zero_sized_values: 0, - #[cfg(not(target_arch = "wasm32"))] - full_error_message: true, - #[cfg(target_arch = "wasm32")] - full_error_message: false, + config: config.clone(), #[cfg(not(target_arch = "wasm32"))] recursion_depth: 0, }) @@ -299,6 +365,7 @@ impl<'de> Deserializer<'de> { Ok(res) } fn check_subtype(&mut self) -> Result<()> { + self.add_cost(self.table.0.len())?; subtype_with_config( OptReport::Silence, &mut self.gamma, @@ -307,7 +374,7 @@ impl<'de> Deserializer<'de> { &self.expect_type, ) .with_context(|| { - if self.full_error_message + if self.config.full_error_message || (text_size(&self.wire_type, MAX_TYPE_LEN).is_ok() && text_size(&self.expect_type, MAX_TYPE_LEN).is_ok()) { @@ -327,26 +394,35 @@ impl<'de> Deserializer<'de> { self.expect_type.as_ref(), TypeInner::Var(_) | TypeInner::Knot(_) ) { + self.add_cost(1)?; self.expect_type = self.table.trace_type(&self.expect_type)?; } if matches!( self.wire_type.as_ref(), TypeInner::Var(_) | TypeInner::Knot(_) ) { + self.add_cost(1)?; self.wire_type = self.table.trace_type(&self.wire_type)?; } Ok(()) } - fn is_zero_sized_type(&self, t: &Type) -> bool { - match t.as_ref() { - TypeInner::Null | TypeInner::Reserved => true, - TypeInner::Record(fs) => fs.iter().all(|f| { - let t = self.table.trace_type(&f.ty).unwrap(); - // recursive records have been replaced with empty already, 
it's safe to call without memoization. - self.is_zero_sized_type(&t) - }), - _ => false, + fn add_cost(&mut self, cost: usize) -> Result<()> { + if let Some(n) = self.config.decoding_quota { + let cost = if self.is_untyped { cost * 50 } else { cost }; + if n < cost { + return Err(Error::msg("Decoding cost exceeds the limit")); + } + self.config.decoding_quota = Some(n - cost); + } + if self.is_untyped { + if let Some(n) = self.config.skipping_quota { + if n < cost { + return Err(Error::msg("Skipping cost exceeds the limit")); + } + self.config.skipping_quota = Some(n - cost); + } } + Ok(()) } // Should always call set_field_name to set the field_name. After deserialize_identifier // processed the field_name, field_name will be reset to None. @@ -371,11 +447,13 @@ impl<'de> Deserializer<'de> { self.unroll_type()?; assert!(*self.expect_type == TypeInner::Int); let mut bytes = vec![0u8]; + let pos = self.input.position(); let int = match self.wire_type.as_ref() { TypeInner::Int => Int::decode(&mut self.input).map_err(Error::msg)?, TypeInner::Nat => Int(Nat::decode(&mut self.input).map_err(Error::msg)?.0.into()), t => return Err(Error::subtype(format!("{t} cannot be deserialized to int"))), }; + self.add_cost((self.input.position() - pos) as usize)?; bytes.extend_from_slice(&int.0.to_signed_bytes_le()); visitor.visit_byte_buf(bytes) } @@ -391,7 +469,9 @@ impl<'de> Deserializer<'de> { "nat" ); let mut bytes = vec![1u8]; + let pos = self.input.position(); let nat = Nat::decode(&mut self.input).map_err(Error::msg)?; + self.add_cost((self.input.position() - pos) as usize)?; bytes.extend_from_slice(&nat.0.to_bytes_le()); visitor.visit_byte_buf(bytes) } @@ -405,14 +485,16 @@ impl<'de> Deserializer<'de> { "principal" ); let mut bytes = vec![2u8]; - let id = PrincipalBytes::read(&mut self.input)?.inner; - bytes.extend_from_slice(&id); + let id = PrincipalBytes::read(&mut self.input)?; + self.add_cost(id.len as usize + 1)?; + bytes.extend_from_slice(&id.inner); 
visitor.visit_byte_buf(bytes) } fn deserialize_reserved<'a, V>(&'a mut self, visitor: V) -> Result where V: Visitor<'de>, { + self.add_cost(1)?; let bytes = vec![3u8]; visitor.visit_byte_buf(bytes) } @@ -423,8 +505,9 @@ impl<'de> Deserializer<'de> { self.unroll_type()?; self.check_subtype()?; let mut bytes = vec![4u8]; - let id = PrincipalBytes::read(&mut self.input)?.inner; - bytes.extend_from_slice(&id); + let id = PrincipalBytes::read(&mut self.input)?; + self.add_cost(id.len as usize + 1)?; + bytes.extend_from_slice(&id.inner); visitor.visit_byte_buf(bytes) } fn deserialize_function<'a, V>(&'a mut self, visitor: V) -> Result @@ -437,13 +520,14 @@ impl<'de> Deserializer<'de> { return Err(Error::msg("Opaque reference not supported")); } let mut bytes = vec![5u8]; - let id = PrincipalBytes::read(&mut self.input)?.inner; + let id = PrincipalBytes::read(&mut self.input)?; let len = Len::read(&mut self.input)?.0; let meth = self.borrow_bytes(len)?; + self.add_cost(id.len as usize + len + 3)?; // TODO find a better way leb128::write::unsigned(&mut bytes, len as u64)?; bytes.extend_from_slice(meth); - bytes.extend_from_slice(&id); + bytes.extend_from_slice(&id.inner); visitor.visit_byte_buf(bytes) } fn deserialize_blob<'a, V>(&'a mut self, visitor: V) -> Result @@ -456,6 +540,7 @@ impl<'de> Deserializer<'de> { "blob" ); let len = Len::read(&mut self.input)?.0; + self.add_cost(len + 1)?; let blob = self.borrow_bytes(len)?; let mut bytes = Vec::with_capacity(len + 1); bytes.push(6u8); @@ -477,6 +562,7 @@ impl<'de> Deserializer<'de> { V: Visitor<'de>, { let len = Len::read(&mut self.input)?.0 as u64; + self.add_cost(len as usize + 1)?; Len::read(&mut self.input)?; let slice_len = self.input.get_ref().len() as u64; let pos = self.input.position(); @@ -516,6 +602,9 @@ impl<'de> Deserializer<'de> { Ok(v) } Err(Error::Subtype(_)) => { + // Remember the backtracking cost + self.config = self_clone.config; + self.add_cost(10)?; 
self.deserialize_ignored_any(serde::de::IgnoredAny)?; visitor.visit_none() } @@ -525,12 +614,13 @@ impl<'de> Deserializer<'de> { } macro_rules! primitive_impl { - ($ty:ident, $type:expr, $($value:tt)*) => { + ($ty:ident, $type:expr, $cost:literal, $($value:tt)*) => { paste::item! { fn [](self, visitor: V) -> Result where V: Visitor<'de> { self.unroll_type()?; check!(*self.expect_type == $type && *self.wire_type == $type, stringify!($type)); + self.add_cost($cost)?; let val = self.input.$($value)*().map_err(|_| Error::msg(format!("Cannot read {} value", stringify!($type))))?; visitor.[](val) } @@ -604,16 +694,16 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { v } - primitive_impl!(i8, TypeInner::Int8, read_i8); - primitive_impl!(i16, TypeInner::Int16, read_i16::); - primitive_impl!(i32, TypeInner::Int32, read_i32::); - primitive_impl!(i64, TypeInner::Int64, read_i64::); - primitive_impl!(u8, TypeInner::Nat8, read_u8); - primitive_impl!(u16, TypeInner::Nat16, read_u16::); - primitive_impl!(u32, TypeInner::Nat32, read_u32::); - primitive_impl!(u64, TypeInner::Nat64, read_u64::); - primitive_impl!(f32, TypeInner::Float32, read_f32::); - primitive_impl!(f64, TypeInner::Float64, read_f64::); + primitive_impl!(i8, TypeInner::Int8, 1, read_i8); + primitive_impl!(i16, TypeInner::Int16, 2, read_i16::); + primitive_impl!(i32, TypeInner::Int32, 4, read_i32::); + primitive_impl!(i64, TypeInner::Int64, 8, read_i64::); + primitive_impl!(u8, TypeInner::Nat8, 1, read_u8); + primitive_impl!(u16, TypeInner::Nat16, 2, read_u16::); + primitive_impl!(u32, TypeInner::Nat32, 4, read_u32::); + primitive_impl!(u64, TypeInner::Nat64, 8, read_u64::); + primitive_impl!(f32, TypeInner::Float32, 4, read_f32::); + primitive_impl!(f64, TypeInner::Float64, 8, read_f64::); fn is_human_readable(&self) -> bool { false @@ -625,6 +715,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { use crate::types::leb128::{decode_int, decode_nat}; self.unroll_type()?; 
assert!(*self.expect_type == TypeInner::Int); + self.add_cost(16)?; let value: i128 = match self.wire_type.as_ref() { TypeInner::Int => decode_int(&mut self.input)?, TypeInner::Nat => i128::try_from(decode_nat(&mut self.input)?) @@ -642,6 +733,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { *self.expect_type == TypeInner::Nat && *self.wire_type == TypeInner::Nat, "nat" ); + self.add_cost(16)?; let value = crate::types::leb128::decode_nat(&mut self.input)?; visitor.visit_u128(value) } @@ -655,6 +747,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { && matches!(*self.wire_type, TypeInner::Null | TypeInner::Reserved), "unit" ); + self.add_cost(1)?; visitor.visit_unit() } fn deserialize_bool(self, visitor: V) -> Result @@ -666,6 +759,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { *self.expect_type == TypeInner::Bool && *self.wire_type == TypeInner::Bool, "bool" ); + self.add_cost(1)?; let res = BoolValue::read(&mut self.input)?; visitor.visit_bool(res.0) } @@ -679,6 +773,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { "text" ); let len = Len::read(&mut self.input)?.0; + self.add_cost(len + 1)?; let bytes = self.borrow_bytes(len)?.to_owned(); let value = String::from_utf8(bytes).map_err(Error::msg)?; visitor.visit_string(value) @@ -693,6 +788,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { "text" ); let len = Len::read(&mut self.input)?.0; + self.add_cost(len + 1)?; let slice = self.borrow_bytes(len)?; let value: &str = std::str::from_utf8(slice).map_err(Error::msg)?; visitor.visit_borrowed_str(value) @@ -701,12 +797,14 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { where V: Visitor<'de>, { + self.add_cost(1)?; self.deserialize_unit(visitor) } fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result where V: Visitor<'de>, { + self.add_cost(1)?; visitor.visit_newtype_struct(self) } fn 
deserialize_option(self, visitor: V) -> Result @@ -714,6 +812,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { V: Visitor<'de>, { self.unroll_type()?; + self.add_cost(1)?; match (self.wire_type.as_ref(), self.expect_type.as_ref()) { (TypeInner::Null | TypeInner::Reserved, TypeInner::Opt(_)) => visitor.visit_none(), (TypeInner::Opt(t1), TypeInner::Opt(t2)) => { @@ -750,17 +849,12 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { { check_recursion! { self.unroll_type()?; + self.add_cost(1)?; match (self.expect_type.as_ref(), self.wire_type.as_ref()) { (TypeInner::Vec(e), TypeInner::Vec(w)) => { let expect = e.clone(); let wire = self.table.trace_type(w)?; let len = Len::read(&mut self.input)?.0; - if self.is_zero_sized_type(&wire) { - if self.zero_sized_values < len { - return Err(Error::msg("vec length of zero sized values too large")); - } - self.zero_sized_values -= len; - } visitor.visit_seq(Compound::new(self, Style::Vector { len, expect, wire })) } (TypeInner::Record(e), TypeInner::Record(w)) => { @@ -789,6 +883,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { "vec nat8" ); let len = Len::read(&mut self.input)?.0; + self.add_cost(len + 1)?; let bytes = self.borrow_bytes(len)?.to_owned(); visitor.visit_byte_buf(bytes) } @@ -798,6 +893,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { TypeInner::Principal => self.deserialize_principal(visitor), TypeInner::Vec(t) if **t == TypeInner::Nat8 => { let len = Len::read(&mut self.input)?.0; + self.add_cost(len + 1)?; let slice = self.borrow_bytes(len)?; visitor.visit_borrowed_bytes(slice) } @@ -810,6 +906,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { { check_recursion! 
{ self.unroll_type()?; + self.add_cost(1)?; match (self.expect_type.as_ref(), self.wire_type.as_ref()) { (TypeInner::Vec(e), TypeInner::Vec(w)) => { let e = self.table.trace_type(e)?; @@ -848,6 +945,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { V: Visitor<'de>, { check_recursion! { + self.add_cost(1)?; self.deserialize_seq(visitor) } } @@ -861,6 +959,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { V: Visitor<'de>, { check_recursion! { + self.add_cost(1)?; self.deserialize_seq(visitor) } } @@ -875,6 +974,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { { check_recursion! { self.unroll_type()?; + self.add_cost(1)?; match (self.expect_type.as_ref(), self.wire_type.as_ref()) { (TypeInner::Record(e), TypeInner::Record(w)) => { let expect = e.clone().into(); @@ -898,6 +998,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { { check_recursion! { self.unroll_type()?; + self.add_cost(1)?; match (self.expect_type.as_ref(), self.wire_type.as_ref()) { (TypeInner::Variant(e), TypeInner::Variant(w)) => { let index = Len::read(&mut self.input)?.0; @@ -926,8 +1027,14 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { { match self.field_name.take() { Some(l) => match l.as_ref() { - Label::Named(name) => visitor.visit_string(name.to_string()), - Label::Id(hash) | Label::Unnamed(hash) => visitor.visit_u32(*hash), + Label::Named(name) => { + self.add_cost(name.len())?; + visitor.visit_string(name.to_string()) + } + Label::Id(hash) | Label::Unnamed(hash) => { + self.add_cost(4)?; + visitor.visit_u32(*hash) + } }, None => assert!(false), } @@ -978,6 +1085,7 @@ impl<'de, 'a> de::SeqAccess<'de> for Compound<'a, 'de> { where T: de::DeserializeSeed<'de>, { + self.de.add_cost(3)?; match self.style { Style::Vector { ref mut len, @@ -1020,6 +1128,7 @@ impl<'de, 'a> de::MapAccess<'de> for Compound<'a, 'de> { where K: de::DeserializeSeed<'de>, { + self.de.add_cost(4)?; match 
self.style { Style::Struct { ref mut expect, @@ -1093,11 +1202,15 @@ impl<'de, 'a> de::MapAccess<'de> for Compound<'a, 'de> { { match &self.style { Style::Map { expect, wire, .. } => { + self.de.add_cost(3)?; self.de.expect_type = expect.1.clone(); self.de.wire_type = wire.1.clone(); seed.deserialize(&mut *self.de) } - _ => seed.deserialize(&mut *self.de), + _ => { + self.de.add_cost(1)?; + seed.deserialize(&mut *self.de) + } } } } @@ -1110,6 +1223,7 @@ impl<'de, 'a> de::EnumAccess<'de> for Compound<'a, 'de> { where V: de::DeserializeSeed<'de>, { + self.de.add_cost(4)?; match &self.style { Style::Enum { expect, wire } => { self.de.expect_type = expect.ty.clone(); @@ -1143,6 +1257,7 @@ impl<'de, 'a> de::VariantAccess<'de> for Compound<'a, 'de> { *self.de.expect_type == TypeInner::Null && *self.de.wire_type == TypeInner::Null, "unit_variant" ); + self.de.add_cost(1)?; Ok(()) } @@ -1150,6 +1265,7 @@ impl<'de, 'a> de::VariantAccess<'de> for Compound<'a, 'de> { where T: de::DeserializeSeed<'de>, { + self.de.add_cost(1)?; seed.deserialize(self.de) } @@ -1157,6 +1273,7 @@ impl<'de, 'a> de::VariantAccess<'de> for Compound<'a, 'de> { where V: Visitor<'de>, { + self.de.add_cost(1)?; de::Deserializer::deserialize_tuple(self.de, len, visitor) } @@ -1164,6 +1281,7 @@ impl<'de, 'a> de::VariantAccess<'de> for Compound<'a, 'de> { where V: Visitor<'de>, { + self.de.add_cost(1)?; de::Deserializer::deserialize_struct(self.de, "_", fields, visitor) } } diff --git a/rust/candid/src/lib.rs b/rust/candid/src/lib.rs index fe45f4aa..07bcca22 100644 --- a/rust/candid/src/lib.rs +++ b/rust/candid/src/lib.rs @@ -256,10 +256,14 @@ pub use types::{ #[allow(dead_code)] pub mod binary_parser; pub mod de; +pub use de::DecoderConfig; pub mod ser; pub mod utils; -pub use utils::{decode_args, decode_one, encode_args, encode_one, write_args}; +pub use utils::{ + decode_args, decode_args_with_config, decode_one, decode_one_with_config, encode_args, + encode_one, write_args, +}; #[cfg_attr(docsrs, 
doc(cfg(feature = "value")))] #[cfg(feature = "value")] diff --git a/rust/candid/src/types/type_env.rs b/rust/candid/src/types/type_env.rs index a5725df9..0723f276 100644 --- a/rust/candid/src/types/type_env.rs +++ b/rust/candid/src/types/type_env.rs @@ -1,6 +1,6 @@ use crate::types::{Function, Type, TypeInner}; use crate::{Error, Result}; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeMap; #[derive(Debug, Clone, Default)] pub struct TypeEnv(pub BTreeMap); @@ -80,50 +80,50 @@ impl TypeEnv { } Err(Error::msg(format!("cannot find method {id}"))) } - fn go<'a>( + fn is_empty<'a>( &'a self, - seen: &mut BTreeSet<&'a str>, - res: &mut BTreeSet<&'a str>, - t: &'a Type, - ) -> Result<()> { - if !res.is_empty() { - return Ok(()); - } - match t.as_ref() { - TypeInner::Record(fs) => { - for f in fs { - self.go(seen, res, &f.ty)?; - } + res: &mut BTreeMap<&'a str, Option>, + id: &'a str, + ) -> Result { + match res.get(id) { + None => { + res.insert(id, None); + let t = self.find_type(id)?; + let result = match t.as_ref() { + TypeInner::Record(fs) => { + for f in fs { + // Assume env only comes from type table, f.ty is either primitive or var. + if let TypeInner::Var(f_id) = f.ty.as_ref() { + if self.is_empty(res, f_id)? 
{ + res.insert(id, Some(true)); + return Ok(true); + } + } + } + false + } + TypeInner::Var(id) => self.is_empty(res, id)?, + _ => false, + }; + res.insert(id, Some(result)); + Ok(result) } - TypeInner::Var(id) => { - if seen.insert(id) { - let t = self.find_type(id)?; - self.go(seen, res, t)?; - seen.remove(&id.as_str()); - } else { - *res = seen.clone(); - } + Some(None) => { + res.insert(id, Some(true)); + Ok(true) } - _ => (), - } - Ok(()) - } - fn check_empty(&self) -> Result> { - let mut res = BTreeSet::new(); - for (name, t) in &self.0 { - let mut seen: BTreeSet<&str> = BTreeSet::new(); - let mut local_res = BTreeSet::new(); - seen.insert(name); - self.go(&mut seen, &mut local_res, t)?; - res.append(&mut local_res); + Some(Some(b)) => Ok(*b), } - Ok(res) } pub fn replace_empty(&mut self) -> Result<()> { - let ids: Vec<_> = self - .check_empty()? + let mut res = BTreeMap::new(); + for name in self.0.keys() { + self.is_empty(&mut res, name)?; + } + let ids: Vec<_> = res .iter() - .map(|x| (*x).to_string()) + .filter(|(_, v)| matches!(v, Some(true))) + .map(|(id, _)| id.to_string()) .collect(); for id in ids { self.0.insert(id, TypeInner::Empty.into()); diff --git a/rust/candid/src/types/value.rs b/rust/candid/src/types/value.rs index f622a29b..d770dc26 100644 --- a/rust/candid/src/types/value.rs +++ b/rust/candid/src/types/value.rs @@ -1,6 +1,6 @@ use crate::types::number::{Int, Nat}; use crate::types::{Field, Label, Type, TypeEnv, TypeInner}; -use crate::{CandidType, Error, Result}; +use crate::{CandidType, DecoderConfig, Error, Result}; use serde::de; use serde::de::{Deserialize, Visitor}; use std::collections::HashMap; @@ -118,6 +118,21 @@ impl IDLArgs { de.done()?; Ok(IDLArgs { args }) } + pub fn from_bytes_with_types_with_config( + bytes: &[u8], + env: &TypeEnv, + types: &[Type], + config: &DecoderConfig, + ) -> Result { + let mut de = crate::de::IDLDeserialize::new_with_config(bytes, config)?; + let mut args = Vec::new(); + for ty in types.iter() { + let 
v = de.get_value_with_type(env, ty)?; + args.push(v); + } + de.done()?; + Ok(IDLArgs { args }) + } pub fn from_bytes(bytes: &[u8]) -> Result { let mut de = crate::de::IDLDeserialize::new(bytes)?; let mut args = Vec::new(); @@ -128,6 +143,16 @@ impl IDLArgs { de.done()?; Ok(IDLArgs { args }) } + pub fn from_bytes_with_config(bytes: &[u8], config: &DecoderConfig) -> Result { + let mut de = crate::de::IDLDeserialize::new_with_config(bytes, config)?; + let mut args = Vec::new(); + while !de.is_done() { + let v = de.get_value::()?; + args.push(v); + } + de.done()?; + Ok(IDLArgs { args }) + } } impl IDLValue { diff --git a/rust/candid/src/utils.rs b/rust/candid/src/utils.rs index 4889783f..aee8206d 100644 --- a/rust/candid/src/utils.rs +++ b/rust/candid/src/utils.rs @@ -1,4 +1,4 @@ -use crate::de::IDLDeserialize; +use crate::de::{DecoderConfig, IDLDeserialize}; use crate::ser::IDLBuilder; use crate::{CandidType, Error, Result}; use serde::de::Deserialize; @@ -54,13 +54,38 @@ macro_rules! Encode { /// Decode Candid message into a tuple of Rust values of the given types. /// Produces `Err` if the message fails to decode at any given types. /// If the message contains only one value, it returns the value directly instead of a tuple. 
+/// ``` +/// use candid::{Encode, Decode, DecoderConfig}; +/// let bytes = Encode!(&42u32, &"hello", &"extra arguments")?; +/// let (value1, value2) = Decode!(&bytes, u32, String)?; +/// assert_eq!(value1, 42); +/// assert_eq!(value2, "hello"); +/// +/// // Decode with quota +/// let mut config = DecoderConfig::new(); +/// config.set_decoding_quota(1000).set_skipping_quota(50); +/// let (value1, value2) = Decode!([config]; &bytes, u32, String)?; +/// let ((value1, value2), cost) = Decode!(@Debug [config]; &bytes, u32, String)?; +/// // `cost` reports the decoding cost, not the remaining quota +/// assert_eq!(cost.decoding_quota, Some(846)); +/// assert_eq!(cost.skipping_quota, Some(16)); +/// # Ok::<(), candid::Error>(()) +/// ``` #[macro_export] macro_rules! Decode { ( $hex:expr $(,$ty:ty)* ) => {{ - $crate::de::IDLDeserialize::new($hex) + Decode!([$crate::de::DecoderConfig::new()]; $hex $(,$ty)*) + }}; + ( [ $config:expr ] ; $hex:expr $(,$ty:ty)* ) => {{ + $crate::de::IDLDeserialize::new_with_config($hex, &$config) .and_then(|mut de| Decode!(@GetValue [] de $($ty,)*) .and_then(|res| de.done().and(Ok(res)))) }}; + (@Debug [ $config:expr ] ; $hex:expr $(,$ty:ty)* ) => {{ + $crate::de::IDLDeserialize::new_with_config($hex, &$config) + .and_then(|mut de| Decode!(@GetValue [] de $($ty,)*) + .and_then(|res| de.done().and(Ok((res, de.get_config().compute_cost(&$config)))))) + }}; (@GetValue [$($ans:ident)*] $de:ident $ty:ty, $($tail:ty,)* ) => {{ $de.get_value::<$ty>() .and_then(|val| Decode!(@GetValue [$($ans)* val] $de $($tail,)* )) @@ -95,6 +120,28 @@ where de.done()?; Ok(res) } +pub fn decode_args_with_config<'a, Tuple>(bytes: &'a [u8], config: &DecoderConfig) -> Result +where + Tuple: ArgumentDecoder<'a>, +{ + let mut de = IDLDeserialize::new_with_config(bytes, config)?; + let res = ArgumentDecoder::decode(&mut de)?; + de.done()?; + Ok(res) +} +pub fn decode_args_with_config_debug<'a, Tuple>( + bytes: &'a [u8], + config: &DecoderConfig, +) -> Result<(Tuple, 
DecoderConfig)> +where + Tuple: ArgumentDecoder<'a>, +{ + let mut de = IDLDeserialize::new_with_config(bytes, config)?; + let res = ArgumentDecoder::decode(&mut de)?; + de.done()?; + let cost = de.get_config().compute_cost(config); + Ok((res, cost)) +} /// Decode a single argument. /// @@ -116,6 +163,13 @@ where let (res,) = decode_args(bytes)?; Ok(res) } +pub fn decode_one_with_config<'a, T>(bytes: &'a [u8], config: &DecoderConfig) -> Result +where + T: Deserialize<'a> + CandidType, +{ + let (res,) = decode_args_with_config(bytes, config)?; + Ok(res) +} /// Serialize an encoding of a tuple and write it to a `Write` buffer. /// diff --git a/rust/candid/tests/serde.rs b/rust/candid/tests/serde.rs index e33bcf59..3af55f19 100644 --- a/rust/candid/tests/serde.rs +++ b/rust/candid/tests/serde.rs @@ -1,4 +1,7 @@ -use candid::{decode_one, encode_one, CandidType, Decode, Deserialize, Encode, Int, Nat}; +use candid::{ + decode_one_with_config, encode_one, CandidType, Decode, DecoderConfig, Deserialize, Encode, + Int, Nat, +}; #[test] fn test_error() { @@ -31,9 +34,9 @@ fn test_error() { || { test_decode(b"DIDL\x02\x6c\x01\x0a\x01\x6d\x00\x01\x01 ", &candid::Reserved) }, - // Depending on stack size, we either get recursion limit or parser error + // Depending on stack size, we either reach recursion limit or skipping limit "Recursion limit exceeded", - "binary parser error", + "Skipping cost exceeds the limit", ); } @@ -437,6 +440,11 @@ fn test_serde_bytes() { }; test_encode(&vec, &hex("4449444c026c02620163016d7b01000301020303010203")); test_decode(&hex("4449444c026c02620163016d7b01000301020303010203"), &vec); + // test cost + let bytes = hex("4449444c016d7b010003010203"); + let config = get_config(); + let cost = Decode!(@Debug [config]; &bytes, ByteBuf).unwrap().1; + assert_eq!(cost.decoding_quota, Some(41)); // header cost 9 * 4 + 1 + 4 } #[test] @@ -531,7 +539,12 @@ fn test_vector() { let bytes = hex("4449444c036c01d6fca702016d026c00010080ade204"); check_error( || 
test_decode(&bytes, &candid::Reserved), - "zero sized values too large", + "Skipping cost exceeds the limit", + ); + let bytes = hex("4449444c176c02017f027f6c02010002006c02000101016c02000201026c02000301036c02000401046c02000501056c02000601066c02000701076c02000801086c02000901096c02000a010a6c02000b010b6c02000c010c6c02000d020d6c02000e010e6c02000f010f6c02001001106c02001101116c02001201126c02001301136e146d150116050101010101"); + check_error( + || test_decode(&bytes, &candid::Reserved), + "Skipping cost exceeds the limit", ); } @@ -767,12 +780,21 @@ where ); } +fn get_config() -> DecoderConfig { + let mut config = DecoderConfig::new(); + config + .set_decoding_quota(20_000_000) + .set_skipping_quota(10_000); + config +} + fn test_decode<'de, T>(bytes: &'de [u8], expected: &T) where T: PartialEq + serde::de::Deserialize<'de> + std::fmt::Debug + CandidType, { - let decoded_one = decode_one::(bytes).unwrap(); - let decoded_macro = Decode!(bytes, T).unwrap(); + let config = get_config(); + let decoded_one = decode_one_with_config::(bytes, &config).unwrap(); + let decoded_macro = Decode!([config]; bytes, T).unwrap(); assert_eq!(decoded_one, *expected); assert_eq!(decoded_macro, *expected); } diff --git a/rust/candid_parser/src/test.rs b/rust/candid_parser/src/test.rs index f48c2ac4..563b756a 100644 --- a/rust/candid_parser/src/test.rs +++ b/rust/candid_parser/src/test.rs @@ -3,6 +3,9 @@ use super::typing::check_prog; use crate::{Error, Result}; use candid::types::value::IDLArgs; use candid::types::{Type, TypeEnv}; +use candid::DecoderConfig; + +const DECODING_COST: usize = 20_000_000; type TupType = Vec; @@ -51,7 +54,13 @@ impl Input { pub fn parse(&self, env: &TypeEnv, types: &[Type]) -> Result { match self { Input::Text(ref s) => Ok(super::parse_idl_args(s)?.annotate_types(true, env, types)?), - Input::Blob(ref bytes) => Ok(IDLArgs::from_bytes_with_types(bytes, env, types)?), + Input::Blob(ref bytes) => { + let mut config = DecoderConfig::new(); + 
config.set_decoding_quota(DECODING_COST); + Ok(IDLArgs::from_bytes_with_types_with_config( + bytes, env, types, &config, + )?) + } } } fn check_round_trip(&self, v: &IDLArgs, env: &TypeEnv, types: &[Type]) -> Result { @@ -105,7 +114,11 @@ impl HostTest { if !assert.pass && assert.right.is_none() { asserts.push(NotDecode(bytes, types)); } else { - let args = IDLArgs::from_bytes_with_types(&bytes, env, &types).unwrap(); + let mut config = DecoderConfig::new(); + config.set_decoding_quota(DECODING_COST); + let args = + IDLArgs::from_bytes_with_types_with_config(&bytes, env, &types, &config) + .unwrap(); asserts.push(Decode(bytes.clone(), types.clone(), true, args)); // round tripping // asserts.push(Encode(args, types.clone(), true, bytes.clone())); diff --git a/spec/Candid.md b/spec/Candid.md index c5bfbb40..598e839f 100644 --- a/spec/Candid.md +++ b/spec/Candid.md @@ -1,8 +1,8 @@ # Candid Specification -Version: 0.1.7 +Version: 0.1.8 -Date: Dec 12, 2023 +Date: Feb 22, 2024 ## Motivation @@ -1253,10 +1253,6 @@ M(ref(r) : principal) = i8(0) M(id(v*) : principal) = i8(1) M(v* : vec nat8) ``` -Note: - -* Since `null`, `reserved`, `record {}`, and records of such values, take no space, to prevent unbounded sized message, we limit the total vector length of such zero-sized values in a messagev (on the wire) to be 2,000,000 elements. For example, if a message contains two vectors, one at type `vec null` and one at type `vec record {}`, then the length of both vectors combined cannot exceed 2,000,000 elements. - #### References `R` maps an Candid value to the sequence of references contained in that value. The definition is indexed by type. 
@@ -1318,9 +1314,12 @@ Deserialisation at an expected type sequence `(,*)` proceeds by * checking for the magic number `DIDL` * using the inverse of the `T` function to decode the type definitions `(,*)` - * check that `(,*) <: (,*)`, else fail * using the inverse of the `M` function, indexed by `(,*)`, to decode the values `(,*)` - * use the coercion function `C[(,*) <: (,*)]((,*))` to understand the decoded values at the expected type. + * use the coercion function `v : t ~> v' : t'` to understand the decoded values at the expected type. + +Note on implementation: + +Due to the wire format and subtyping, deserializing different messages at a fixed type sequence `(,*)` requires significantly different resources, e.g., stack size, time and memory. The implementation is encouraged to self-meter the deserialisation cost to prevent: 1) stack overflow; 2) spending too much time on unneeded data, due to subtyping; 3) deserialising data of exponential size. ### Deserialisation of future types diff --git a/test/spacebomb.test.did b/test/spacebomb.test.did index 5c2d415c..fed3a2da 100644 --- a/test/spacebomb.test.did +++ b/test/spacebomb.test.did @@ -1,12 +1,14 @@ // Space bomb tests - -// Messages with more than 2_000_000 zero-length elements in vectors should be rejected +// Messages in this test all take a lot of time, memory and stack space to decode. +// With infinite resources, these are all valid Candid messages. +// When using Candid in a resource limited environment, for example one consensus round in a blockchain, +// an implementation with self-metering should reject these messages relatively early +// without going through the whole deserialisation process. 
// \80\94\eb\dc\03 is 1000_000_000 // \80\ad\e2\04 is 10_000_000 -// \80\89\7a is 2_000_000 // \ff\ff\3f is 1_048_575 // \80\b5\18 is 400_000 @@ -17,20 +19,7 @@ assert blob "DIDL\01\6d\70\01\00\80\94\eb\dc\03" !: () assert blob "DIDL\04\6c\03\01\7f\02\01\03\02\6c\01\01\70\6c\00\6d\00\01\03\80\94\eb\dc\03" !: () "zero-sized record (extra argument)"; assert blob "DIDL\02\6d\01\6d\7f\01\00\05\ff\ff\3f\ff\ff\3f\ff\ff\3f\ff\ff\3f\ff\ff\3f" !: () "vec vec null (extra argument)"; assert blob "DIDL\03\6c\01\d6\fc\a7\02\01\6d\02\6c\00\01\00\80\ad\e2\04" !: () "vec record {} (extra argument)"; - -// Messages with exactly 2_000_000 zero-length elements should succeed -assert blob "DIDL\01\6d\7f\01\00\80\89\7a" : () "vec null (exactly 2000000)"; -assert blob "DIDL\01\6d\70\01\00\80\89\7a" : () "vec reserved (exactly 2000000)"; -assert blob "DIDL\04\6c\03\01\7f\02\01\03\02\6c\01\01\70\6c\00\6d\00\01\03\80\89\7a" : () "zero-sized record (exactly 2000000)"; -assert blob "DIDL\02\6d\01\6d\7f\01\00\05\80\b5\18\80\b5\18\80\b5\18\80\b5\18\80\b5\18" : () "vec vec null (exactly 2000000)"; -assert blob "DIDL\03\6c\01\d6\fc\a7\02\01\6d\02\6c\00\01\00\80\89\7a" : () "vec record {} (exactly 2000000)"; - -// Messages with exactly 2_000_001 zero-length elements should fail -assert blob "DIDL\01\6d\7f\01\00\80\89\7b" !: () "vec null (exactly 2000001)"; -assert blob "DIDL\01\6d\70\01\00\80\89\7b" !: () "vec reserved (exactly 2000001)"; -assert blob "DIDL\04\6c\03\01\7f\02\01\03\02\6c\01\01\70\6c\00\6d\00\01\03\80\89\7b" !: () "zero-sized record (exactly 2000001)"; -assert blob "DIDL\02\6d\01\6d\7f\01\00\05\80\b5\18\80\b5\18\80\b5\18\80\b5\18\80\b5\19" !: () "vec vec null (exactly 2000001)"; -assert blob "DIDL\03\6c\01\d6\fc\a7\02\01\6d\02\6c\00\01\00\80\89\7b" !: () "vec record {} (exactly 2000001)"; +assert blob 
"DIDL\17\6c\02\01\7f\02\7f\6c\02\01\00\02\00\6c\02\00\01\01\01\6c\02\00\02\01\02\6c\02\00\03\01\03\6c\02\00\04\01\04\6c\02\00\05\01\05\6c\02\00\06\01\06\6c\02\00\07\01\07\6c\02\00\08\01\08\6c\02\00\09\01\09\6c\02\00\0a\01\0a\6c\02\00\0b\01\0b\6c\02\00\0c\01\0c\6c\02\00\0d\02\0d\6c\02\00\0e\01\0e\6c\02\00\0f\01\0f\6c\02\00\10\01\10\6c\02\00\11\01\11\6c\02\00\12\01\12\6c\02\00\13\01\13\6e\14\6d\15\01\16\02\01\01" !: () "vec opt record with 2^20 null (extra argument)"; // Decoding to actual type assert blob "DIDL\01\6d\7f\01\00\80\94\eb\dc\03" !: (vec opt nat) "vec null (not ignored)"; @@ -45,3 +34,4 @@ assert blob "DIDL\01\6d\70\01\00\80\94\eb\dc\03" !: (o assert blob "DIDL\04\6c\03\01\7f\02\01\03\02\6c\01\01\70\6c\00\6d\00\01\03\80\94\eb\dc\03" !: (opt nat) "zero-sized record (subtyping)"; assert blob "DIDL\02\6d\01\6d\7f\01\00\05\ff\ff\3f\ff\ff\3f\ff\ff\3f\ff\ff\3f\ff\ff\3f" !: (vec opt nat) "vec vec null (subtyping)"; assert blob "DIDL\03\6c\01\d6\fc\a7\02\01\6d\02\6c\00\01\00\80\ad\e2\04" !: (opt nat) "vec record {} (subtyping)"; +assert blob "DIDL\17\6c\02\01\7f\02\7f\6c\02\01\00\02\00\6c\02\00\01\01\01\6c\02\00\02\01\02\6c\02\00\03\01\03\6c\02\00\04\01\04\6c\02\00\05\01\05\6c\02\00\06\01\06\6c\02\00\07\01\07\6c\02\00\08\01\08\6c\02\00\09\01\09\6c\02\00\0a\01\0a\6c\02\00\0b\01\0b\6c\02\00\0c\01\0c\6c\02\00\0d\02\0d\6c\02\00\0e\01\0e\6c\02\00\0f\01\0f\6c\02\00\10\01\10\6c\02\00\11\01\11\6c\02\00\12\01\12\6c\02\00\13\01\13\6e\14\6d\15\01\16\05\01\01\01\01\01" !: (vec opt record {}) "vec opt record with 2^20 null (subtyping)";