From 381329c657aada6a11708bc03c5487537c28b398 Mon Sep 17 00:00:00 2001 From: "Augusto F. Hack" Date: Mon, 4 Dec 2023 14:21:17 +0100 Subject: [PATCH 1/2] CodeBlock,ProgramAst: implement Serialize/Deserialize --- assembly/src/ast/code_body.rs | 36 ++++++++++++++++++++++++++++++++++- assembly/src/ast/program.rs | 34 +++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/assembly/src/ast/code_body.rs b/assembly/src/ast/code_body.rs index d6a82bc6fb..405da69007 100644 --- a/assembly/src/ast/code_body.rs +++ b/assembly/src/ast/code_body.rs @@ -26,8 +26,10 @@ impl CodeBody { where N: IntoIterator, { + let nodes: Vec<_> = nodes.into_iter().collect(); + assert!(nodes.len() <= u32::MAX.try_into().unwrap()); Self { - nodes: nodes.into_iter().collect(), + nodes, locations: Vec::new(), } } @@ -163,3 +165,35 @@ impl PartialEq for CodeBody { nodes && (locations || left_empty || right_empty) } } + +// SERIALIZATION +// ================================================================================================ + +impl Serializable for CodeBody { + fn write_into(&self, target: &mut W) { + assert!(self.nodes.len() <= u32::MAX.try_into().unwrap()); + target.write_u32(self.nodes.len() as u32); + self.nodes.write_into(target); + + target.write_u64(self.locations.len() as u64); + self.locations.write_into(target); + } +} + +impl Deserializable for CodeBody { + fn read_from(source: &mut R) -> Result { + let count = source + .read_u32()? + .try_into() + .map_err(|v| DeserializationError::InvalidValue(format!("can't fit {v} into usize")))?; + let nodes = Node::read_batch_from(source, count)?; + + let count = source + .read_u64()? 
+ .try_into() + .map_err(|v| DeserializationError::InvalidValue(format!("can't fit {v} into usize")))?; + let locations = SourceLocation::read_batch_from(source, count)?; + + Ok(Self { nodes, locations }) + } +} diff --git a/assembly/src/ast/program.rs b/assembly/src/ast/program.rs index 12d3802b86..e166222b23 100644 --- a/assembly/src/ast/program.rs +++ b/assembly/src/ast/program.rs @@ -327,3 +327,37 @@ impl fmt::Display for ProgramAst { writeln!(f, "end") } } + +// SERIALIZATION +// ================================================================================================ + +impl Serializable for ProgramAst { + fn write_into(&self, target: &mut W) { + self.body.write_into(target); + + debug_assert!(self.local_procs.len() <= MAX_LOCAL_PROCS); + target.write_u16(self.local_procs.len() as u16); + + self.local_procs.write_into(target); + self.import_info.write_into(target); + self.start.write_into(target); + } +} + +impl Deserializable for ProgramAst { + fn read_from(source: &mut R) -> Result { + let body = CodeBody::read_from(source)?; + + let num_local_procs = source.read_u16()?.into(); + let local_procs = ProcedureAst::read_batch_from(source, num_local_procs)?; + let import_info = ModuleImports::read_from(source)?; + let start = SourceLocation::read_from(source)?; + + Ok(Self { + body, + local_procs, + import_info, + start, + }) + } +} From 264c6e1d2943049c62d04aebedf8e004990b3350 Mon Sep 17 00:00:00 2001 From: Bobbin Threadbare Date: Wed, 6 Dec 2023 22:50:31 -0800 Subject: [PATCH 2/2] refactor: move serialization methods into ProgramAst struct --- assembly/src/ast/code_body.rs | 39 ++------------- assembly/src/ast/module.rs | 6 ++- assembly/src/ast/procedure.rs | 6 +++ assembly/src/ast/program.rs | 92 ++++++++++++++++------------------- assembly/src/errors.rs | 10 ++++ 5 files changed, 67 insertions(+), 86 deletions(-) diff --git a/assembly/src/ast/code_body.rs b/assembly/src/ast/code_body.rs index 405da69007..9f34d5f2e4 100644 --- 
a/assembly/src/ast/code_body.rs +++ b/assembly/src/ast/code_body.rs @@ -1,6 +1,6 @@ use super::{ ByteReader, ByteWriter, Deserializable, DeserializationError, Node, Serializable, - SourceLocation, Vec, + SourceLocation, Vec, MAX_BODY_LEN, }; use core::{iter, slice}; @@ -22,12 +22,15 @@ impl CodeBody { // -------------------------------------------------------------------------------------------- /// Creates a new instance of [CodeBody] populated with the provided `nodes`. + /// + /// # Panics + /// Assumes that the number of nodes is smaller than 2^16 and panics otherwise. pub fn new(nodes: N) -> Self where N: IntoIterator, { let nodes: Vec<_> = nodes.into_iter().collect(); - assert!(nodes.len() <= u32::MAX.try_into().unwrap()); + assert!(nodes.len() <= MAX_BODY_LEN, "too many nodes"); Self { nodes, locations: Vec::new(), @@ -165,35 +168,3 @@ impl PartialEq for CodeBody { nodes && (locations || left_empty || right_empty) } } - -// SERIALIZATION -// ================================================================================================ - -impl Serializable for CodeBody { - fn write_into(&self, target: &mut W) { - assert!(self.nodes.len() <= u32::MAX.try_into().unwrap()); - target.write_u32(self.nodes.len() as u32); - self.nodes.write_into(target); - - target.write_u64(self.locations.len() as u64); - self.locations.write_into(target); - } -} - -impl Deserializable for CodeBody { - fn read_from(source: &mut R) -> Result { - let count = source - .read_u32()? - .try_into() - .map_err(|v| DeserializationError::InvalidValue(format!("can't fit {v} into usize")))?; - let nodes = Node::read_batch_from(source, count)?; - - let count = source - .read_u64()? 
- .try_into() - .map_err(|v| DeserializationError::InvalidValue(format!("can't fit {v} into usize")))?; - let locations = SourceLocation::read_batch_from(source, count)?; - - Ok(Self { nodes, locations }) - } -} diff --git a/assembly/src/ast/module.rs b/assembly/src/ast/module.rs index a5f5798f43..661e170fb9 100644 --- a/assembly/src/ast/module.rs +++ b/assembly/src/ast/module.rs @@ -1,5 +1,3 @@ -use vm_core::utils::Serializable; - use super::{ format::*, imports::ModuleImports, @@ -13,6 +11,10 @@ use super::{ }, }; use core::{fmt, str::from_utf8}; +use vm_core::utils::Serializable; + +// MODULE AST +// ================================================================================================ /// An abstract syntax tree of a Miden module. /// diff --git a/assembly/src/ast/procedure.rs b/assembly/src/ast/procedure.rs index ec1c3bb313..8197f5f17d 100644 --- a/assembly/src/ast/procedure.rs +++ b/assembly/src/ast/procedure.rs @@ -7,6 +7,9 @@ use super::{ }; use core::{iter, str::from_utf8}; +// PROCEDURE AST +// ================================================================================================ + /// An abstract syntax tree of a Miden procedure. /// /// A procedure AST consists of a list of body nodes and additional metadata about the procedure @@ -158,6 +161,9 @@ impl Deserializable for ProcedureAst { } } +// PROCEDURE RE-EXPORT +// ================================================================================================ + /// Represents a re-exported procedure. 
/// /// A re-exported procedure is a procedure that is defined in a different module in the same diff --git a/assembly/src/ast/program.rs b/assembly/src/ast/program.rs index e166222b23..0843bb29e2 100644 --- a/assembly/src/ast/program.rs +++ b/assembly/src/ast/program.rs @@ -20,6 +20,9 @@ use core::{fmt, iter}; #[cfg(feature = "std")] use std::{fs, io, path::Path}; +// PROGRAM AST +// ================================================================================================ + /// An abstract syntax tree of an executable Miden program. /// /// A program AST consists of a body of the program, a list of internal procedure ASTs, a list of @@ -39,7 +42,17 @@ impl ProgramAst { /// Returns a new [ProgramAst]. /// /// A program consist of a body and a set of internal (i.e., not exported) procedures. + /// + /// # Errors + /// Returns an error if: + /// - The number of body nodes is greater than or equal to 2^16. + /// - The number of local procedures is greater than or equal to 2^16. pub fn new(body: Vec, local_procs: Vec) -> Result { + // TODO: instead of ParsingError, this should probably return a different error type: + // e.g., AstError. + if body.len() > MAX_BODY_LEN { + return Err(ParsingError::too_many_body_nodes(body.len(), MAX_BODY_LEN)); + } if local_procs.len() > MAX_LOCAL_PROCS { return Err(ParsingError::too_many_module_procs(local_procs.len(), MAX_LOCAL_PROCS)); } @@ -175,60 +188,64 @@ impl ProgramAst { // SERIALIZATION / DESERIALIZATION // -------------------------------------------------------------------------------------------- - /// Returns byte representation of this [ProgramAst]. + /// Writes byte representation of this [ProgramAst] into the specified target according to + /// the specified serde options. /// /// The serde options are serialized as header information for the purposes of deserialization.
- pub fn to_bytes(&self, options: AstSerdeOptions) -> Vec { - let mut target = Vec::::default(); - + pub fn write_into(&self, target: &mut W, options: AstSerdeOptions) { // serialize the options, so that deserialization knows what to do - options.write_into(&mut target); + options.write_into(target); // asserts below are OK because we enforce limits on the number of procedure and the // number of body instructions in relevant parsers // serialize imports if required if options.serialize_imports { - self.import_info.write_into(&mut target); + self.import_info.write_into(target); } // serialize procedures assert!(self.local_procs.len() <= MAX_LOCAL_PROCS, "too many local procs"); target.write_u16(self.local_procs.len() as u16); - self.local_procs.write_into(&mut target); + self.local_procs.write_into(target); // serialize program body assert!(self.body.nodes().len() <= MAX_BODY_LEN, "too many body instructions"); target.write_u16(self.body.nodes().len() as u16); - self.body.nodes().write_into(&mut target); + self.body.nodes().write_into(target); + } + /// Returns byte representation of this [ProgramAst]. + /// + /// The serde options are serialized as header information for the purposes of deserialization. + pub fn to_bytes(&self, options: AstSerdeOptions) -> Vec { + let mut target = Vec::::default(); + self.write_into(&mut target, options); target } - /// Returns a [ProgramAst] struct deserialized from the provided bytes. + /// Returns a [ProgramAst] struct deserialized from the specified reader. /// /// This function assumes that the byte array contains a serialized [AstSerdeOptions] struct as /// a header. 
- pub fn from_bytes(bytes: &[u8]) -> Result { - let mut source = SliceReader::new(bytes); - + pub fn read_from(source: &mut R) -> Result { // Deserialize the serialization options used when serializing - let options = AstSerdeOptions::read_from(&mut source)?; + let options = AstSerdeOptions::read_from(source)?; // deserialize imports if required let import_info = if options.serialize_imports { - ModuleImports::read_from(&mut source)? + ModuleImports::read_from(source)? } else { ModuleImports::default() }; // deserialize local procs let num_local_procs = source.read_u16()?; - let local_procs = Deserializable::read_batch_from(&mut source, num_local_procs as usize)?; + let local_procs = Deserializable::read_batch_from(source, num_local_procs as usize)?; // deserialize program body let body_len = source.read_u16()? as usize; - let nodes = Deserializable::read_batch_from(&mut source, body_len)?; + let nodes = Deserializable::read_batch_from(source, body_len)?; match Self::new(nodes, local_procs) { Err(err) => Err(DeserializationError::UnknownError(err.message().clone())), @@ -236,6 +253,15 @@ impl ProgramAst { } } + /// Returns a [ProgramAst] struct deserialized from the provided bytes. + /// + /// This function assumes that the byte array contains a serialized [AstSerdeOptions] struct as + /// a header. + pub fn from_bytes(bytes: &[u8]) -> Result { + let mut source = SliceReader::new(bytes); + Self::read_from(&mut source) + } + /// Loads the [SourceLocation] from the `source`. 
/// /// It expects the `start` location at the first position, and will subsequently load the @@ -327,37 +353,3 @@ impl fmt::Display for ProgramAst { writeln!(f, "end") } } - -// SERIALIZATION -// ================================================================================================ - -impl Serializable for ProgramAst { - fn write_into(&self, target: &mut W) { - self.body.write_into(target); - - debug_assert!(self.local_procs.len() <= MAX_LOCAL_PROCS); - target.write_u16(self.local_procs.len() as u16); - - self.local_procs.write_into(target); - self.import_info.write_into(target); - self.start.write_into(target); - } -} - -impl Deserializable for ProgramAst { - fn read_from(source: &mut R) -> Result { - let body = CodeBody::read_from(source)?; - - let num_local_procs = source.read_u16()?.into(); - let local_procs = ProcedureAst::read_batch_from(source, num_local_procs)?; - let import_info = ModuleImports::read_from(source)?; - let start = SourceLocation::read_from(source)?; - - Ok(Self { - body, - local_procs, - import_info, - start, - }) - } -} diff --git a/assembly/src/errors.rs b/assembly/src/errors.rs index cd1c78fc98..47c6983812 100644 --- a/assembly/src/errors.rs +++ b/assembly/src/errors.rs @@ -417,6 +417,16 @@ impl ParsingError { } } + pub fn too_many_body_nodes(num_nodes: usize, max_nodes: usize) -> Self { + ParsingError { + message: format!( + "a code body cannot contain more than {max_nodes} nodes, but had {num_nodes}" + ), + location: SourceLocation::default(), + op: "".to_string(), + } + } + pub fn module_docs_too_long(doc_len: usize, max_len: usize) -> Self { ParsingError { message: format!(