Merge pull request #1354 from eclipse-zenoh/dev/bytes_raw

Add ZBytes::slices
eclipse-zenoh · Sep 4, 2024 · 60af274 · 60af274
2 parents 4bcf093 + 7b5f9ae
commit 60af274
Show file tree

Hide file tree

Showing 2 changed files with 165 additions and 9 deletions.
diff --git a/examples/examples/z_bytes.rs b/examples/examples/z_bytes.rs
@@ -33,7 +33,8 @@ fn main() {
     // Corresponding encoding to be used in operations like `.put()`, `.reply()`, etc.
     // let encoding = Encoding::ZENOH_STRING;
 
-    // Cow
+    // Cow<str>
+    // See [`zenoh::bytes::ZBytes`] documentation for zero-copy behaviour.
     let input = Cow::from("test");
     let payload = ZBytes::from(&input);
     let output: Cow<str> = payload.deserialize().unwrap();
@@ -49,6 +50,15 @@ fn main() {
     // Corresponding encoding to be used in operations like `.put()`, `.reply()`, etc.
     // let encoding = Encoding::ZENOH_BYTES;
 
+    // Cow<[u8]>
+    // See [`zenoh::bytes::ZBytes`] documentation for zero-copy behaviour.
+    let input = Cow::from(vec![1, 2, 3, 4]);
+    let payload = ZBytes::from(&input);
+    let output: Cow<[u8]> = payload.into();
+    assert_eq!(input, output);
+    // Corresponding encoding to be used in operations like `.put()`, `.reply()`, etc.
+    // let encoding = Encoding::ZENOH_BYTES;
+
     // Writer & Reader
     // serialization
     let mut bytes = ZBytes::empty();
@@ -81,6 +91,13 @@ fn main() {
         assert_eq!(input[idx], value.unwrap());
     }
 
+    // Iterator RAW
+    let input: [i32; 4] = [1, 2, 3, 4];
+    let payload = ZBytes::from_iter(input.iter());
+    for slice in payload.slices() {
+        println!("{:02x?}", slice);
+    }
+
     // HashMap
     let mut input: HashMap<usize, String> = HashMap::new();
     input.insert(0, String::from("abc"));

diff --git a/zenoh/src/api/bytes.rs b/zenoh/src/api/bytes.rs
@@ -106,7 +106,7 @@ pub trait Deserialize<T> {
 ///
 /// `ZBytes` provides convenient methods to the user for serialization/deserialization based on the default Zenoh serializer [`ZSerde`].
 ///
-/// **NOTE:** Zenoh semantic and protocol take care of sending and receiving bytes without restricting the actual data types.
+/// **NOTE 1:** Zenoh semantic and protocol take care of sending and receiving bytes without restricting the actual data types.
 /// [`ZSerde`] is the default serializer/deserializer provided for convenience to the users to deal with primitives data types via
 /// a simple out-of-the-box encoding. [`ZSerde`] is **NOT** by any means the only serializer/deserializer users can use nor a limitation
 /// to the types supported by Zenoh. Users are free and encouraged to use any serializer/deserializer of their choice like *serde*,
@@ -185,6 +185,40 @@ pub trait Deserialize<T> {
 /// assert_eq!(start, end);
 /// ```
 ///
+/// **NOTE 2:** `ZBytes` may store data in non-contiguous regions of memory.
+/// The typical case for `ZBytes` to store data in different memory regions is when data is received fragmented from the network.
+/// The user can then decide to use [`ZBytes::deserialize`], [`ZBytes::reader`], [`ZBytes::into`], or [`ZBytes::slices`] depending
+/// on their needs.
+///
+/// To directly access the raw data as a contiguous slice, it is preferable to convert `ZBytes` into a [`std::borrow::Cow<[u8]>`].
+/// If `ZBytes` contains all the data in a single memory location, this is guaranteed to be zero-copy. This is the common case for small messages.
+/// If `ZBytes` contains data scattered in different memory regions, this operation will do an allocation and a copy. This is the common case for large messages.
+///
+/// Example:
+/// ```rust
+/// use std::borrow::Cow;
+/// use zenoh::bytes::ZBytes;
+///
+/// let buf: Vec<u8> = vec![0, 1, 2, 3];
+/// let bytes = ZBytes::from(buf.clone());
+/// let deser: Cow<[u8]> = bytes.into();
+/// assert_eq!(buf.as_slice(), deser.as_ref());
+/// ```
+///
+/// It is also possible to iterate over the raw data that may be scattered on different memory regions.
+/// Please note that no guarantee is provided on the internal memory layout of [`ZBytes`] nor on how many slices a given [`ZBytes`] will be composed of.
+/// The only guarantee provided is that the order of the bytes is preserved.
+///
+/// Example:
+/// ```rust
+/// use zenoh::bytes::ZBytes;
+///
+/// let buf: Vec<u8> = vec![0, 1, 2, 3];
+/// let bytes = ZBytes::from(buf.clone());
+/// for slice in bytes.slices() {
+///     println!("{:02x?}", slice);
+/// }
+/// ```
 #[repr(transparent)]
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct ZBytes(ZBuf);
@@ -208,7 +242,7 @@ impl ZBytes {
         self.0.is_empty()
     }
 
-    /// Returns the length of the ZBytes.
+    /// Returns the total number of bytes in the ZBytes.
     pub fn len(&self) -> usize {
         self.0.len()
     }
@@ -229,11 +263,25 @@ impl ZBytes {
     }
 
     /// Get a [`ZBytesWriter`] implementing [`std::io::Write`] trait.
+    ///
+    /// See [`ZBytesWriter`] on how to chain the serialization of different types into a single [`ZBytes`].
     pub fn writer(&mut self) -> ZBytesWriter<'_> {
         ZBytesWriter(self.0.writer())
     }
 
-    /// Get a [`ZBytesReader`] implementing [`std::io::Read`] trait.
+    /// Get a [`ZBytesIterator`] that deserializes a sequence of `T`.
+    ///
+    /// Example:
+    /// ```rust
+    /// use zenoh::bytes::ZBytes;
+    ///
+    /// let list: Vec<f32> = vec![1.1, 2.2, 3.3];
+    /// let mut zbs = ZBytes::from_iter(list.iter());
+    ///
+    /// for (index, elem) in zbs.iter::<f32>().enumerate() {
+    ///     assert_eq!(list[index], elem.unwrap());
+    /// }
+    /// ```
     pub fn iter<T>(&self) -> ZBytesIterator<'_, T>
     where
         for<'b> ZSerde: Deserialize<T, Input<'b> = &'b ZBytes>,
@@ -245,6 +293,62 @@ impl ZBytes {
         }
     }
 
+    /// Return an iterator on raw bytes slices contained in the [`ZBytes`].
+    ///
+    /// [`ZBytes`] may store data in non-contiguous regions of memory; this iterator
+    /// gives direct access to the raw data without any attempt to deserialize it.
+    /// Please note that no guarantee is provided on the internal memory layout of [`ZBytes`].
+    /// The only guarantee provided is that the order of the bytes is preserved.
+    ///
+    /// Please note that [`ZBytes::iter`] performs deserialization while iterating, whereas [`ZBytes::slices`] does not.
+    ///
+    /// ```rust
+    /// use std::io::Write;
+    /// use zenoh::bytes::ZBytes;
+    ///
+    /// let buf1: Vec<u8> = vec![1, 2, 3];
+    /// let buf2: Vec<u8> = vec![4, 5, 6, 7, 8];
+    /// let mut zbs = ZBytes::empty();
+    /// let mut writer = zbs.writer();
+    /// writer.write_all(&buf1).unwrap();
+    /// writer.write_all(&buf2).unwrap();
+    ///
+    /// // Access the raw content
+    /// for slice in zbs.slices() {
+    ///     println!("{:02x?}", slice);
+    /// }
+    ///
+    /// // Concatenate input in a single vector
+    /// let buf: Vec<u8> = buf1.into_iter().chain(buf2.into_iter()).collect();
+    /// // Concatenate raw bytes in a single vector
+    /// let out: Vec<u8> = zbs.slices().fold(Vec::new(), |mut b, x| { b.extend_from_slice(x); b });
+    /// // The previous line is the equivalent of
+    /// // let out: Vec<u8> = zbs.into();
+    /// assert_eq!(buf, out);
+    /// ```
+    ///
+    /// The example below shows how the [`ZBytesWriter::append`] simply appends the slices of one [`ZBytes`]
+    /// to another and how those slices can be iterated over to access the raw data.
+    /// ```rust
+    /// use std::io::Write;
+    /// use zenoh::bytes::ZBytes;
+    ///
+    /// let buf1: Vec<u8> = vec![1, 2, 3];
+    /// let buf2: Vec<u8> = vec![4, 5, 6, 7, 8];
+    ///
+    /// let mut zbs = ZBytes::empty();
+    /// let mut writer = zbs.writer();
+    /// writer.append(ZBytes::from(buf1.clone()));
+    /// writer.append(ZBytes::from(buf2.clone()));
+    ///
+    /// let mut iter = zbs.slices();
+    /// assert_eq!(buf1.as_slice(), iter.next().unwrap());
+    /// assert_eq!(buf2.as_slice(), iter.next().unwrap());
+    /// ```
+    pub fn slices(&self) -> impl Iterator<Item = &[u8]> {
+        self.0.slices()
+    }
+
     /// Serialize an object of type `T` as a [`ZBytes`] using the [`ZSerde`].
     ///
     /// ```rust
@@ -293,7 +397,11 @@ impl ZBytes {
         ZSerde.serialize(t)
     }
 
-    /// Deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
+    /// Deserialize an object of type `T` using [`ZSerde`].
+    ///
+    /// See [`ZBytes::serialize`] and [`ZBytes::try_serialize`] for the examples.
+    ///
+    /// See [`ZBytes::into`] for infallible conversion, e.g. to get raw bytes.
     pub fn deserialize<'a, T>(&'a self) -> Result<T, <ZSerde as Deserialize<T>>::Error>
     where
         ZSerde: Deserialize<T, Input<'a> = &'a ZBytes>,
@@ -302,7 +410,7 @@ impl ZBytes {
         ZSerde.deserialize(self)
     }
 
-    /// Deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
+    /// Deserialize an object of type `T` using [`ZSerde`].
     pub fn deserialize_mut<'a, T>(&'a mut self) -> Result<T, <ZSerde as Deserialize<T>>::Error>
     where
         ZSerde: Deserialize<T, Input<'a> = &'a mut ZBytes>,
@@ -311,7 +419,37 @@ impl ZBytes {
         ZSerde.deserialize(self)
     }
 
-    /// Infallibly deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
+    /// Infallibly deserialize an object of type `T` using [`ZSerde`].
+    ///
+    /// To directly access the raw data as a contiguous slice, it is preferable to convert `ZBytes` into a [`std::borrow::Cow<[u8]>`](`std::borrow::Cow`).
+    /// If [`ZBytes`] contains all the data in a single memory location, then it is guaranteed to be zero-copy. This is the common case for small messages.
+    /// If [`ZBytes`] contains data scattered in different memory regions, this operation will do an allocation and a copy. This is the common case for large messages.
+    ///
+    /// ```rust
+    /// use std::borrow::Cow;
+    /// use zenoh::bytes::ZBytes;
+    ///
+    /// let buf: Vec<u8> = vec![0, 1, 2, 3];
+    /// let bytes = ZBytes::from(buf.clone());
+    /// let deser: Cow<[u8]> = bytes.into();
+    /// assert_eq!(buf.as_slice(), deser.as_ref());
+    /// ```
+    ///
+    /// An alternative is to convert `ZBytes` into a [`std::vec::Vec<u8>`].
+    /// Converting to [`std::vec::Vec<u8>`] will always allocate and make a copy.
+    ///
+    /// ```rust
+    /// use std::borrow::Cow;
+    /// use zenoh::bytes::ZBytes;
+    ///
+    /// let buf: Vec<u8> = vec![0, 1, 2, 3];
+    /// let bytes = ZBytes::from(buf.clone());
+    /// let deser: Vec<u8> = bytes.into();
+    /// assert_eq!(buf.as_slice(), deser.as_slice());
+    /// ```
+    ///
+    /// If you want to be sure that no copy is performed at all, then you should use [`ZBytes::slices`].
+    /// Please note that in this case data may not be contiguous in memory and it is the responsibility of the user to properly parse the raw slices.
     pub fn into<'a, T>(&'a self) -> T
     where
         ZSerde: Deserialize<T, Input<'a> = &'a ZBytes, Error = Infallible>,
@@ -320,7 +458,7 @@ impl ZBytes {
         ZSerde.deserialize(self).unwrap_infallible()
     }
 
-    /// Infallibly deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
+    /// Infallibly deserialize an object of type `T` using [`ZSerde`].
     pub fn into_mut<'a, T>(&'a mut self) -> T
     where
         ZSerde: Deserialize<T, Input<'a> = &'a mut ZBytes, Error = Infallible>,
@@ -553,7 +691,7 @@ where
 }
 
 /// The default serializer for [`ZBytes`]. It supports primitives types, such as: `Vec<u8>`, `uX`, `iX`, `fX`, `String`, `bool`.
-/// It also supports common Rust serde values like `serde_json::Value`.
+/// It also supports common Rust serde values like [`serde_json::Value`].
 ///
 /// **NOTE:** Zenoh semantic and protocol take care of sending and receiving bytes without restricting the actual data types.
 /// [`ZSerde`] is the default serializer/deserializer provided for convenience to the users to deal with primitives data types via
@@ -1164,6 +1302,7 @@ impl From<&mut Cow<'_, str>> for ZBytes {
     }
 }
 
+/// See [`Deserialize<Cow<'a, [u8]>>`] for guarantees on copies.
 impl<'a> Deserialize<Cow<'a, str>> for ZSerde {
     type Input<'b> = &'a ZBytes;
     type Error = Utf8Error;