Skip to content

Commit

Permalink
Merge pull request #1354 from eclipse-zenoh/dev/bytes_raw
Browse files Browse the repository at this point in the history
Add ZBytes::slices
  • Loading branch information
Mallets authored Sep 4, 2024
2 parents 4bcf093 + 7b5f9ae commit 60af274
Show file tree
Hide file tree
Showing 2 changed files with 165 additions and 9 deletions.
19 changes: 18 additions & 1 deletion examples/examples/z_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ fn main() {
// Corresponding encoding to be used in operations like `.put()`, `.reply()`, etc.
// let encoding = Encoding::ZENOH_STRING;

// Cow
// Cow<str>
// See [`zenoh::bytes::ZBytes`] documentation for zero-copy behaviour.
let input = Cow::from("test");
let payload = ZBytes::from(&input);
let output: Cow<str> = payload.deserialize().unwrap();
Expand All @@ -49,6 +50,15 @@ fn main() {
// Corresponding encoding to be used in operations like `.put()`, `.reply()`, etc.
// let encoding = Encoding::ZENOH_BYTES;

// Cow<[u8]>
// See [`zenoh::bytes::ZBytes`] documentation for zero-copy behaviour.
let input = Cow::from(vec![1, 2, 3, 4]);
let payload = ZBytes::from(&input);
let output: Cow<[u8]> = payload.into();
assert_eq!(input, output);
// Corresponding encoding to be used in operations like `.put()`, `.reply()`, etc.
// let encoding = Encoding::ZENOH_BYTES;

// Writer & Reader
// serialization
let mut bytes = ZBytes::empty();
Expand Down Expand Up @@ -81,6 +91,13 @@ fn main() {
assert_eq!(input[idx], value.unwrap());
}

// Iterator RAW
let input: [i32; 4] = [1, 2, 3, 4];
let payload = ZBytes::from_iter(input.iter());
for slice in payload.slices() {
println!("{:02x?}", slice);
}

// HashMap
let mut input: HashMap<usize, String> = HashMap::new();
input.insert(0, String::from("abc"));
Expand Down
155 changes: 147 additions & 8 deletions zenoh/src/api/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ pub trait Deserialize<T> {
///
/// `ZBytes` provides convenient methods to the user for serialization/deserialization based on the default Zenoh serializer [`ZSerde`].
///
/// **NOTE:** Zenoh semantic and protocol take care of sending and receiving bytes without restricting the actual data types.
/// **NOTE 1:** Zenoh semantics and protocol take care of sending and receiving bytes without restricting the actual data types.
/// [`ZSerde`] is the default serializer/deserializer provided for convenience to the users to deal with primitive data types via
/// a simple out-of-the-box encoding. [`ZSerde`] is **NOT** by any means the only serializer/deserializer users can use nor a limitation
/// to the types supported by Zenoh. Users are free and encouraged to use any serializer/deserializer of their choice like *serde*,
Expand Down Expand Up @@ -185,6 +185,40 @@ pub trait Deserialize<T> {
/// assert_eq!(start, end);
/// ```
///
/// **NOTE 2:** `ZBytes` may store data in non-contiguous regions of memory.
/// The typical case for `ZBytes` to store data in different memory regions is when data is received fragmented from the network.
/// The user can then decide to use [`ZBytes::deserialize`], [`ZBytes::reader`], [`ZBytes::into`], or [`ZBytes::slices`] depending
/// on their needs.
///
/// To directly access raw data as a contiguous slice, it is preferred to convert `ZBytes` into a [`std::borrow::Cow<[u8]>`].
/// If `ZBytes` contains all the data in a single memory location, this is guaranteed to be zero-copy. This is the common case for small messages.
/// If `ZBytes` contains data scattered in different memory regions, this operation will do an allocation and a copy. This is the common case for large messages.
///
/// Example:
/// ```rust
/// use std::borrow::Cow;
/// use zenoh::bytes::ZBytes;
///
/// let buf: Vec<u8> = vec![0, 1, 2, 3];
/// let bytes = ZBytes::from(buf.clone());
/// let deser: Cow<[u8]> = bytes.into();
/// assert_eq!(buf.as_slice(), deser.as_ref());
/// ```
///
/// It is also possible to iterate over the raw data that may be scattered on different memory regions.
/// Please note that no guarantee is provided on the internal memory layout of [`ZBytes`] nor on how many slices a given [`ZBytes`] will be composed of.
/// The only guarantee provided is that the order of the bytes is preserved.
///
/// Example:
/// ```rust
/// use zenoh::bytes::ZBytes;
///
/// let buf: Vec<u8> = vec![0, 1, 2, 3];
/// let bytes = ZBytes::from(buf.clone());
/// for slice in bytes.slices() {
/// println!("{:02x?}", slice);
/// }
/// ```
// `repr(transparent)` gives this single-field tuple struct the same layout as the `ZBuf` it wraps.
#[repr(transparent)]
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ZBytes(ZBuf);
Expand All @@ -208,7 +242,7 @@ impl ZBytes {
self.0.is_empty()
}

/// Returns the length of the ZBytes.
/// Returns the total number of bytes in the ZBytes.
pub fn len(&self) -> usize {
// Forwards to `len()` on the wrapped inner buffer (tuple field `0`).
self.0.len()
}
Expand All @@ -229,11 +263,25 @@ impl ZBytes {
}

/// Get a [`ZBytesWriter`] implementing [`std::io::Write`] trait.
///
/// See [`ZBytesWriter`] on how to chain the serialization of different types into a single [`ZBytes`].
pub fn writer(&mut self) -> ZBytesWriter<'_> {
// Obtain the inner buffer's writer and wrap it in the public `ZBytesWriter` tuple struct.
ZBytesWriter(self.0.writer())
}

/// Get a [`ZBytesReader`] implementing [`std::io::Read`] trait.
/// Get a [`ZBytesIterator`] that deserializes a sequence of `T`.
///
/// Example:
/// ```rust
/// use zenoh::bytes::ZBytes;
///
/// let list: Vec<f32> = vec![1.1, 2.2, 3.3];
/// let mut zbs = ZBytes::from_iter(list.iter());
///
/// for (index, elem) in zbs.iter::<f32>().enumerate() {
/// assert_eq!(list[index], elem.unwrap());
/// }
/// ```
pub fn iter<T>(&self) -> ZBytesIterator<'_, T>
where
for<'b> ZSerde: Deserialize<T, Input<'b> = &'b ZBytes>,
Expand All @@ -245,6 +293,62 @@ impl ZBytes {
}
}

/// Return an iterator on raw bytes slices contained in the [`ZBytes`].
///
/// [`ZBytes`] may store data in non-contiguous regions of memory. This iterator
/// gives direct access to the raw data without any attempt to deserialize it.
/// Please note that no guarantee is provided on the internal memory layout of [`ZBytes`].
/// The only guarantee provided is that the order of the bytes is preserved.
///
/// Please note that [`ZBytes::iter`] performs deserialization while iterating, whereas [`ZBytes::slices`] does not.
///
/// ```rust
/// use std::io::Write;
/// use zenoh::bytes::ZBytes;
///
/// let buf1: Vec<u8> = vec![1, 2, 3];
/// let buf2: Vec<u8> = vec![4, 5, 6, 7, 8];
/// let mut zbs = ZBytes::empty();
/// let mut writer = zbs.writer();
/// writer.write_all(&buf1).unwrap();
/// writer.write_all(&buf2).unwrap();
///
/// // Access the raw content
/// for slice in zbs.slices() {
/// println!("{:02x?}", slice);
/// }
///
/// // Concatenate input in a single vector
/// let buf: Vec<u8> = buf1.into_iter().chain(buf2.into_iter()).collect();
/// // Concatenate raw bytes in a single vector
/// let out: Vec<u8> = zbs.slices().fold(Vec::new(), |mut b, x| { b.extend_from_slice(x); b });
/// // The previous line is the equivalent of
/// // let out: Vec<u8> = zbs.into();
/// assert_eq!(buf, out);
/// ```
///
/// The example below shows how the [`ZBytesWriter::append`] simply appends the slices of one [`ZBytes`]
/// to another and how those slices can be iterated over to access the raw data.
/// ```rust
/// use std::io::Write;
/// use zenoh::bytes::ZBytes;
///
/// let buf1: Vec<u8> = vec![1, 2, 3];
/// let buf2: Vec<u8> = vec![4, 5, 6, 7, 8];
///
/// let mut zbs = ZBytes::empty();
/// let mut writer = zbs.writer();
/// writer.append(ZBytes::from(buf1.clone()));
/// writer.append(ZBytes::from(buf2.clone()));
///
/// let mut iter = zbs.slices();
/// assert_eq!(buf1.as_slice(), iter.next().unwrap());
/// assert_eq!(buf2.as_slice(), iter.next().unwrap());
/// ```
pub fn slices(&self) -> impl Iterator<Item = &[u8]> {
// Forwards to `slices()` on the wrapped inner buffer; this method itself does no deserialization.
self.0.slices()
}

/// Serialize an object of type `T` as a [`ZBytes`] using the [`ZSerde`].
///
/// ```rust
Expand Down Expand Up @@ -293,7 +397,11 @@ impl ZBytes {
ZSerde.serialize(t)
}

/// Deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
/// Deserialize an object of type `T` using [`ZSerde`].
///
/// See [`ZBytes::serialize`] and [`ZBytes::try_serialize`] for the examples.
///
/// See [`ZBytes::into`] for infallible conversion, e.g. to get raw bytes.
pub fn deserialize<'a, T>(&'a self) -> Result<T, <ZSerde as Deserialize<T>>::Error>
where
ZSerde: Deserialize<T, Input<'a> = &'a ZBytes>,
Expand All @@ -302,7 +410,7 @@ impl ZBytes {
ZSerde.deserialize(self)
}

/// Deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
/// Deserialize an object of type `T` using [`ZSerde`].
pub fn deserialize_mut<'a, T>(&'a mut self) -> Result<T, <ZSerde as Deserialize<T>>::Error>
where
ZSerde: Deserialize<T, Input<'a> = &'a mut ZBytes>,
Expand All @@ -311,7 +419,37 @@ impl ZBytes {
ZSerde.deserialize(self)
}

/// Infallibly deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
/// Infallibly deserialize an object of type `T` using [`ZSerde`].
///
/// To directly access raw data as a contiguous slice, it is preferred to convert `ZBytes` into a [`std::borrow::Cow<[u8]>`](`std::borrow::Cow`).
/// If [`ZBytes`] contains all the data in a single memory location, then it is guaranteed to be zero-copy. This is the common case for small messages.
/// If [`ZBytes`] contains data scattered in different memory regions, this operation will do an allocation and a copy. This is the common case for large messages.
///
/// ```rust
/// use std::borrow::Cow;
/// use zenoh::bytes::ZBytes;
///
/// let buf: Vec<u8> = vec![0, 1, 2, 3];
/// let bytes = ZBytes::from(buf.clone());
/// let deser: Cow<[u8]> = bytes.into();
/// assert_eq!(buf.as_slice(), deser.as_ref());
/// ```
///
/// An alternative is to convert `ZBytes` into a [`std::vec::Vec<u8>`].
/// Converting to [`std::vec::Vec<u8>`] will always allocate and make a copy.
///
/// ```rust
/// use std::borrow::Cow;
/// use zenoh::bytes::ZBytes;
///
/// let buf: Vec<u8> = vec![0, 1, 2, 3];
/// let bytes = ZBytes::from(buf.clone());
/// let deser: Vec<u8> = bytes.into();
/// assert_eq!(buf.as_slice(), deser.as_slice());
/// ```
///
/// If you want to be sure that no copy is performed at all, then you should use [`ZBytes::slices`].
/// Please note that in this case data may not be contiguous in memory and it is the responsibility of the user to properly parse the raw slices.
pub fn into<'a, T>(&'a self) -> T
where
ZSerde: Deserialize<T, Input<'a> = &'a ZBytes, Error = Infallible>,
Expand All @@ -320,7 +458,7 @@ impl ZBytes {
ZSerde.deserialize(self).unwrap_infallible()
}

/// Infallibly deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
/// Infallibly deserialize an object of type `T` using [`ZSerde`].
pub fn into_mut<'a, T>(&'a mut self) -> T
where
ZSerde: Deserialize<T, Input<'a> = &'a mut ZBytes, Error = Infallible>,
Expand Down Expand Up @@ -553,7 +691,7 @@ where
}

/// The default serializer for [`ZBytes`]. It supports primitive types, such as: `Vec<u8>`, `uX`, `iX`, `fX`, `String`, `bool`.
/// It also supports common Rust serde values like `serde_json::Value`.
/// It also supports common Rust serde values like [`serde_json::Value`].
///
/// **NOTE:** Zenoh semantics and protocol take care of sending and receiving bytes without restricting the actual data types.
/// [`ZSerde`] is the default serializer/deserializer provided for convenience to the users to deal with primitive data types via
Expand Down Expand Up @@ -1164,6 +1302,7 @@ impl From<&mut Cow<'_, str>> for ZBytes {
}
}

/// See [`Deserialize<Cow<'a, [u8]>>`] for guarantees on copies.
impl<'a> Deserialize<Cow<'a, str>> for ZSerde {
type Input<'b> = &'a ZBytes;
type Error = Utf8Error;
Expand Down

0 comments on commit 60af274

Please sign in to comment.