From 7d9d684cf953f9f813eb320812eaf95f767ef200 Mon Sep 17 00:00:00 2001 From: Pierre Avital Date: Mon, 26 Feb 2024 17:33:16 +0100 Subject: [PATCH] verbatim and intersect supports added to keformat (#752) * verbatim and intersect supports added to keformat * improve documentation and remove dbg * improve documentation and make codegen better * one more no_std check missing * address review --- .../zenoh-keyexpr/src/key_expr/borrowed.rs | 266 ++++++++++++++- .../zenoh-keyexpr/src/key_expr/format/mod.rs | 146 ++++++++- .../src/key_expr/format/parsing.rs | 309 ++++++++++-------- .../src/key_expr/format/support.rs | 27 ++ .../src/keyexpr_tree/arc_tree.rs | 13 + .../src/keyexpr_tree/iters/inclusion.rs | 8 +- .../src/keyexpr_tree/iters/intersection.rs | 4 +- commons/zenoh-keyexpr/src/lib.rs | 33 +- commons/zenoh-macros/src/lib.rs | 52 ++- examples/examples/z_formats.rs | 6 +- zenoh/src/key_expr.rs | 34 +- zenoh/src/lib.rs | 2 +- 12 files changed, 719 insertions(+), 181 deletions(-) diff --git a/commons/zenoh-keyexpr/src/key_expr/borrowed.rs b/commons/zenoh-keyexpr/src/key_expr/borrowed.rs index ca3a4c7bbc..85b4ef79e2 100644 --- a/commons/zenoh-keyexpr/src/key_expr/borrowed.rs +++ b/commons/zenoh-keyexpr/src/key_expr/borrowed.rs @@ -11,13 +11,12 @@ // Contributors: // ZettaScale Zenoh Team, // + use super::{canon::Canonizable, OwnedKeyExpr, FORBIDDEN_CHARS}; -// use crate::core::WireExpr; use alloc::{ borrow::{Borrow, ToOwned}, format, string::String, - vec, vec::Vec, }; use core::{ @@ -44,7 +43,7 @@ use zenoh_result::{bail, Error as ZError, ZResult}; /// * Two sets A and B are equal if all A includes B and B includes A. The Key Expression language is designed so that string equality is equivalent to set equality. 
#[allow(non_camel_case_types)] #[repr(transparent)] -#[derive(PartialEq, Eq, Hash)] +#[derive(PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct keyexpr(str); impl keyexpr { @@ -129,6 +128,11 @@ impl keyexpr { self.0.contains(super::SINGLE_WILD as char) } + pub(crate) const fn is_double_wild(&self) -> bool { + let bytes = self.0.as_bytes(); + bytes.len() == 2 && bytes[0] == b'*' + } + /// Returns the longest prefix of `self` that doesn't contain any wildcard character (`**` or `$*`). /// /// NOTE: this operation can typically be used in a backend implementation, at creation of a Storage to get the keys prefix, @@ -222,7 +226,7 @@ impl keyexpr { /// ); /// ``` pub fn strip_prefix(&self, prefix: &Self) -> Vec<&keyexpr> { - let mut result = vec![]; + let mut result = alloc::vec![]; 'chunks: for i in (0..=self.len()).rev() { if if i == self.len() { self.ends_with("**") @@ -265,8 +269,8 @@ impl keyexpr { result } - pub fn as_str(&self) -> &str { - self + pub const fn as_str(&self) -> &str { + &self.0 } /// # Safety @@ -274,7 +278,7 @@ impl keyexpr { /// /// Much like [`core::str::from_utf8_unchecked`], this is memory-safe, but calling this without maintaining /// [`keyexpr`]'s invariants yourself may lead to unexpected behaviors, the Zenoh network dropping your messages. - pub unsafe fn from_str_unchecked(s: &str) -> &Self { + pub const unsafe fn from_str_unchecked(s: &str) -> &Self { core::mem::transmute(s) } @@ -286,11 +290,249 @@ impl keyexpr { pub unsafe fn from_slice_unchecked(s: &[u8]) -> &Self { core::mem::transmute(s) } - pub fn chunks(&self) -> impl Iterator + DoubleEndedIterator { - self.split('/').map(|c| unsafe { - // Any chunk of a valid KE is itself a valid KE => we can safely call the unchecked constructor. - Self::from_str_unchecked(c) - }) + pub const fn chunks(&self) -> Chunks { + Chunks { + inner: self.as_str(), + } + } + pub(crate) fn next_delimiter(&self, i: usize) -> Option { + self.as_str() + .get(i + 1..) 
+ .and_then(|s| s.find('/').map(|j| i + 1 + j)) + } + pub(crate) fn previous_delimiter(&self, i: usize) -> Option { + self.as_str().get(..i).and_then(|s| s.rfind('/')) + } + pub(crate) fn first_byte(&self) -> u8 { + unsafe { *self.as_bytes().get_unchecked(0) } + } + pub(crate) fn iter_splits_ltr(&self) -> SplitsLeftToRight { + SplitsLeftToRight { + inner: self, + index: 0, + } + } + pub(crate) fn iter_splits_rtl(&self) -> SplitsRightToLeft { + SplitsRightToLeft { + inner: self, + index: self.len(), + } + } +} +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct SplitsLeftToRight<'a> { + inner: &'a keyexpr, + index: usize, +} +impl<'a> SplitsLeftToRight<'a> { + fn right(&self) -> &'a str { + &self.inner[self.index + ((self.index != 0) as usize)..] + } + fn left(&self, followed_by_double: bool) -> &'a str { + &self.inner[..(self.index + ((self.index != 0) as usize + 2) * followed_by_double as usize)] + } +} +impl<'a> Iterator for SplitsLeftToRight<'a> { + type Item = (&'a keyexpr, &'a keyexpr); + fn next(&mut self) -> Option { + match self.index < self.inner.len() { + false => None, + true => { + let right = self.right(); + let double_wild = right.starts_with("**"); + let left = self.left(double_wild); + self.index = if left.is_empty() { + self.inner.next_delimiter(0).unwrap_or(self.inner.len()) + } else { + self.inner + .next_delimiter(left.len()) + .unwrap_or(self.inner.len() + (left.len() == self.inner.len()) as usize) + }; + if left.is_empty() { + self.next() + } else { + // SAFETY: because any keyexpr split at `/` becomes 2 valid keyexprs by design, it's safe to assume the constraint is valid once both sides have been validated to not be empty. 
+ (!right.is_empty()).then(|| unsafe { + ( + keyexpr::from_str_unchecked(left), + keyexpr::from_str_unchecked(right), + ) + }) + } + } + } + } +} +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct SplitsRightToLeft<'a> { + inner: &'a keyexpr, + index: usize, +} +impl<'a> SplitsRightToLeft<'a> { + fn right(&self, followed_by_double: bool) -> &'a str { + &self.inner[(self.index + - ((self.index != self.inner.len()) as usize + 2) * followed_by_double as usize)..] + } + fn left(&self) -> &'a str { + &self.inner[..(self.index - ((self.index != self.inner.len()) as usize))] + } +} +impl<'a> Iterator for SplitsRightToLeft<'a> { + type Item = (&'a keyexpr, &'a keyexpr); + fn next(&mut self) -> Option { + match self.index { + 0 => None, + _ => { + let left = self.left(); + let double_wild = left.ends_with("**"); + let right = self.right(double_wild); + self.index = if right.is_empty() { + self.inner + .previous_delimiter(self.inner.len()) + .map_or(0, |n| n + 1) + } else { + self.inner + .previous_delimiter( + self.inner.len() + - right.len() + - (self.inner.len() != right.len()) as usize, + ) + .map_or(0, |n| n + 1) + }; + if right.is_empty() { + self.next() + } else { + // SAFETY: because any keyexpr split at `/` becomes 2 valid keyexprs by design, it's safe to assume the constraint is valid once both sides have been validated to not be empty. 
+ (!left.is_empty()).then(|| unsafe { + ( + keyexpr::from_str_unchecked(left), + keyexpr::from_str_unchecked(right), + ) + }) + } + } + } + } +} +#[test] +fn splits() { + let ke = keyexpr::new("a/**/b/c").unwrap(); + let mut splits = ke.iter_splits_ltr(); + assert_eq!( + splits.next(), + Some(( + keyexpr::new("a/**").unwrap(), + keyexpr::new("**/b/c").unwrap() + )) + ); + assert_eq!( + splits.next(), + Some((keyexpr::new("a/**/b").unwrap(), keyexpr::new("c").unwrap())) + ); + assert_eq!(splits.next(), None); + let mut splits = ke.iter_splits_rtl(); + assert_eq!( + splits.next(), + Some((keyexpr::new("a/**/b").unwrap(), keyexpr::new("c").unwrap())) + ); + assert_eq!( + splits.next(), + Some(( + keyexpr::new("a/**").unwrap(), + keyexpr::new("**/b/c").unwrap() + )) + ); + assert_eq!(splits.next(), None); + let ke = keyexpr::new("**").unwrap(); + let mut splits = ke.iter_splits_ltr(); + assert_eq!( + splits.next(), + Some((keyexpr::new("**").unwrap(), keyexpr::new("**").unwrap())) + ); + assert_eq!(splits.next(), None); + let ke = keyexpr::new("ab").unwrap(); + let mut splits = ke.iter_splits_ltr(); + assert_eq!(splits.next(), None); + let ke = keyexpr::new("ab/cd").unwrap(); + let mut splits = ke.iter_splits_ltr(); + assert_eq!( + splits.next(), + Some((keyexpr::new("ab").unwrap(), keyexpr::new("cd").unwrap())) + ); + assert_eq!(splits.next(), None); + for (i, ke) in crate::fuzzer::KeyExprFuzzer(rand::thread_rng()) + .take(100) + .enumerate() + { + dbg!(i, &ke); + let splits = ke.iter_splits_ltr().collect::>(); + assert_eq!(splits, { + let mut rtl_rev = ke.iter_splits_rtl().collect::>(); + rtl_rev.reverse(); + rtl_rev + }); + assert!(!splits + .iter() + .any(|s| s.0.ends_with('/') || s.1.starts_with('/'))); + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Chunks<'a> { + inner: &'a str, +} +impl<'a> Chunks<'a> { + /// Convert the remaining part of the iterator to a keyexpr if it is not empty. 
+ pub const fn as_keyexpr(self) -> Option<&'a keyexpr> { + match self.inner.is_empty() { + true => None, + _ => Some(unsafe { keyexpr::from_str_unchecked(self.inner) }), + } + } + /// Peek at the next chunk without consuming it. + pub fn peek(&self) -> Option<&keyexpr> { + if self.inner.is_empty() { + None + } else { + Some(unsafe { + keyexpr::from_str_unchecked( + &self.inner[..self.inner.find('/').unwrap_or(self.inner.len())], + ) + }) + } + } + /// Peek at the last chunk without consuming it. + pub fn peek_back(&self) -> Option<&keyexpr> { + if self.inner.is_empty() { + None + } else { + Some(unsafe { + keyexpr::from_str_unchecked( + &self.inner[self.inner.rfind('/').map_or(0, |i| i + 1)..], + ) + }) + } + } +} +impl<'a> Iterator for Chunks<'a> { + type Item = &'a keyexpr; + fn next(&mut self) -> Option { + if self.inner.is_empty() { + return None; + } + let (next, inner) = self.inner.split_once('/').unwrap_or((self.inner, "")); + self.inner = inner; + Some(unsafe { keyexpr::from_str_unchecked(next) }) + } +} +impl<'a> DoubleEndedIterator for Chunks<'a> { + fn next_back(&mut self) -> Option { + if self.inner.is_empty() { + return None; + } + let (inner, next) = self.inner.rsplit_once('/').unwrap_or(("", self.inner)); + self.inner = inner; + Some(unsafe { keyexpr::from_str_unchecked(next) }) } } diff --git a/commons/zenoh-keyexpr/src/key_expr/format/mod.rs b/commons/zenoh-keyexpr/src/key_expr/format/mod.rs index dbdf0e6446..9a39fbeee1 100644 --- a/commons/zenoh-keyexpr/src/key_expr/format/mod.rs +++ b/commons/zenoh-keyexpr/src/key_expr/format/mod.rs @@ -12,10 +12,45 @@ // ZettaScale Zenoh Team, // +//! # Building and parsing Key Expressions +//! A common issue in REST API is the association of meaning to sections of the URL, and respecting that API in a convenient manner. +//! The same issue arises naturally when designing a KE space, and [`KeFormat`] was designed to help you with this, +//! 
both in constructing and in parsing KEs that fit the formats you've defined. +//! +//! [`kedefine`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kedefine.html) also allows you to define formats at compile time, allowing a more performant, but more importantly safer and more convenient use of said formats, +//! as the [`keformat`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.keformat.html) and [`kewrite`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kewrite.html) macros will be able to tell you if you're attempting to set fields of the format that do not exist. +//! +//! ## The format syntax +//! KE formats are defined following a syntax that extends the [`keyexpr`] syntax. In addition to existing chunk types, KE formats support "specification" chunks. +//! These chunks must follow one of the following syntaxes: `${id:pattern}`, `${id:pattern#default}`, `$#{id:pattern}#`, or `$#{id:pattern#default}#`, where: +//! - `id` is the chunk identifier: it cannot contain the `:` character, and is used to name the chunk in accessors. +//! - `pattern` must be a valid KE (and therefore cannot contain `#`) and defines the range of values that the chunk may adopt. +//! - `default` (optional) is used as the chunk value when formatting if the builder wasn't supplied with a value for `id`. +//! +//! ## Formatting +//! To use a format to build a Key Expression, its [formatter](KeFormat::formatter) must be constructed. +//! +//! A formatter functions like an `id`-value map which can be [built](KeFormatter::build) into an [`OwnedKeyExpr`] once all specs have a value. +//! +//! The formatter will notably prevent you from setting a value for a spec if that value isn't included by the spec's pattern. +//! +//! ## Parsing +//! [`KeFormat`] can also be used to parse any [`keyexpr`] that intersects with it, using [`KeFormat::parse`]. +//! +//! The parser will then assign subsections of the [`keyexpr`] to each spec, and the resulting [`Parsed`] result can then be queried +//! for each spec's assigned value. 
+//! +//! Specs are considered greedy and evaluated left-to-right: if your format would allow ambiguous parsings, chunks will be consumed +//! by the leftmost specs first. For example `${a:**}/-/${b:**}` parsing `hey/-/-/there` would assign `hey/-` to `a` and `there` to `b`, +//! (even though you might have expected `a` to only consume `hey` and `b` to consume the remaining `-/there`). +//! +//! A good way to avoid ambiguities when working with formats that contain multiple `**` specs is to separate such specs using verbatim chunks +//! (chunks that start with an `@`), as `**` is incapable of consuming these chunks. + use alloc::{boxed::Box, string::String, vec::Vec}; use core::{ convert::{TryFrom, TryInto}, - fmt::Display, + fmt::{Debug, Display}, num::NonZeroU32, }; @@ -27,25 +62,66 @@ mod support; pub use support::{IKeFormatStorage, Segment}; use support::{IterativeConstructor, Spec}; -/// A utility to define Key Expression (KE) formats. +/// # Building and parsing Key Expressions +/// A common issue in REST API is the association of meaning to sections of the URL, and respecting that API in a convenient manner. +/// The same issue arises naturally when designing a KE space, and [`KeFormat`] was designed to help you with this, +/// both in constructing and in parsing KEs that fit the formats you've defined. +/// +/// [`zenoh::kedefine`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kedefine.html) also allows you to define formats at compile time, allowing a more performant, but more importantly safer and more convenient use of said formats, +/// as the [`zenoh::keformat`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.keformat.html) and [`zenoh::kewrite`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kewrite.html) macros will be able to tell you if you're attempting to set fields of the format that do not exist. +/// +/// ## The format syntax +/// KE formats are defined following a syntax that extends the [`keyexpr`] syntax. 
In addition to existing chunk types, KE formats support "specification" chunks. +/// These chunks must follow one of the following syntaxes: `${id:pattern}`, `${id:pattern#default}`, `$#{id:pattern}#`, or `$#{id:pattern#default}#`, where: +/// - `id` is the chunk identifier: it cannot contain the `:` character, and is used to name the chunk in accessors. +/// - `pattern` must be a valid KE (and therefore cannot contain `#`) and defines the range of values that the chunk may adopt. +/// - `default` (optional) is used as the chunk value when formatting if the builder wasn't supplied with a value for `id`. +/// +/// ## Formatting +/// To use a format to build a Key Expression, its [formatter](KeFormat::formatter) must be constructed. +/// +/// A formatter functions like an `id`-value map which can be [built](KeFormatter::build) into an [`OwnedKeyExpr`] once all specs have a value. +/// +/// The formatter will notably prevent you from setting a value for a spec if that value isn't included by the spec's pattern. +/// +/// ## Parsing +/// [`KeFormat`] can also be used to parse any [`keyexpr`] that intersects with it, using [`KeFormat::parse`]. /// -/// Formats are written like KEs, except sections can be substituted for specs using the `${id:pattern#default}` format to define fields. -/// `id` is the name of the field that gets encoded in that section, it must be non-empty and will stop at the first encountered `:`. -/// `pattern` is a KE pattern that any value set for that field must match. It stops at the first encountered `#` or end of spec. -/// `default` is optional, and lets you specify a value at construction for the field. +/// The parser will then assign subsections of the [`keyexpr`] to each spec, and the resulting [`Parsed`] result can then be queried +/// for each spec's assigned value. /// -/// Note that the spec is considered to end at the first encountered `}`; if you need your id, pattern or default to contain `}`, you may use `$#{spec}#. 
+/// Specs are considered greedy and evaluated left-to-right: if your format would allow ambiguous parsings, chunks will be consumed +/// by the leftmost specs first. For example `${a:**}/-/${b:**}` parsing `hey/-/-/there` would assign `hey/-` to `a` and `there` to `b`, +/// (even though you might have expected `a` to only consume `hey` and `b` to consume the remaining `-/there`). /// -/// Specs may only be preceded and followed by `/`. -#[derive(Debug, Clone, Copy, Hash)] +/// A good way to avoid ambiguities when working with formats that contain multiple `**` specs is to separate such specs using verbatim chunks +/// (chunks that start with an `@`), as `**` is incapable of consuming these chunks. +#[derive(Clone, Copy, Hash)] pub struct KeFormat<'s, Storage: IKeFormatStorage<'s> + 's = Vec>> { + /// The [`[Segment]`](Segment)s of the format. storage: Storage, + /// The end of the format. It may be one of 3 cases: + /// - An empty string, in which case the format ends with the last segment. + /// - A keyexpr preceded by `/`. + /// - A keyexpr, in the case the format contains no specs. suffix: &'s str, } +impl<'s, Storage: IKeFormatStorage<'s>> Debug for KeFormat<'s, Storage> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{self}") + } +} impl<'s> KeFormat<'s, Vec>> { + /// Construct a new [`KeFormat`], using a vector to store its state-machine and parser results. pub fn new + ?Sized>(value: &'s S) -> ZResult { value.as_ref().try_into() } + /// Construct a new [`KeFormat`], using a stack-allocated array to store its state-machine and parser results. + /// + /// `N` is simply the number of specifications in `value`. If this number of specs isn't known at compile-time, use [`KeFormat::new`] instead. 
+ /// + /// If you know `value` at compile time, using [`zenoh::kedefine`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kedefine.html) instead is advised, + /// as it will provide more features and construct higher performance formats than this constructor. pub fn noalloc_new(value: &'s str) -> ZResult; N]>> { value.try_into() } @@ -138,6 +214,7 @@ pub mod macro_support { } } impl<'s, Storage: IKeFormatStorage<'s> + 's> KeFormat<'s, Storage> { + /// Constructs a new formatter for the format. pub fn formatter(&'s self) -> KeFormatter<'s, Storage> { KeFormatter { format: self, @@ -216,10 +293,11 @@ impl<'s, Storage: IKeFormatStorage<'s> + 's> TryFrom<&'s str> for KeFormat<'s, S bail!("Invalid KeFormat: {value} contains duplicated ids") } } - Ok(KeFormat { - storage, - suffix: &value[segment_start..], - }) + let suffix = &value[segment_start..]; + if suffix.contains('*') { + bail!("Invalid KeFormat: wildcards are only allowed in specs when writing formats") + } + Ok(KeFormat { storage, suffix }) } } @@ -258,11 +336,24 @@ impl<'s, Storage: IKeFormatStorage<'s> + 's> core::fmt::Display for KeFormat<'s, write!(f, "{}", self.suffix) } } + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct NonMaxU32(NonZeroU32); +impl NonMaxU32 { + fn new(value: u32) -> Option { + NonZeroU32::new(!value).map(NonMaxU32) + } + fn get(&self) -> u32 { + !self.0.get() + } +} + +/// An active formatter for a [`KeFormat`]. 
#[derive(Clone)] pub struct KeFormatter<'s, Storage: IKeFormatStorage<'s>> { format: &'s KeFormat<'s, Storage>, buffer: String, - values: Storage::ValuesStorage>, + values: Storage::ValuesStorage>, } impl<'s, Storage: IKeFormatStorage<'s>> core::fmt::Debug for KeFormatter<'s, Storage> { @@ -358,9 +449,12 @@ impl core::fmt::Display for FormatSetError { } impl IError for FormatSetError {} impl<'s, Storage: IKeFormatStorage<'s>> KeFormatter<'s, Storage> { + /// Access the formatter's format pub fn format(&self) -> &KeFormat<'s, Storage> { self.format } + + /// Clear the formatter of previously set values, without deallocating its internal formatting buffer. pub fn clear(&mut self) -> &mut Self { self.buffer.clear(); for value in self.values.as_mut() { @@ -368,9 +462,15 @@ impl<'s, Storage: IKeFormatStorage<'s>> KeFormatter<'s, Storage> { } self } + + /// Build a key-expression according to the format and the currently set values. + /// + /// This doesn't clear the formatter of already set values, allowing to reuse the builder and only + /// change a subset of its properties before building a new key-expression again. pub fn build(&self) -> ZResult { self.try_into() } + /// Access the current value for `id`. pub fn get(&self, id: &str) -> Option<&str> { let segments = self.format.storage.segments(); segments @@ -381,6 +481,12 @@ impl<'s, Storage: IKeFormatStorage<'s>> KeFormatter<'s, Storage> { .map(|(start, end)| &self.buffer[start as usize..end.get() as usize]) }) } + /// Set a new value for `id` using `S`'s [`Display`] formatting. 
+ /// + /// # Errors + /// If the result of `format!("{value}")` is neither: + /// - A valid key expression that is included by the pattern for `id` + /// - An empty string, on the condition that `id`'s pattern is `**` pub fn set(&mut self, id: &str, value: S) -> Result<&mut Self, FormatSetError> { use core::fmt::Write; let segments = self.format.storage.segments(); @@ -397,7 +503,7 @@ impl<'s, Storage: IKeFormatStorage<'s>> KeFormatter<'s, Storage> { continue; } *s -= shift; - *e = NonZeroU32::new(e.get() - shift).unwrap() + *e = NonMaxU32::new(e.get() - shift).unwrap() } } let pattern = segments[i].spec.pattern(); @@ -405,7 +511,11 @@ impl<'s, Storage: IKeFormatStorage<'s>> KeFormatter<'s, Storage> { write!(&mut self.buffer, "{value}").unwrap(); // Writing on `&mut String` should be infallible. match (|| { let end = self.buffer.len(); - if pattern.as_str() != "**" { + if start == end { + if !pattern.is_double_wild() { + return Err(()); + } + } else { let Ok(ke) = keyexpr::new(&self.buffer[start..end]) else { return Err(()); }; @@ -413,9 +523,10 @@ impl<'s, Storage: IKeFormatStorage<'s>> KeFormatter<'s, Storage> { return Err(()); } } + values[i] = Some(( start as u32, - NonZeroU32::new(end.try_into().map_err(|_| ())?).ok_or(())?, + NonMaxU32::new(end.try_into().map_err(|_| ())?).ok_or(())?, )); Ok(()) })() { @@ -428,6 +539,7 @@ impl<'s, Storage: IKeFormatStorage<'s>> KeFormatter<'s, Storage> { } } +/// A [`KeFormat`] that owns its format-string. 
pub struct OwnedKeFormat + 'static = Vec>> { _owner: Box, format: KeFormat<'static, Storage>, diff --git a/commons/zenoh-keyexpr/src/key_expr/format/parsing.rs b/commons/zenoh-keyexpr/src/key_expr/format/parsing.rs index 18509201ec..52f01c5b6a 100644 --- a/commons/zenoh-keyexpr/src/key_expr/format/parsing.rs +++ b/commons/zenoh-keyexpr/src/key_expr/format/parsing.rs @@ -14,8 +14,8 @@ use zenoh_result::{bail, ZResult}; -use super::{support::trim_suffix_slash, IKeFormatStorage, KeFormat, Segment}; -use crate::key_expr::{format::support::trim_prefix_slash, keyexpr}; +use super::{IKeFormatStorage, KeFormat, Segment}; +use crate::key_expr::keyexpr; pub struct Parsed<'s, Storage: IKeFormatStorage<'s>> { format: &'s KeFormat<'s, Storage>, @@ -23,7 +23,13 @@ pub struct Parsed<'s, Storage: IKeFormatStorage<'s>> { } impl<'s, Storage: IKeFormatStorage<'s>> Parsed<'s, Storage> { - pub fn get(&self, id: &str) -> ZResult> { + /// Access the `id` element. + /// + /// The returned string is guaranteed to either be an empty string or a valid key expression. + /// + /// # Errors + /// If `id` is not part of `self`'s specs. + pub fn get(&self, id: &str) -> ZResult<&'s str> { let Some(i) = self .format .storage @@ -33,11 +39,13 @@ impl<'s, Storage: IKeFormatStorage<'s>> Parsed<'s, Storage> { else { bail!("{} has no {id} field", self.format) }; - Ok(self.results.as_ref()[i]) + Ok(self.results.as_ref()[i].map_or("", keyexpr::as_str)) } + /// The raw values for each spec, in left-to-right order. pub fn values(&self) -> &[Option<&'s keyexpr>] { self.results.as_ref() } + /// Iterates over id-value pairs. 
pub fn iter(&'s self) -> Iter<'s, Storage> { self.into_iter() } @@ -47,7 +55,11 @@ impl<'s, Storage: IKeFormatStorage<'s>> IntoIterator for &'s Parsed<'s, Storage> type Item = ::Item; type IntoIter = Iter<'s, Storage>; fn into_iter(self) -> Self::IntoIter { - todo!() + Iter { + parsed: self, + start: 0, + end: self.format.storage.segments().len(), + } } } pub struct Iter<'s, Storage: IKeFormatStorage<'s>> { @@ -95,147 +107,124 @@ impl<'s, Storage: IKeFormatStorage<'s>> DoubleEndedIterator for Iter<'s, Storage } impl<'s, Storage: IKeFormatStorage<'s> + 's> KeFormat<'s, Storage> { + /// Parses `target` according to `self`. The returned [`Parsed`] object can be used to extract the values of the fields in `self` from `target`. + /// + /// Parsing is greedy and done left-to-right. Please refer to [`KeFormat`]'s documentation for more details. + /// + /// # Errors + /// If `target` does not intersect with `self`, an error is returned. pub fn parse(&'s self, target: &'s keyexpr) -> ZResult> { let segments = self.storage.segments(); + if segments.is_empty() + && !target.intersects(unsafe { keyexpr::from_str_unchecked(self.suffix) }) + { + bail!("{target} does not intersect with {self}") + } let mut results = self.storage.values_storage(|_| None); - let Some(target) = target.strip_suffix(self.suffix) else { - if !segments.is_empty() - && segments.iter().all(|s| s.spec.pattern() == "**") - && self.suffix.as_bytes()[0] == b'/' - && target == &self.suffix[1..] - { - return Ok(Parsed { - format: self, - results, - }); + let results_mut = results.as_mut(); + debug_assert_eq!(segments.len(), results_mut.len()); + let found = 'a: { + match self.suffix.as_bytes() { + [] => do_parse(Some(target), segments, results_mut), + [b'/', suffix @ ..] 
=> { + let suffix = unsafe { keyexpr::from_slice_unchecked(suffix) }; + for (target, candidate) in target.iter_splits_rtl() { + if suffix.intersects(candidate) + && do_parse(Some(target), segments, results_mut) + { + break 'a true; + } + } + suffix.intersects(target) && do_parse(None, segments, results_mut) + } + _ => { + unreachable!(); + } } - bail!("{target} is not included in {self}") }; - assert_eq!(segments.len(), results.as_mut().len()); - if do_parse(target, segments, results.as_mut()) { + if found { Ok(Parsed { format: self, results, }) } else { - bail!("{target} is not included in {self}") + bail!("{target} does not intersect with {self}") } } } -fn do_parse<'s>( - input: &'s str, - segments: &[Segment<'s>], - results: &mut [Option<&'s keyexpr>], +fn do_parse<'a>( + target: Option<&'a keyexpr>, + segments: &[Segment], + results: &mut [Option<&'a keyexpr>], ) -> bool { - debug_assert!(!input.starts_with('/')); - // Parsing is finished if there are no more segments to process AND the input is now empty. - let [segment, segments @ ..] = segments else { - return input.is_empty(); - }; - let [result, results @ ..] 
= results else { - unreachable!() - }; - // reset result to None in case of backtracking - *result = None; - // Inspect the pattern: we want to know how many chunks we need to have a chance of inclusion, as well as if we need to worry about double wilds - let pattern = segment.spec.pattern(); - let mut has_double_wilds = false; - let min_chunks = pattern - .split('/') - .filter(|s| { - if *s == "**" { - has_double_wilds = true; - false - } else { - true + match (segments, results) { + ([], []) => target.map_or(true, keyexpr::is_double_wild), + ([segment, segments @ ..], [result, results @ ..]) => { + let prefix = segment.prefix(); + let pattern = segment.pattern(); + // if target is empty + let Some(target) = target else { + // this segment only matches if the pattern is `**` and the prefix is empty (since it cannot be `**`) + if prefix.is_none() && pattern.is_double_wild() { + *result = None; + // the next segments still have to be checked to respect the same condition + return !segments.iter().zip(results).any(|(segment, result)| { + *result = None; + segment.prefix().is_some() || !segment.pattern().is_double_wild() + }); + } else { + return false; + } + }; + macro_rules! try_intersect { + ($pattern: expr, $result: expr, $target: expr, $segments: expr, $results: expr) => {{ + let target = $target; + let segments = $segments; + if $pattern.intersects(target) + && do_parse( + target.is_double_wild().then_some(target), + segments, + $results, + ) + { + *$result = Some(target); + return true; + } + for (candidate, target) in target.iter_splits_rtl() { + if $pattern.intersects(candidate) + && do_parse(Some(target), segments, $results) + { + *result = Some(candidate); + return true; + } + } + if $pattern.is_double_wild() && do_parse(Some(target), segments, $results) { + *$result = None; + return true; + } + }}; } - }) - .count(); - // Since input is /-stripped, we need to strip it from the prefix too. 
- let prefix = trim_prefix_slash(segment.prefix); - // We handle double-wild segments that may branch in a different function, to keep this one tail-recursive - if has_double_wilds { - return do_parse_doublewild( - input, segments, results, result, pattern, prefix, min_chunks, - ); - } - // Strip the prefix (including the end-/ if the prefix is non-empty) - let Some(input) = input.strip_prefix(prefix) else { - return false; - }; - let mut chunks = 0; - for i in (0..input.len()).filter(|i| input.as_bytes()[*i] == b'/') { - chunks += 1; - if chunks < min_chunks { - continue; - } - let r = keyexpr::new(&input[..i]).expect("any subsection of a keyexpr is a keyexpr"); - if pattern.includes(r) { - *result = Some(r); - return do_parse(trim_prefix_slash(&input[(i + 1)..]), segments, results); - } else { - return false; - } - } - chunks += 1; - if chunks < min_chunks { - return false; - } - let r = keyexpr::new(input).expect("any subsection of a keyexpr is a keyexpr"); - if pattern.includes(r) { - *result = Some(r); - do_parse("", segments, results) - } else { - false - } -} -fn do_parse_doublewild<'s>( - input: &'s str, - segments: &[Segment<'s>], - results: &mut [Option<&'s keyexpr>], - result: &mut Option<&'s keyexpr>, - pattern: &keyexpr, - prefix: &str, - min_chunks: usize, -) -> bool { - if min_chunks == 0 { - if let Some(input) = input.strip_prefix(trim_suffix_slash(prefix)) { - if do_parse(trim_prefix_slash(input), segments, results) { - return true; + //if the prefix can be compressed to empty, + if prefix.is_none() { + try_intersect!(pattern, result, target, segments, results); } - } else { - return false; - } - } - let Some(input) = input.strip_prefix(prefix) else { - return false; - }; - let input = trim_prefix_slash(input); - let mut chunks = 0; - for i in (0..input.len()).filter(|i| input.as_bytes()[*i] == b'/') { - chunks += 1; - if chunks < min_chunks { - continue; - } - let r = keyexpr::new(&input[..i]).expect("any subsection of a keyexpr is a keyexpr"); - 
if pattern.includes(r) { - *result = Some(r); - if do_parse(trim_prefix_slash(&input[(i + 1)..]), segments, results) { - return true; + // iterate through as many splits as `prefix` could possibly consume. + for (candidate, target) in target.iter_splits_ltr().take(match prefix { + None => 1, + Some(prefix) => (prefix.bytes().filter(|&c| c == b'/').count() + 1) * 3, + }) { + if prefix.map_or(candidate.is_double_wild(), |prefix| { + prefix.intersects(candidate) + }) { + try_intersect!(pattern, result, target, segments, results); + } } + pattern.is_double_wild() + && prefix.map_or(false, |prefix| prefix.intersects(target)) + && do_parse(None, segments, results) } - } - chunks += 1; - if chunks < min_chunks { - return false; - } - let r = keyexpr::new(input).expect("any subsection of a keyexpr is a keyexpr"); - if pattern.includes(r) { - *result = Some(r); - do_parse("", segments, results) - } else { - false + _ => unreachable!(), } } @@ -243,7 +232,7 @@ fn do_parse_doublewild<'s>( fn parsing() { use crate::key_expr::OwnedKeyExpr; use core::convert::TryFrom; - for a_spec in ["${a:*}", "a/${a:*}", "a/${a:*/**}"] { + for a_spec in ["${a:*}", "a/${a:*}"] { for b_spec in ["b/${b:**}", "${b:**}"] { let specs = [a_spec, b_spec, "c"]; for spec in [2, 3] { @@ -256,11 +245,71 @@ fn parsing() { formatter.set("b", b_val).unwrap(); let ke = OwnedKeyExpr::try_from(&formatter).unwrap(); let parsed = format.parse(&ke).unwrap(); - assert_eq!(parsed.get("a").unwrap().unwrap().as_str(), a_val); - assert_eq!(parsed.get("b").unwrap().map_or("", |s| s.as_str()), b_val); + assert_eq!(parsed.get("a").unwrap(), a_val); + assert_eq!(parsed.get("b").unwrap(), b_val); } } } } } + KeFormat::new("**/${a:**}/${b:**}/**").unwrap_err(); + let format = KeFormat::new("${a:**}/${b:**}").unwrap(); + assert_eq!( + format + .parse(keyexpr::new("a/b/c").unwrap()) + .unwrap() + .get("a") + .unwrap(), + "a/b/c" + ); + assert_eq!( + format + .parse(keyexpr::new("**").unwrap()) + .unwrap() + .get("a") + 
.unwrap(), + "**" + ); + assert_eq!( + format + .parse(keyexpr::new("**").unwrap()) + .unwrap() + .get("b") + .unwrap(), + "**" + ); + let format = KeFormat::new("hi/${a:there}/${b:**}").unwrap(); + assert_eq!( + format + .parse(keyexpr::new("hi/**").unwrap()) + .unwrap() + .get("a") + .unwrap(), + "**" + ); + assert_eq!( + format + .parse(keyexpr::new("hi/**").unwrap()) + .unwrap() + .get("b") + .unwrap(), + "**" + ); + let format = KeFormat::new("hi/${a:there}/@/${b:**}").unwrap(); + assert_eq!( + format + .parse(keyexpr::new("hi/**/@").unwrap()) + .unwrap() + .get("a") + .unwrap(), + "**" + ); + assert_eq!( + format + .parse(keyexpr::new("hi/**/@").unwrap()) + .unwrap() + .get("b") + .unwrap(), + "" + ); } diff --git a/commons/zenoh-keyexpr/src/key_expr/format/support.rs b/commons/zenoh-keyexpr/src/key_expr/format/support.rs index ed79e88d59..16451797aa 100644 --- a/commons/zenoh-keyexpr/src/key_expr/format/support.rs +++ b/commons/zenoh-keyexpr/src/key_expr/format/support.rs @@ -86,9 +86,33 @@ impl core::fmt::Display for Spec<'_> { } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Segment<'a> { + /// What precedes a spec in a [`KeFormat`]. + /// It may be: + /// - empty if the spec is the first thing in the format. + /// - `/` if the spec comes right after another spec. + /// - a valid keyexpr followed by `/` if the spec comes after a keyexpr. 
pub(crate) prefix: &'a str, pub(crate) spec: Spec<'a>, } +impl Segment<'_> { + pub fn prefix(&self) -> Option<&keyexpr> { + match self.prefix { + "" | "/" => None, + _ => Some(unsafe { + keyexpr::from_str_unchecked(trim_suffix_slash(trim_prefix_slash(self.prefix))) + }), + } + } + pub fn id(&self) -> &str { + self.spec.id() + } + pub fn pattern(&self) -> &keyexpr { + self.spec.pattern() + } + pub fn default(&self) -> Option<&keyexpr> { + self.spec.default() + } +} pub enum IterativeConstructor { Complete(Complete), @@ -246,6 +270,9 @@ impl<'s> IKeFormatStorage<'s> for Vec> { } } +/// Trim the prefix slash from a target string if it has one. +/// # Safety +/// `target` is assumed to be a valid `keyexpr` except for the leading slash. pub(crate) fn trim_prefix_slash(target: &str) -> &str { &target[matches!(target.as_bytes().first(), Some(b'/')) as usize..] } diff --git a/commons/zenoh-keyexpr/src/keyexpr_tree/arc_tree.rs b/commons/zenoh-keyexpr/src/keyexpr_tree/arc_tree.rs index c2a7ff5375..a0428ac563 100644 --- a/commons/zenoh-keyexpr/src/keyexpr_tree/arc_tree.rs +++ b/commons/zenoh-keyexpr/src/keyexpr_tree/arc_tree.rs @@ -58,6 +58,7 @@ fn ketree_borrow_mut<'a, T, Token: TokenTrait>( /// The tree and its nodes have shared ownership, while their mutability is managed through the `Token`. /// /// Most of its methods are declared in the [`ITokenKeyExprTree`] trait. 
+// tags{ketree.arc} pub struct KeArcTree< Weight, Token: TokenTrait = DefaultToken, @@ -143,6 +144,7 @@ where &'a Arc, Wildness, Children, Token>, Token>>, &'a mut Token, ); + // tags{ketree.arc.node} fn node(&'a self, token: &'a Token, at: &keyexpr) -> Option { let inner = ketree_borrow(&self.inner, token); let mut chunks = at.chunks(); @@ -155,10 +157,12 @@ where } Some((node.as_node(), token)) } + // tags{ketree.arc.node.mut} fn node_mut(&'a self, token: &'a mut Token, at: &keyexpr) -> Option { self.node(unsafe { core::mem::transmute(&*token) }, at) .map(|(node, _)| (node, token)) } + // tags{ketree.arc.node.or_create} fn node_or_create(&'a self, token: &'a mut Token, at: &keyexpr) -> Self::NodeMut { let inner = ketree_borrow_mut(&self.inner, token); if at.is_wild() { @@ -206,6 +210,7 @@ where >, &'a Token, >; + // tags{ketree.arc.tree_iter} fn tree_iter(&'a self, token: &'a Token) -> Self::TreeIter { let inner = ketree_borrow(&self.inner, token); TokenPacker { @@ -227,6 +232,7 @@ where >, &'a mut Token, >; + // tags{ketree.arc.tree_iter.mut} fn tree_iter_mut(&'a self, token: &'a mut Token) -> Self::TreeIterMut { let inner = ketree_borrow(&self.inner, token); TokenPacker { @@ -248,6 +254,7 @@ where >, Self::IntersectionItem, >; + // tags{ketree.arc.intersecting} fn intersecting_nodes(&'a self, token: &'a Token, key: &'a keyexpr) -> Self::Intersection { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -272,6 +279,7 @@ where >, Self::IntersectionItemMut, >; + // tags{ketree.arc.intersecting.mut} fn intersecting_nodes_mut( &'a self, token: &'a mut Token, @@ -301,6 +309,7 @@ where >, Self::InclusionItem, >; + // tags{ketree.arc.included} fn included_nodes(&'a self, token: &'a Token, key: &'a keyexpr) -> Self::Inclusion { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -325,6 +334,7 @@ where >, Self::InclusionItemMut, >; + // tags{ketree.arc.included.mut} fn 
included_nodes_mut(&'a self, token: &'a mut Token, key: &'a keyexpr) -> Self::InclusionMut { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -352,6 +362,7 @@ where >, Self::IncluderItem, >; + // tags{ketree.arc.including} fn nodes_including(&'a self, token: &'a Token, key: &'a keyexpr) -> Self::Includer { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -376,6 +387,7 @@ where >, Self::IncluderItemMut, >; + // tags{ketree.arc.including.mut} fn nodes_including_mut(&'a self, token: &'a mut Token, key: &'a keyexpr) -> Self::IncluderMut { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -391,6 +403,7 @@ where } type PruneNode = KeArcTreeNode, Wildness, Children, Token>; + // tags{ketree.arc.prune.where} fn prune_where bool>( &self, token: &mut Token, diff --git a/commons/zenoh-keyexpr/src/keyexpr_tree/iters/inclusion.rs b/commons/zenoh-keyexpr/src/keyexpr_tree/iters/inclusion.rs index bd875be1b9..0ed2c96645 100644 --- a/commons/zenoh-keyexpr/src/keyexpr_tree/iters/inclusion.rs +++ b/commons/zenoh-keyexpr/src/keyexpr_tree/iters/inclusion.rs @@ -96,7 +96,7 @@ where }; } let chunk = node.chunk(); - let chunk_is_verbatim = chunk.as_bytes()[0] == b'@'; + let chunk_is_verbatim = chunk.first_byte() == b'@'; for i in *start..*end { let kec_start = self.ke_indices[i]; if kec_start == self.key.len() { @@ -136,7 +136,7 @@ where } None => { let key = unsafe { keyexpr::from_slice_unchecked(key) }; - if unlikely(key == "**") && chunk.as_bytes()[0] != b'@' { + if unlikely(key == "**") && chunk.first_byte() != b'@' { push!(kec_start); node_matches = true; } else if key.includes(chunk) { @@ -259,7 +259,7 @@ where }; } let chunk = node.chunk(); - let chunk_is_verbatim = chunk.as_bytes()[0] == b'@'; + let chunk_is_verbatim = chunk.first_byte() == b'@'; for i in *start..*end { let kec_start = self.ke_indices[i]; if kec_start == self.key.len() { @@ -299,7 
+299,7 @@ where } None => { let key = unsafe { keyexpr::from_slice_unchecked(key) }; - if unlikely(key == "**") && chunk.as_bytes()[0] != b'@' { + if unlikely(key == "**") && chunk.first_byte() != b'@' { push!(kec_start); node_matches = true; } else if key.includes(chunk) { diff --git a/commons/zenoh-keyexpr/src/keyexpr_tree/iters/intersection.rs b/commons/zenoh-keyexpr/src/keyexpr_tree/iters/intersection.rs index e46305adbf..34902810f0 100644 --- a/commons/zenoh-keyexpr/src/keyexpr_tree/iters/intersection.rs +++ b/commons/zenoh-keyexpr/src/keyexpr_tree/iters/intersection.rs @@ -95,7 +95,7 @@ where }; } let chunk = node.chunk(); - let chunk_is_verbatim = chunk.as_bytes()[0] == b'@'; + let chunk_is_verbatim = chunk.first_byte() == b'@'; if unlikely(chunk.as_bytes() == b"**") { // If the current node is `**`, it is guaranteed to match... node_matches = true; @@ -295,7 +295,7 @@ where }; } let chunk = node.chunk(); - let chunk_is_verbatim = chunk.as_bytes()[0] == b'@'; + let chunk_is_verbatim = chunk.first_byte() == b'@'; if unlikely(chunk.as_bytes() == b"**") { // If the current node is `**`, it is guaranteed to match... node_matches = true; diff --git a/commons/zenoh-keyexpr/src/lib.rs b/commons/zenoh-keyexpr/src/lib.rs index a31fcb24a5..f80a9c177c 100644 --- a/commons/zenoh-keyexpr/src/lib.rs +++ b/commons/zenoh-keyexpr/src/lib.rs @@ -12,14 +12,41 @@ // ZettaScale Zenoh Team, // -//! ⚠️ WARNING ⚠️ +//! [Key expression](https://github.com/eclipse-zenoh/roadmap/blob/main/rfcs/ALL/Key%20Expressions.md) are Zenoh's address space. //! -//! This crate is intended for Zenoh's internal use. +//! In Zenoh, operations are performed on keys. To allow addressing multiple keys with a single operation, we use Key Expressions (KE). +//! KEs are a small language that express sets of keys through a glob-like language. //! -//! [Click here for Zenoh's documentation](../zenoh/index.html) +//! 
These semantics can be a bit difficult to implement, so this module provides the following facilities: +//! +//! # Storing Key Expressions +//! This module provides 2 flavours to store strings that have been validated to respect the KE syntax, and a third is provided by [`zenoh`](https://docs.rs/zenoh): +//! - [`keyexpr`] is the equivalent of a [`str`], +//! - [`OwnedKeyExpr`] works like an [`Arc<str>`], +//! - [`KeyExpr`](https://docs.rs/zenoh/latest/zenoh/key_expr/struct.KeyExpr.html) works like a [`Cow<str>`], but also stores some additional context internal to Zenoh to optimize +//! routing and network usage. +//! +//! All of these types [`Deref`](core::ops::Deref) to [`keyexpr`], which notably has methods to check whether a given [`keyexpr::intersects`] with another, +//! or even if a [`keyexpr::includes`] another. +//! +//! # Tying values to Key Expressions +//! When storing values tied to Key Expressions, you might want something more specialized than a [`HashMap`](std::collections::HashMap) if you want to respect +//! the Key Expression semantics with high performance. +//! +//! Enter [KeTrees](keyexpr_tree). These are data structures specially built to store KE-value pairs in a manner that supports the set-semantics of KEs. +//! +//! # Building and parsing Key Expressions +//! A common issue in REST APIs is the association of meaning to sections of the URL, and respecting that API in a convenient manner. +//! The same issue arises naturally when designing a KE space, and [`KeFormat`](format::KeFormat) was designed to help you with this, +//! both in constructing and in parsing KEs that fit the formats you've defined. +//! +//! [`kedefine`] also allows you to define formats at compile time, allowing a more performant, but more importantly safer and more convenient use of said formats, +//! as the [`keformat`] and [`kewrite`] macros will be able to tell you if you're attempting to set fields of the format that do not exist.
+ #![cfg_attr(not(feature = "std"), no_std)] extern crate alloc; pub mod key_expr; + pub use key_expr::*; pub mod keyexpr_tree; diff --git a/commons/zenoh-macros/src/lib.rs b/commons/zenoh-macros/src/lib.rs index 2ee5aebeac..800ad3475d 100644 --- a/commons/zenoh-macros/src/lib.rs +++ b/commons/zenoh-macros/src/lib.rs @@ -19,9 +19,13 @@ //! [Click here for Zenoh's documentation](../zenoh/index.html) use proc_macro::TokenStream; use quote::quote; -use zenoh_keyexpr::format::{ - macro_support::{self, SegmentBuilder}, - KeFormat, +use syn::LitStr; +use zenoh_keyexpr::{ + format::{ + macro_support::{self, SegmentBuilder}, + KeFormat, + }, + key_expr::keyexpr, }; const RUSTC_VERSION: &str = include_str!(concat!(env!("OUT_DIR"), "/version.rs")); @@ -90,11 +94,34 @@ fn keformat_support(source: &str) -> proc_macro2::TokenStream { } }); let getters = specs.iter().map(|spec| { - let id = &source[spec.spec_start..(spec.spec_start + spec.id_end as usize)]; + let source = &source[spec.spec_start..spec.spec_end]; + let id = &source[..(spec.id_end as usize)]; let get_id = quote::format_ident!("{}", id); - quote! { - pub fn #get_id (&self) -> Option<& ::zenoh::key_expr::keyexpr> { - unsafe {self._0.get(#id).unwrap_unchecked()} + let pattern = unsafe { + keyexpr::from_str_unchecked(if spec.pattern_end != u16::MAX { + &source[(spec.id_end as usize + 1)..(spec.spec_start + spec.pattern_end as usize)] + } else { + &source[(spec.id_end as usize + 1)..] + }) + }; + let doc = format!("Get the parsed value for `{id}`.\n\nThis value is guaranteed to be a valid key expression intersecting with `{pattern}`"); + if pattern.as_bytes() == b"**" { + quote! { + #[doc = #doc] + /// Since the pattern is `**`, this may return `None` if the pattern didn't consume any chunks. 
+ pub fn #get_id (&self) -> Option<& ::zenoh::key_expr::keyexpr> { + unsafe { + let s =self._0.get(#id).unwrap_unchecked(); + (!s.is_empty()).then(|| ::zenoh::key_expr::keyexpr::from_str_unchecked(s)) + } + } + } + } else { + quote! { + #[doc = #doc] + pub fn #get_id (&self) -> &::zenoh::key_expr::keyexpr { + unsafe {::zenoh::key_expr::keyexpr::from_str_unchecked(self._0.get(#id).unwrap_unchecked())} + } } } }); @@ -314,3 +341,14 @@ pub fn keformat(tokens: TokenStream) -> TokenStream { }) .into() } + +/// Equivalent to [`keyexpr::new`](zenoh_keyexpr::keyexpr::new), but the check is run at compile-time and will throw a compile error in case of failure. +#[proc_macro] +pub fn ke(tokens: TokenStream) -> TokenStream { + let value: LitStr = syn::parse(tokens).unwrap(); + let ke = value.value(); + match zenoh_keyexpr::keyexpr::new(&ke) { + Ok(_) => quote!(unsafe {::zenoh::key_expr::keyexpr::from_str_unchecked(#ke)}).into(), + Err(e) => panic!("{}", e), + } +} diff --git a/examples/examples/z_formats.rs b/examples/examples/z_formats.rs index d173cccb6e..357448143e 100644 --- a/examples/examples/z_formats.rs +++ b/examples/examples/z_formats.rs @@ -15,8 +15,8 @@ use zenoh::prelude::keyexpr; zenoh::kedefine!( - pub file_format: "user_id/${user_id:*}/file/${file:**}", - pub(crate) settings_format: "user_id/${user_id:*}/settings/${setting:*/**}" + pub file_format: "user_id/${user_id:*}/file/${file:*/**}", + pub(crate) settings_format: "user_id/${user_id:*}/settings/${setting:**}" ); fn main() { @@ -28,6 +28,6 @@ fn main() { // Parsing let settings_ke = keyexpr::new("user_id/30/settings/dark_mode").unwrap(); let parsed = settings_format::parse(settings_ke).unwrap(); - assert_eq!(parsed.user_id(), keyexpr::new("30").ok()); + assert_eq!(parsed.user_id(), keyexpr::new("30").unwrap()); assert_eq!(parsed.setting(), keyexpr::new("dark_mode").ok()); } diff --git a/zenoh/src/key_expr.rs b/zenoh/src/key_expr.rs index d2295f9798..2eee6c0665 100644 --- a/zenoh/src/key_expr.rs +++ 
b/zenoh/src/key_expr.rs @@ -12,7 +12,36 @@ // ZettaScale Zenoh Team, // -//! [Key expression](https://github.com/eclipse-zenoh/roadmap/blob/main/rfcs/ALL/Key%20Expressions.md) types and utils. +//! [Key expressions](https://github.com/eclipse-zenoh/roadmap/blob/main/rfcs/ALL/Key%20Expressions.md) are Zenoh's address space. +//! +//! In Zenoh, operations are performed on keys. To allow addressing multiple keys with a single operation, we use Key Expressions (KE). +//! KEs are a small language that expresses sets of keys through a glob-like syntax. +//! +//! These semantics can be a bit difficult to implement, so this module provides the following facilities: +//! +//! # Storing Key Expressions +//! This module provides 3 flavours to store strings that have been validated to respect the KE syntax: +//! - [`keyexpr`] is the equivalent of a [`str`], +//! - [`OwnedKeyExpr`] works like an [`Arc<str>`], +//! - [`KeyExpr`] works like a [`Cow<str>`], but also stores some additional context internal to Zenoh to optimize +//! routing and network usage. +//! +//! All of these types [`Deref`](core::ops::Deref) to [`keyexpr`], which notably has methods to check whether a given [`keyexpr::intersects`] with another, +//! or even if a [`keyexpr::includes`] another. +//! +//! # Tying values to Key Expressions +//! When storing values tied to Key Expressions, you might want something more specialized than a [`HashMap`](std::collections::HashMap) if you want to respect +//! the Key Expression semantics with high performance. +//! +//! Enter [KeTrees](keyexpr_tree). These are data structures specially built to store KE-value pairs in a manner that supports the set-semantics of KEs. +//! +//! # Building and parsing Key Expressions +//! A common issue in REST APIs is the association of meaning to sections of the URL, and respecting that API in a convenient manner. +//!
The same issue arises naturally when designing a KE space, and [`KeFormat`](format::KeFormat) was designed to help you with this, +//! both in constructing and in parsing KEs that fit the formats you've defined. +//! +//! [`kedefine`] also allows you to define formats at compile time, allowing a more performant, but more importantly safer and more convenient use of said formats, +//! as the [`keformat`] and [`kewrite`] macros will be able to tell you if you're attempting to set fields of the format that do not exist. use std::{ convert::{TryFrom, TryInto}, @@ -20,7 +49,8 @@ use std::{ str::FromStr, }; use zenoh_core::{AsyncResolve, Resolvable, SyncResolve}; -pub use zenoh_protocol::core::key_expr::*; +pub use zenoh_keyexpr::*; +pub use zenoh_macros::{kedefine, keformat, kewrite}; use zenoh_protocol::{ core::{key_expr::canon::Canonizable, ExprId, WireExpr}, network::{declare, DeclareBody, Mapping, UndeclareKeyExpr}, diff --git a/zenoh/src/lib.rs b/zenoh/src/lib.rs index 5c3b938e5b..0a8f1feb64 100644 --- a/zenoh/src/lib.rs +++ b/zenoh/src/lib.rs @@ -87,7 +87,7 @@ use prelude::*; use scouting::ScoutBuilder; use std::future::Ready; use zenoh_core::{AsyncResolve, Resolvable, SyncResolve}; -pub use zenoh_macros::{kedefine, keformat, kewrite}; +pub use zenoh_macros::{ke, kedefine, keformat, kewrite}; use zenoh_protocol::core::WhatAmIMatcher; use zenoh_result::{zerror, ZResult}; use zenoh_util::concat_enabled_features;