feat: add PortId::from_bytes and rework identifier validation

The main motivating factor for this change is scenario in which one has a slice of bytes and wants to parse it as PartId. Since PortId implements FromStr and has a new method which takes a String such slice must first be converted into a string. A parsing code might look something like: fn port_id_from_bytes(bytes: &[u8]) -> Result<PortId, Error> { let id = core::str::from_utf8(bytes)?; let id = PortId::from_str(id)?; Ok(id) } However, notice that in this situation the bytes are validated twice (in fact three times, see below). First `from_utf8` has to go through it to check if the identifier is valid UTF-8; and then `from_str` has to go through the bytes again. This by itself is wasteful but what’s even worse is that Unicode strings are not valid identifiers so the logic of checking if bytes are valid UTF-8 is unnecessary. With PortId::from_bytes, the code checks whether the bytes includes any invalid characters. If it doesn’t, than it knows the entire slice is all ASCII bytes and thus can be converted to a string. To handle error cases, introduce Error::InvalidUtf8 error which is used in the bytes aren’t valid UTF-8. ---- With this change, validate_identifier_chars now works on slice of bytes rather than on a str. This by itself is probably an optimisation since iterating over bytes is easier than over characters of a string. Since Unicode characters aren’t valid parts of identifiers this doesn’t affect observable behaviour of the code. ---- Furthermore, this change also refactors the validation code. Specifically, the old identifier validation code contained the following: if id.contains(PATH_SEPARATOR) { return Err(Error::ContainSeparator { id: id.into() }); } if !id.chars().all(|c| /* ... */) { return Err(Error::InvalidCharacter { id: id.into() }); } This means that all identifiers had to be scanned twice. First to look for a slash and then to check if all characters are valid. After all the refactoring the code is now equivalence of: if !id.bytes().all(|c| /* ... */) { if id.as_bytes().contains(PATH_SEPARATOR) { return Err(Error::ContainSeparator { id: id.into() }); } else { return Err(Error::InvalidCharacter { id: id.into() }); } } With this, correct identifiers are scanned only once.
cosmos · Oct 26, 2023 · 1c62501 · 1c62501
1 parent 553bc79
commit 1c62501
Show file tree

Hide file tree

Showing 3 changed files with 162 additions and 46 deletions.
diff --git a/crates/ibc/src/core/ics24_host/identifier.rs b/crates/ibc/src/core/ics24_host/identifier.rs
@@ -352,7 +352,12 @@ pub struct PortId(String);
 
 impl PortId {
     pub fn new(id: String) -> Result<Self, IdentifierError> {
-        Self::from_str(&id)
+        validate_port_identifier(id.as_bytes())?;
+        Ok(Self(id))
+    }
+
+    pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentifierError> {
+        validate_port_identifier(bytes).map(|id| Self(id.into()))
     }
 
     /// Infallible creation of the well-known transfer port
@@ -371,7 +376,7 @@ impl PortId {
     }
 
     pub fn validate(&self) -> Result<(), IdentifierError> {
-        validate_port_identifier(self.as_str())
+        validate_port_identifier(self.as_str()).map(|_| ())
     }
 }
 
@@ -495,6 +500,7 @@ impl PartialEq<str> for ChannelId {
 
 #[cfg_attr(feature = "serde", derive(serde::Serialize))]
 #[derive(Debug, Display)]
+#[cfg_attr(test, derive(Clone, PartialEq))]
 pub enum IdentifierError {
     /// identifier `{id}` cannot contain separator '/'
     ContainSeparator { id: String },
@@ -511,6 +517,8 @@ pub enum IdentifierError {
     InvalidPrefix { prefix: String },
     /// identifier cannot be empty
     Empty,
+    /// identifier `{id:?}` is an invalid UTF-8 string.
+    InvalidUtf8 { id: Vec<u8> },
 }
 
 #[cfg(feature = "std")]
@@ -541,4 +549,67 @@ mod tests {
         assert!(ChainId::from_str("/chainA-1").is_err());
         assert!(ChainId::from_str("chainA-1-").is_err());
     }
+
+    #[test]
+    fn test_port_id() {
+        fn good(id: &str) {
+            let want = Ok(PortId(id.into()));
+            assert_eq!(want, PortId::from_str(id), "id: {id}");
+            assert_eq!(want, PortId::from_bytes(id.as_bytes()), "id: {id}");
+        }
+
+        good(TRANSFER_PORT_ID);
+        good(DEFAULT_PORT_ID);
+        good("xx");
+        good(&"x".repeat(128));
+
+        fn bad(want: IdentifierError, id: &str) {
+            assert_eq!(Err(want.clone()), PortId::from_str(id), "id: {id}");
+            assert_eq!(Err(want), PortId::from_bytes(id.as_bytes()), "id: {id}");
+        }
+
+        bad(IdentifierError::Empty, "");
+        bad(
+            IdentifierError::InvalidLength {
+                id: "x".into(),
+                length: 1,
+                min: 2,
+                max: 128,
+            },
+            "x",
+        );
+        let id = "x".repeat(129);
+        bad(
+            IdentifierError::InvalidLength {
+                id: id.clone(),
+                length: 129,
+                min: 2,
+                max: 128,
+            },
+            &id,
+        );
+
+        bad(
+            IdentifierError::ContainSeparator { id: "x*y/z".into() },
+            "x*y/z",
+        );
+        bad(
+            IdentifierError::InvalidCharacter { id: "x*y".into() },
+            "x*y",
+        );
+        bad(
+            IdentifierError::InvalidCharacter {
+                id: "żólw".into()
+            },
+            "żólw",
+        );
+
+        let bytes = b"\xbf\xf3\xb3\x77";
+        assert_eq!(
+            Err(IdentifierError::InvalidUtf8 {
+                id: bytes[..].into()
+            }),
+            PortId::from_bytes(bytes)
+        );
+    }
 }
diff --git a/crates/ibc/src/core/ics24_host/identifier/validate.rs b/crates/ibc/src/core/ics24_host/identifier/validate.rs
@@ -2,36 +2,75 @@ use super::IdentifierError as Error;
 use crate::prelude::*;
 
 /// Path separator (ie. forward slash '/')
-const PATH_SEPARATOR: char = '/';
-const VALID_SPECIAL_CHARS: &str = "._+-#[]<>";
+const PATH_SEPARATOR: u8 = b'/';
 
 /// Checks if the identifier only contains valid characters as specified in the
 /// [`ICS-24`](https://github.com/cosmos/ibc/tree/main/spec/core/ics-024-host-requirements#paths-identifiers-separators)]
 /// spec.
-pub fn validate_identifier_chars(id: &str) -> Result<(), Error> {
-    // Check identifier is not empty
-    if id.is_empty() {
-        return Err(Error::Empty);
+pub(crate) fn validate_identifier_chars<I: Id + ?Sized>(id: &I) -> Result<&str, Error> {
+    /// Checks whether byte corresponds to valid identifier character.
+    ///
+    /// Valid identifier characters are:
+    /// - alphanumeric,
+    /// - `.`, `_`, `+`, `-`, `#`,
+    /// - `[`, `]`, `<` and `>`
+    fn validate_char(byte: &u8) -> bool {
+        const VALID_SPECIAL_CHARS: &[u8; 9] = b"._+-#[]<>";
+        byte.is_ascii_alphanumeric() || VALID_SPECIAL_CHARS.contains(byte)
     }
 
-    // Check identifier does not contain path separators
-    if id.contains(PATH_SEPARATOR) {
-        return Err(Error::ContainSeparator { id: id.into() });
+    /// Possible validation results.
+    enum CheckError {
+        Empty,
+        BadChar(bool),
     }
 
-    // Check that the identifier comprises only valid characters:
-    // - Alphanumeric
-    // - `.`, `_`, `+`, `-`, `#`
-    // - `[`, `]`, `<`, `>`
-    if !id
-        .chars()
-        .all(|c| c.is_alphanumeric() || VALID_SPECIAL_CHARS.contains(c))
-    {
-        return Err(Error::InvalidCharacter { id: id.into() });
+    /// Monomorphisation of the validation check.
+    fn validate(id: &[u8]) -> Result<&str, CheckError> {
+        if id.is_empty() {
+            Err(CheckError::Empty)
+        } else if let Some(pos) = id.iter().position(|b| !validate_char(b)) {
+            Err(CheckError::BadChar(id[pos..].contains(&PATH_SEPARATOR)))
+        } else {
+            // SAFETY: We've just checked that id consists of ASCII characters
+            // only.
+            #[allow(unsafe_code)]
+            Ok(unsafe { core::str::from_utf8_unchecked(id) })
+        }
     }
 
-    // All good!
-    Ok(())
+    match validate(id.as_ref()) {
+        Ok(id) => Ok(id),
+        Err(CheckError::Empty) => Err(Error::Empty),
+        Err(CheckError::BadChar(separator)) => {
+            let id = id.try_to_string()?;
+            Err(match separator {
+                true => Error::ContainSeparator { id },
+                false => Error::InvalidCharacter { id },
+            })
+        }
+    }
+}
+
+pub(crate) trait Id: AsRef<[u8]> {
+    /// Converts identifier into String or returns `Error::InvalidUtf8` if the
+    /// identifier is invalid UTF-8.
+    fn try_to_string(&self) -> Result<String, Error>;
+}
+
+impl Id for str {
+    fn try_to_string(&self) -> Result<String, Error> {
+        Ok(self.into())
+    }
+}
+
+impl Id for [u8] {
+    fn try_to_string(&self) -> Result<String, Error> {
+        match core::str::from_utf8(self) {
+            Ok(id) => Ok(id.into()),
+            Err(_) => Err(Error::InvalidUtf8 { id: self.into() }),
+        }
+    }
 }
 
 /// Checks if the identifier forms a valid identifier with the given min/max length as specified in the
@@ -84,31 +123,37 @@ pub fn validate_client_type(id: &str) -> Result<(), Error> {
     validate_prefix_length(id, 9, 64)
 }
 
-/// Default validator function for Client identifiers.
-///
-/// A valid client identifier must be between 9-64 characters as specified in
-/// the ICS-24 spec.
-pub fn validate_client_identifier(id: &str) -> Result<(), Error> {
-    validate_identifier_chars(id)?;
-    validate_identifier_length(id, 9, 64)
-}
-
-/// Default validator function for Connection identifiers.
-///
-/// A valid connection identifier must be between 10-64 characters as specified
-/// in the ICS-24 spec.
-pub fn validate_connection_identifier(id: &str) -> Result<(), Error> {
-    validate_identifier_chars(id)?;
-    validate_identifier_length(id, 10, 64)
+macro_rules! define_validate {
+    ($( $(#[$meta:meta])* $name:ident($min:literal..=$max:literal); )*) => {
+        $(
+            $(#[$meta])*
+            pub(crate) fn $name<I: Id + ?Sized>(id: &I) -> Result<&str, Error> {
+                let id = validate_identifier_chars(id)?;
+                validate_identifier_length(id, $min, $max)?;
+                Ok(id)
+            }
+        )*
+    }
 }
 
-/// Default validator function for Port identifiers.
-///
-/// A valid port identifier must be between 2-128 characters as specified in the
-/// ICS-24 spec.
-pub fn validate_port_identifier(id: &str) -> Result<(), Error> {
-    validate_identifier_chars(id)?;
-    validate_identifier_length(id, 2, 128)
+define_validate! {
+    /// Default validator function for Client identifiers.
+    ///
+    /// A valid client identifier must be between 9-64 characters as specified in
+    /// the ICS-24 spec.
+    validate_client_identifier(9..=64);
+
+    /// Default validator function for Connection identifiers.
+    ///
+    /// A valid connection identifier must be between 10-64 characters as specified
+    /// in the ICS-24 spec.
+    validate_connection_identifier(10..=64);
+
+    /// Default validator function for Port identifiers.
+    ///
+    /// A valid port identifier must be between 2-128 characters as specified in the
+    /// ICS-24 spec.
+    validate_port_identifier(2..=128);
 }
 
 /// Default validator function for Channel identifiers.

diff --git a/crates/ibc/src/lib.rs b/crates/ibc/src/lib.rs
@@ -9,7 +9,7 @@
     rust_2018_idioms
 )]
 #![cfg_attr(not(test), deny(clippy::disallowed_methods, clippy::disallowed_types,))]
-#![forbid(unsafe_code)]
+#![deny(unsafe_code)]
 // https://github.com/cosmos/ibc-rs/issues/342
 #![allow(clippy::result_large_err)]
 //! This library implements the InterBlockchain Communication (IBC) protocol in Rust. IBC is