Skip to content

Commit

Permalink
feat: add PortId::from_bytes and rework identifier validation
Browse files Browse the repository at this point in the history
The main motivating factor for this change is scenario in which one
has a slice of bytes and wants to parse it as PartId.  Since PortId
implements FromStr and has a new method which takes a String such
slice must first be converted into a string.  A parsing code might
look something like:

    fn port_id_from_bytes(bytes: &[u8]) -> Result<PortId, Error> {
        let id = core::str::from_utf8(bytes)?;
        let id = PortId::from_str(id)?;
        Ok(id)
    }

However, notice that in this situation the bytes are validated twice
(in fact three times, see below).  First `from_utf8` has to go through
it to check if the identifier is valid UTF-8; and then `from_str` has
to go through the bytes again.  This by itself is wasteful but what’s
even worse is that Unicode strings are not valid identifiers so the
logic of checking if bytes are valid UTF-8 is unnecessary.

With PortId::from_bytes, the code checks whether the bytes includes
any invalid characters.  If it doesn’t, than it knows the entire slice
is all ASCII bytes and thus can be converted to a string.

To handle error cases, introduce Error::InvalidUtf8 error which is
used in the bytes aren’t valid UTF-8.

----

With this change, validate_identifier_chars now works on slice of
bytes rather than on a str.  This by itself is probably an
optimisation since iterating over bytes is easier than over characters
of a string.  Since Unicode characters aren’t valid parts of
identifiers this doesn’t affect observable behaviour of the code.

----

Furthermore, this change also refactors the validation code.
Specifically, the old identifier validation code contained the
following:

    if id.contains(PATH_SEPARATOR) {
        return Err(Error::ContainSeparator { id: id.into() });
    }
    if !id.chars().all(|c| /* ... */) {
        return Err(Error::InvalidCharacter { id: id.into() });
    }

This means that all identifiers had to be scanned twice.  First to
look for a slash and then to check if all characters are valid.  After
all the refactoring the code is now equivalence of:

    if !id.bytes().all(|c| /* ... */) {
        if id.as_bytes().contains(PATH_SEPARATOR) {
            return Err(Error::ContainSeparator { id: id.into() });
        } else {
            return Err(Error::InvalidCharacter { id: id.into() });
        }
    }

With this, correct identifiers are scanned only once.
  • Loading branch information
mina86 committed Oct 26, 2023
1 parent 553bc79 commit 1c62501
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 46 deletions.
75 changes: 73 additions & 2 deletions crates/ibc/src/core/ics24_host/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,12 @@ pub struct PortId(String);

impl PortId {
pub fn new(id: String) -> Result<Self, IdentifierError> {
Self::from_str(&id)
validate_port_identifier(id.as_bytes())?;
Ok(Self(id))
}

pub fn from_bytes(bytes: &[u8]) -> Result<Self, IdentifierError> {
validate_port_identifier(bytes).map(|id| Self(id.into()))
}

/// Infallible creation of the well-known transfer port
Expand All @@ -371,7 +376,7 @@ impl PortId {
}

pub fn validate(&self) -> Result<(), IdentifierError> {
validate_port_identifier(self.as_str())
validate_port_identifier(self.as_str()).map(|_| ())
}
}

Expand Down Expand Up @@ -495,6 +500,7 @@ impl PartialEq<str> for ChannelId {

#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Display)]
#[cfg_attr(test, derive(Clone, PartialEq))]
pub enum IdentifierError {
/// identifier `{id}` cannot contain separator '/'
ContainSeparator { id: String },
Expand All @@ -511,6 +517,8 @@ pub enum IdentifierError {
InvalidPrefix { prefix: String },
/// identifier cannot be empty
Empty,
/// identifier `{id:?}` is an invalid UTF-8 string.
InvalidUtf8 { id: Vec<u8> },
}

#[cfg(feature = "std")]
Expand Down Expand Up @@ -541,4 +549,67 @@ mod tests {
assert!(ChainId::from_str("/chainA-1").is_err());
assert!(ChainId::from_str("chainA-1-").is_err());
}

#[test]
fn test_port_id() {
fn good(id: &str) {
let want = Ok(PortId(id.into()));
assert_eq!(want, PortId::from_str(id), "id: {id}");
assert_eq!(want, PortId::from_bytes(id.as_bytes()), "id: {id}");
}

good(TRANSFER_PORT_ID);
good(DEFAULT_PORT_ID);
good("xx");
good(&"x".repeat(128));

fn bad(want: IdentifierError, id: &str) {
assert_eq!(Err(want.clone()), PortId::from_str(id), "id: {id}");
assert_eq!(Err(want), PortId::from_bytes(id.as_bytes()), "id: {id}");
}

bad(IdentifierError::Empty, "");
bad(
IdentifierError::InvalidLength {
id: "x".into(),
length: 1,
min: 2,
max: 128,
},
"x",
);
let id = "x".repeat(129);
bad(
IdentifierError::InvalidLength {
id: id.clone(),
length: 129,
min: 2,
max: 128,
},
&id,
);

bad(
IdentifierError::ContainSeparator { id: "x*y/z".into() },
"x*y/z",
);
bad(
IdentifierError::InvalidCharacter { id: "x*y".into() },
"x*y",
);
bad(
IdentifierError::InvalidCharacter {
id: "żólw".into()
},
"żólw",
);

let bytes = b"\xbf\xf3\xb3\x77";
assert_eq!(
Err(IdentifierError::InvalidUtf8 {
id: bytes[..].into()
}),
PortId::from_bytes(bytes)
);
}
}
131 changes: 88 additions & 43 deletions crates/ibc/src/core/ics24_host/identifier/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,75 @@ use super::IdentifierError as Error;
use crate::prelude::*;

/// Path separator (ie. forward slash '/')
const PATH_SEPARATOR: char = '/';
const VALID_SPECIAL_CHARS: &str = "._+-#[]<>";
const PATH_SEPARATOR: u8 = b'/';

/// Checks if the identifier only contains valid characters as specified in the
/// [`ICS-24`](https://github.com/cosmos/ibc/tree/main/spec/core/ics-024-host-requirements#paths-identifiers-separators)]
/// spec.
pub fn validate_identifier_chars(id: &str) -> Result<(), Error> {
// Check identifier is not empty
if id.is_empty() {
return Err(Error::Empty);
pub(crate) fn validate_identifier_chars<I: Id + ?Sized>(id: &I) -> Result<&str, Error> {
/// Checks whether byte corresponds to valid identifier character.
///
/// Valid identifier characters are:
/// - alphanumeric,
/// - `.`, `_`, `+`, `-`, `#`,
/// - `[`, `]`, `<` and `>`
fn validate_char(byte: &u8) -> bool {
const VALID_SPECIAL_CHARS: &[u8; 9] = b"._+-#[]<>";
byte.is_ascii_alphanumeric() || VALID_SPECIAL_CHARS.contains(byte)
}

// Check identifier does not contain path separators
if id.contains(PATH_SEPARATOR) {
return Err(Error::ContainSeparator { id: id.into() });
/// Possible validation results.
enum CheckError {
Empty,
BadChar(bool),
}

// Check that the identifier comprises only valid characters:
// - Alphanumeric
// - `.`, `_`, `+`, `-`, `#`
// - `[`, `]`, `<`, `>`
if !id
.chars()
.all(|c| c.is_alphanumeric() || VALID_SPECIAL_CHARS.contains(c))
{
return Err(Error::InvalidCharacter { id: id.into() });
/// Monomorphisation of the validation check.
fn validate(id: &[u8]) -> Result<&str, CheckError> {
if id.is_empty() {
Err(CheckError::Empty)
} else if let Some(pos) = id.iter().position(|b| !validate_char(b)) {
Err(CheckError::BadChar(id[pos..].contains(&PATH_SEPARATOR)))
} else {
// SAFETY: We've just checked that id consists of ASCII characters
// only.
#[allow(unsafe_code)]
Ok(unsafe { core::str::from_utf8_unchecked(id) })
}
}

// All good!
Ok(())
match validate(id.as_ref()) {
Ok(id) => Ok(id),
Err(CheckError::Empty) => Err(Error::Empty),
Err(CheckError::BadChar(separator)) => {
let id = id.try_to_string()?;
Err(match separator {
true => Error::ContainSeparator { id },
false => Error::InvalidCharacter { id },
})
}
}
}

pub(crate) trait Id: AsRef<[u8]> {
/// Converts identifier into String or returns `Error::InvalidUtf8` if the
/// identifier is invalid UTF-8.
fn try_to_string(&self) -> Result<String, Error>;
}

impl Id for str {
fn try_to_string(&self) -> Result<String, Error> {
Ok(self.into())
}
}

impl Id for [u8] {
fn try_to_string(&self) -> Result<String, Error> {
match core::str::from_utf8(self) {
Ok(id) => Ok(id.into()),
Err(_) => Err(Error::InvalidUtf8 { id: self.into() }),
}
}
}

/// Checks if the identifier forms a valid identifier with the given min/max length as specified in the
Expand Down Expand Up @@ -84,31 +123,37 @@ pub fn validate_client_type(id: &str) -> Result<(), Error> {
validate_prefix_length(id, 9, 64)
}

/// Default validator function for Client identifiers.
///
/// A valid client identifier must be between 9-64 characters as specified in
/// the ICS-24 spec.
pub fn validate_client_identifier(id: &str) -> Result<(), Error> {
validate_identifier_chars(id)?;
validate_identifier_length(id, 9, 64)
}

/// Default validator function for Connection identifiers.
///
/// A valid connection identifier must be between 10-64 characters as specified
/// in the ICS-24 spec.
pub fn validate_connection_identifier(id: &str) -> Result<(), Error> {
validate_identifier_chars(id)?;
validate_identifier_length(id, 10, 64)
macro_rules! define_validate {
($( $(#[$meta:meta])* $name:ident($min:literal..=$max:literal); )*) => {
$(
$(#[$meta])*
pub(crate) fn $name<I: Id + ?Sized>(id: &I) -> Result<&str, Error> {
let id = validate_identifier_chars(id)?;
validate_identifier_length(id, $min, $max)?;
Ok(id)
}
)*
}
}

/// Default validator function for Port identifiers.
///
/// A valid port identifier must be between 2-128 characters as specified in the
/// ICS-24 spec.
pub fn validate_port_identifier(id: &str) -> Result<(), Error> {
validate_identifier_chars(id)?;
validate_identifier_length(id, 2, 128)
define_validate! {
/// Default validator function for Client identifiers.
///
/// A valid client identifier must be between 9-64 characters as specified in
/// the ICS-24 spec.
validate_client_identifier(9..=64);

/// Default validator function for Connection identifiers.
///
/// A valid connection identifier must be between 10-64 characters as specified
/// in the ICS-24 spec.
validate_connection_identifier(10..=64);

/// Default validator function for Port identifiers.
///
/// A valid port identifier must be between 2-128 characters as specified in the
/// ICS-24 spec.
validate_port_identifier(2..=128);
}

/// Default validator function for Channel identifiers.
Expand Down
2 changes: 1 addition & 1 deletion crates/ibc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
rust_2018_idioms
)]
#![cfg_attr(not(test), deny(clippy::disallowed_methods, clippy::disallowed_types,))]
#![forbid(unsafe_code)]
#![deny(unsafe_code)]
// https://github.com/cosmos/ibc-rs/issues/342
#![allow(clippy::result_large_err)]
//! This library implements the InterBlockchain Communication (IBC) protocol in Rust. IBC is
Expand Down

0 comments on commit 1c62501

Please sign in to comment.