From ed75a8237dbd175eb5194a234e123f050126c7b9 Mon Sep 17 00:00:00 2001
From: dswij
Date: Sun, 17 Mar 2024 01:31:51 +0800
Subject: [PATCH] feat: add strict version of uri parsing

---
 src/uri/mod.rs  | 60 ++++++++++++++++++++++++++++++++++++
 src/uri/path.rs | 81 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 141 insertions(+)

diff --git a/src/uri/mod.rs b/src/uri/mod.rs
index 6ef2dc8e..a74cb391 100644
--- a/src/uri/mod.rs
+++ b/src/uri/mod.rs
@@ -339,6 +339,57 @@ impl Uri {
         parse_full(s)
     }
 
+    /// Like [`Uri::from_shared`], but input starting with `/` is validated by
+    /// [`PathAndQuery::from_shared_strict`], which rejects `{`, `}` and `"` in the path.
+    fn from_shared_strict(s: Bytes) -> Result<Uri, InvalidUri> {
+        use self::ErrorKind::*;
+
+        if s.len() > MAX_LEN {
+            return Err(TooLong.into());
+        }
+
+        match s.len() {
+            0 => return Err(Empty.into()),
+            1 => match s[0] {
+                b'/' => {
+                    return Ok(Uri {
+                        scheme: Scheme::empty(),
+                        authority: Authority::empty(),
+                        path_and_query: PathAndQuery::slash(),
+                    });
+                }
+                b'*' => {
+                    return Ok(Uri {
+                        scheme: Scheme::empty(),
+                        authority: Authority::empty(),
+                        path_and_query: PathAndQuery::star(),
+                    });
+                }
+                _ => {
+                    let authority = Authority::from_shared(s)?;
+
+                    return Ok(Uri {
+                        scheme: Scheme::empty(),
+                        authority,
+                        path_and_query: PathAndQuery::empty(),
+                    });
+                }
+            },
+            _ => {}
+        }
+
+        if s[0] == b'/' {
+            return Ok(Uri {
+                scheme: Scheme::empty(),
+                authority: Authority::empty(),
+                path_and_query: PathAndQuery::from_shared_strict(s)?,
+            });
+        }
+
+        // NOTE(review): other inputs take the shared `parse_full`; confirm it is strict as well.
+        parse_full(s)
+    }
+
     /// Convert a `Uri` from a static string.
     ///
     /// This function will not perform any copying, however the string is
@@ -365,6 +416,15 @@ impl Uri {
         }
     }
 
+    /// Like [`Uri::from_static`], but parses with the strict `from_shared_strict` rules.
+    ///
+    /// # Panics
+    /// Panics if `src` is not a valid URI under those rules.
+    pub fn from_static_strict(src: &'static str) -> Self {
+        Uri::from_shared_strict(Bytes::from_static(src.as_bytes()))
+            .unwrap_or_else(|e| panic!("static str is not valid URI: {}", e))
+    }
+
     /// Convert a `Uri` into `Parts`.
     ///
     /// # Note
diff --git a/src/uri/path.rs b/src/uri/path.rs
index 341ba2e6..2ebceda9 100644
--- a/src/uri/path.rs
+++ b/src/uri/path.rs
@@ -105,6 +105,81 @@ impl PathAndQuery {
         })
     }
 
+    /// Like [`Self::from_shared`], but an unencoded `{`, `}` or `"` in the path is an error.
+    pub(super) fn from_shared_strict(mut src: Bytes) -> Result<Self, InvalidUri> {
+        let mut query = NONE;
+        let mut fragment = None;
+
+        // Scope the iterator's borrow of `src` so it ends before `truncate` below.
+        {
+            let mut iter = src.as_ref().iter().enumerate();
+
+            // path ...
+            for (i, &b) in &mut iter {
+                // See https://url.spec.whatwg.org/#path-state
+                match b {
+                    b'?' => {
+                        debug_assert_eq!(query, NONE);
+                        query = i as u16; // NOTE(review): wraps if i > u16::MAX
+                        break;
+                    }
+                    b'#' => {
+                        fragment = Some(i);
+                        break;
+                    }
+
+                    // Bytes that may appear in the path without percent-encoding.
+                    // `"` (0x22), `{` (0x7B) and `}` (0x7D) are outside these ranges,
+                    // so they fall through to the error arm below.
+                    #[rustfmt::skip]
+                    0x21 |
+                    0x24..=0x3B |
+                    0x3D |
+                    0x40..=0x5F |
+                    0x61..=0x7A |
+                    0x7C |
+                    0x7E => {}
+
+                    _ => return Err(ErrorKind::InvalidUriChar.into()),
+                }
+            }
+
+            // query ...
+            if query != NONE {
+                for (i, &b) in iter {
+                    match b {
+                        // While queries *should* be percent-encoded, most bytes are
+                        // actually allowed; see https://url.spec.whatwg.org/#query-state
+                        //
+                        // Allowed: 0x21 / 0x24 - 0x3B / 0x3D / 0x3F - 0x7E. That range
+                        // still includes `{` and `}`; only `"` (0x22) is rejected here.
+                        #[rustfmt::skip]
+                        0x21 |
+                        0x24..=0x3B |
+                        0x3D |
+                        0x3F..=0x7E => {}
+
+                        b'#' => {
+                            fragment = Some(i);
+                            break;
+                        }
+
+                        _ => return Err(ErrorKind::InvalidUriChar.into()),
+                    }
+                }
+            }
+        }
+
+        if let Some(i) = fragment {
+            src.truncate(i);
+        }
+        // SAFETY: every byte kept in `src` matched an ASCII-only arm above, so it is valid UTF-8.
+        Ok(PathAndQuery {
+            data: unsafe { ByteStr::from_utf8_unchecked(src) },
+            query,
+        })
+    }
+
     /// Convert a `PathAndQuery` from a static string.
     ///
     /// This function will not perform any copying, however the string is
@@ -564,6 +639,12 @@ mod tests {
         );
     }
 
+    #[test]
+    fn fails_json_on_strict() {
+        let pq_bytes = r#"/{"bread":"baguette"}"#.as_bytes();
+        PathAndQuery::from_shared_strict(pq_bytes.into()).expect_err("should err");
+    }
+
     fn pq(s: &str) -> PathAndQuery {
         s.parse().expect(&format!("parsing {}", s))
     }