From a3c300c8026c8e76ce32e01077f31196d75b243b Mon Sep 17 00:00:00 2001 From: Owen Shepherd Date: Fri, 20 Dec 2024 05:15:47 +0000 Subject: [PATCH] Optimize output of non-required properties For this schema: ``` { "properties": { "a": { "const": "a" }, "b": { "const": "b" }, "c": { "const": "c" } } } ``` We currently produce the following regex (spacing added around alternatives for clarity): ``` \{("a":"a"(,"b":"b")?(,"c":"c")? |("a":"a",)?"b":"b"(,"c":"c")? |("a":"a",)?("b":"b",)?"c":"c")?\} ``` This works perfectly well, but contains redundancy, as seen by the fact that all three alternatives would match JSON containing all three fields. At the moment, the only difference between the alternatives is which field is mandatory. I propose that we instead make the alternatives model the choice of the last field: every earlier field is optional, and nothing follows the last one. This will produce a regex like this: ``` \{("a":"a" |("a":"a",)?"b":"b" |("a":"a",)?("b":"b",)?"c":"c")?\} ``` This gives us a shorter but 100% equivalent regex. --- src/json_schema/mod.rs | 6 +++--- src/json_schema/parsing.rs | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/json_schema/mod.rs b/src/json_schema/mod.rs index 4ec5c7c..832559f 100644 --- a/src/json_schema/mod.rs +++ b/src/json_schema/mod.rs @@ -838,7 +838,7 @@ mod tests { "title": "Character", "type": "object" }"#, - format!(r#"\{{([ ]?"name"[ ]?:[ ]?({STRING}|null)([ ]?,[ ]?"age"[ ]?:[ ]?({INTEGER}|null))?([ ]?,[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?[ ]?"age"[ ]?:[ ]?({INTEGER}|null)([ ]?,[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?([ ]?"age"[ ]?:[ ]?({INTEGER}|null)[ ]?,)?[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?[ ]?\}}"#).as_str(), + format!(r#"\{{([ ]?"name"[ ]?:[ ]?({STRING}|null)|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?[ ]?"age"[ ]?:[ ]?({INTEGER}|null)|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?([ ]?"age"[ ]?:[ ]?({INTEGER}|null)[ ]?,)?[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?[ ]?\}}"#).as_str(), vec![
r#"{ "name" : "Player" }"#, r#"{ "name" : "Player", "age" : 10, "strength" : 10 }"#, @@ -898,7 +898,7 @@ mod tests { ] { let json: Value = serde_json::from_str(schema).expect("Can't parse json"); let result = to_regex(&json, None).expect("To regex failed"); - assert_eq!(result, regex); + assert_eq!(result, regex, "JSON Schema {} didn't match", schema); let re = Regex::new(&result).expect("Regex failed"); for m in a_match { @@ -1075,7 +1075,7 @@ mod tests { assert!(result.is_ok(), "{:?}", result); let regex = result.unwrap(); assert_eq!( - r#"\{([ ]?"node"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)([ ]?,[ ]?"next"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*))?[ ]?\})?|([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)[ ]?,)?[ ]?"next"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*))?[ ]?\})?[ ]?\})?[ ]?\}"#, + r#"\{([ ]?"node"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)|([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)[ ]?,)?[ ]?"next"[ ]?:[ ]?\{([ ]?"value"[ ]?:[ ]?(-)?(0|[1-9][0-9]*))?[ ]?\})?[ ]?\})?[ ]?\}"#, regex, ); } diff --git a/src/json_schema/parsing.rs b/src/json_schema/parsing.rs index 9ac373b..f058759 100644 --- a/src/json_schema/parsing.rs +++ b/src/json_schema/parsing.rs @@ -161,9 +161,6 @@ impl<'a> Parser<'a> { pattern += &format!("({}{},)?", subregex, self.whitespace_pattern); } pattern += &property_subregexes[i]; - for subregex in &property_subregexes[i + 1..] { - pattern += &format!("({},{})?", self.whitespace_pattern, subregex); - } possible_patterns.push(pattern); }