From 6058f926c8645ef3c2abe7ac2d756aa00838259a Mon Sep 17 00:00:00 2001 From: "Victoria Terenina (torymur)" Date: Fri, 13 Dec 2024 11:42:25 +0000 Subject: [PATCH] Add more tests to json schema --- src/json_schema/mod.rs | 83 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/src/json_schema/mod.rs b/src/json_schema/mod.rs index 950306c..445d363 100644 --- a/src/json_schema/mod.rs +++ b/src/json_schema/mod.rs @@ -876,6 +876,89 @@ mod tests { ], vec!["this isnt valid json"], ), + // Confirm that oneOf doesn't produce illegal lookaround: https://github.com/dottxt-ai/outlines/issues/823 + // + // The pet field uses the discriminator field to decide which schema (Cat or Dog) applies, based on the pet_type property. + // - if pet_type is "cat", the Cat schema applies, requiring a meows field (integer) + // - if pet_type is "dog", the Dog schema applies, requiring a barks field (number) + // + // So, the expected object requires two fields: + // - pet, which must be one of two types: Cat or Dog, determined by the pet_type field + // - n, an integer + ( + r##"{ + "$defs": { + "Cat": { + "properties": { + "pet_type": { + "const": "cat", + "enum": ["cat"], + "title": "Pet Type", + "type": "string" + }, + "meows": { + "title": "Meows", + "type": "integer" + } + }, + "required": ["pet_type", "meows"], + "title": "Cat", + "type": "object" + }, + "Dog": { + "properties": { + "pet_type": { + "const": "dog", + "enum": ["dog"], + "title": "Pet Type", + "type": "string" + }, + "barks": { + "title": "Barks", + "type": "number" + } + }, + "required": ["pet_type", "barks"], + "title": "Dog", + "type": "object" + } + }, + "properties": { + "pet": { + "discriminator": { + "mapping": { + "cat": "#/$defs/Cat", + "dog": "#/$defs/Dog" + }, + "propertyName": "pet_type" + }, + "oneOf": [ + {"$ref": "#/$defs/Cat"}, + {"$ref": "#/$defs/Dog"} + ], + "title": "Pet" + }, + "n": { + "title": "N", + "type": "integer" + } + }, + "required": ["pet", "n"], + "title": 
"Model", + "type": "object" + }"##, + r#"\{[ ]?"pet"[ ]?:[ ]?((?:\{[ ]?"pet_type"[ ]?:[ ]?("cat")[ ]?,[ ]?"meows"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)[ ]?\})|(?:\{[ ]?"pet_type"[ ]?:[ ]?("dog")[ ]?,[ ]?"barks"[ ]?:[ ]?((-)?(0|[1-9][0-9]*))(\.[0-9]+)?([eE][+-][0-9]+)?[ ]?\}))[ ]?,[ ]?"n"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)[ ]?\}"#, + vec![ + r#"{ "pet": { "pet_type": "cat", "meows": 5 }, "n": 10 }"#, + r#"{ "pet": { "pet_type": "dog", "barks": 3.5 }, "n": 7 }"#, + ], + vec![ + // Missing required fields + r#"{ "pet": { "pet_type": "cat" }, "n": 10 }"#, + // Incorrect pet_type + r#"{ "pet": { "pet_type": "bird", "meows": 2 }, "n": 5 }"# + ], + ), ] { let json: Value = serde_json::from_str(schema).expect("Can't parse json"); let result = to_regex(&json, None).expect("To regex failed");