diff --git a/tests/fsm/test_json_schema.py b/tests/fsm/test_json_schema.py index da801296..5dd8b5d4 100644 --- a/tests/fsm/test_json_schema.py +++ b/tests/fsm/test_json_schema.py @@ -4,966 +4,8 @@ import interegular import pytest -from outlines_core.fsm.json_schema import ( - BOOLEAN, - DATE, - DATE_TIME, - INTEGER, - NULL, - NUMBER, - STRING, - STRING_INNER, - TIME, - UUID, - WHITESPACE, - build_regex_from_schema, - to_regex, -) -from pydantic import BaseModel, Field, constr - - -def test_from_pydantic(): - class User(BaseModel): - user_id: int - name: str - maxlength_name: constr(max_length=10) - minlength_name: constr(min_length=10) - value: float - is_true: bool - - schema = json.dumps(User.model_json_schema()) - schedule = build_regex_from_schema(schema) - assert isinstance(schedule, str) - - -@pytest.mark.parametrize( - "pattern,does_match", - [ - ({"integer": "0"}, True), - ({"integer": "1"}, True), - ({"integer": "-1"}, True), - ({"integer": "01"}, False), - ({"integer": "1.3"}, False), - ({"integer": "t"}, False), - ], -) -def test_match_integer(pattern, does_match): - step = {"title": "Foo", "type": "integer"} - regex = to_regex(step) - assert regex == INTEGER - - value = pattern["integer"] - match = re.fullmatch(regex, value) - if does_match: - assert match[0] == value - assert match.span() == (0, len(value)) - else: - assert match is None - - -@pytest.mark.parametrize( - "pattern,does_match", - [ - ({"number": "1"}, True), - ({"number": "0"}, True), - ({"number": "01"}, False), - ({"number": ".3"}, False), - ({"number": "1.3"}, True), - ({"number": "-1.3"}, True), - ({"number": "1.3e9"}, False), - ({"number": "1.3e+9"}, True), - ], -) -def test_match_number(pattern, does_match): - step = {"title": "Foo", "type": "number"} - regex = to_regex(step) - assert regex == NUMBER - - value = pattern["number"] - match = re.fullmatch(regex, value) - if does_match: - assert match[0] == value - assert match.span() == (0, len(value)) - else: - assert match is None - - -@pytest.mark.parametrize( - "schema,regex,examples", - [ - # String - ( - {"title": "Foo", "type": "string"}, - STRING, - [ - ("unquotedstring", False), - ('"(parenthesized_string)"', True), - ('"malformed) parenthesis (((() string"', True), - ('"quoted_string"', True), - (r'"escape_\character"', False), - (r'"double_\\escape"', True), - (r'"\n"', False), - (r'"\\n"', True), - (r'"unescaped " quote"', False), - (r'"escaped \" quote"', True), - ], - ), - # String with maximum length - ( - {"title": "Foo", "type": "string", "maxLength": 3}, - f'"{STRING_INNER}{{0,3}}"', - [('"ab"', True), ('"a""', False), ('"abcd"', False)], - ), - # String with minimum length - ( - {"title": "Foo", "type": "string", "minLength": 3}, - f'"{STRING_INNER}{{3,}}"', - [('"ab"', False), ('"abcd"', True), ('"abc""', False)], - ), - # String with both minimum and maximum length - ( - {"title": "Foo", "type": "string", "minLength": 3, "maxLength": 5}, - f'"{STRING_INNER}{{3,5}}"', - [('"ab"', False), ('"abcd"', True), ('"abcdef""', False)], - ), - # String defined by a regular expression - ( - {"title": "Foo", "type": "string", "pattern": r"^[a-z]$"}, - r'("[a-z]")', - [('"a"', True), ('"1"', False)], - ), - # Boolean - ( - {"title": "Foo", "type": "boolean"}, - BOOLEAN, - [ - ("true", True), - ("false", True), - ("null", False), - ("0", False), - ], - ), - # Null - ( - {"title": "Foo", "type": "null"}, - NULL, - [ - ("null", True), - ("true", False), - ("0", False), - ], - ), - # Const string - ( - {"title": "Foo", "const": "Marc", "type": "string"}, - '"Marc"', - [('"Marc"', True), ('"Jean"', False), ('"John"', False)], - ), - # Make sure strings are escaped with regex escaping - ( - {"title": "Foo", "const": ".*", "type": "string"}, - r'"\.\*"', - [('".*"', True), (r'"\s*"', False), (r'"\.\*"', False)], - ), - # Make sure strings are escaped with JSON escaping - ( - {"title": "Foo", "const": '"', "type": "string"}, - r'"\\""', - [('"\\""', True), ('"""', False)], - ), - # Const integer - ( - {"title": "Foo", "const": 0, "type": "integer"}, - "0", - [("0", True), ("1", False), ("a", False)], - ), - # Const float - ( - {"title": "Foo", "const": 0.2, "type": "float"}, - r"0\.2", - [("0.2", True), ("032", False)], - ), - # Const boolean - ( - {"title": "Foo", "const": True, "type": "boolean"}, - "true", - [("true", True), ("True", False)], - ), - # Const null - ( - {"title": "Foo", "const": None, "type": "null"}, - "null", - [("null", True), ("None", False), ("", False)], - ), - # Enum string - ( - {"title": "Foo", "enum": ["Marc", "Jean"], "type": "string"}, - '("Marc"|"Jean")', - [('"Marc"', True), ('"Jean"', True), ('"John"', False)], - ), - # Make sure strings are escaped with regex and JSON escaping - ( - {"title": "Foo", "enum": [".*", r"\s*"], "type": "string"}, - r'("\.\*"|"\\\\s\*")', - [('".*"', True), (r'"\\s*"', True), (r'"\.\*"', False)], - ), - # Enum integer - ( - {"title": "Foo", "enum": [0, 1], "type": "integer"}, - "(0|1)", - [("0", True), ("1", True), ("a", False)], - ), - # Enum mix of types - ( - {"title": "Foo", "enum": [6, 5.3, "potato", True, None]}, - r'(6|5\.3|"potato"|true|null)', - [ - ("6", True), - ("5.3", True), - ('"potato"', True), - ("true", True), - ("null", True), - ("523", False), - ("True", False), - ("None", False), - ], - ), - # integer - ( - { - "title": "Foo", - "type": "object", - "properties": {"count": {"title": "Count", "type": "integer"}}, - "required": ["count"], - }, - '\\{[ ]?"count"[ ]?:[ ]?(-)?(0|[1-9][0-9]*)[ ]?\\}', - [('{ "count": 100 }', True)], - ), - # integer with minimum digits - ( - { - "title": "Foo", - "type": "object", - "properties": { - "count": {"title": "Count", "type": "integer", "minDigits": 3} - }, - "required": ["count"], - }, - '\\{[ ]?"count"[ ]?:[ ]?(-)?(0|[1-9][0-9]{2,})[ ]?\\}', - [('{ "count": 10 }', False), ('{ "count": 100 }', True)], - ), - # integer with maximum digits - ( - { - "title": "Foo", - "type": "object", - "properties": { - "count": {"title": "Count", "type": "integer", "maxDigits": 3} - }, - "required": ["count"], - }, - '\\{[ ]?"count"[ ]?:[ ]?(-)?(0|[1-9][0-9]{0,2})[ ]?\\}', - [('{ "count": 100 }', True), ('{ "count": 1000 }', False)], - ), - # integer with minimum and maximum digits - ( - { - "title": "Foo", - "type": "object", - "properties": { - "count": { - "title": "Count", - "type": "integer", - "minDigits": 3, - "maxDigits": 5, - } - }, - "required": ["count"], - }, - '\\{[ ]?"count"[ ]?:[ ]?(-)?(0|[1-9][0-9]{2,4})[ ]?\\}', - [ - ('{ "count": 10 }', False), - ('{ "count": 100 }', True), - ('{ "count": 10000 }', True), - ('{ "count": 100000 }', False), - ], - ), - # number - ( - { - "title": "Foo", - "type": "object", - "properties": {"count": {"title": "Count", "type": "number"}}, - "required": ["count"], - }, - '\\{[ ]?"count"[ ]?:[ ]?((-)?(0|[1-9][0-9]*))(\\.[0-9]+)?([eE][+-][0-9]+)?[ ]?\\}', - [('{ "count": 100 }', True), ('{ "count": 100.5 }', True)], - ), - # number with min and max integer digits - ( - { - "title": "Foo", - "type": "object", - "properties": { - "count": { - "title": "Count", - "type": "number", - "minDigitsInteger": 3, - "maxDigitsInteger": 5, - } - }, - "required": ["count"], - }, - '\\{[ ]?"count"[ ]?:[ ]?((-)?(0|[1-9][0-9]{2,4}))(\\.[0-9]+)?([eE][+-][0-9]+)?[ ]?\\}', - [ - ('{ "count": 10.005 }', False), - ('{ "count": 100.005 }', True), - ('{ "count": 10000.005 }', True), - ('{ "count": 100000.005 }', False), - ], - ), - # number with min and max fraction digits - ( - { - "title": "Foo", - "type": "object", - "properties": { - "count": { - "title": "Count", - "type": "number", - "minDigitsFraction": 3, - "maxDigitsFraction": 5, - } - }, - "required": ["count"], - }, - '\\{[ ]?"count"[ ]?:[ ]?((-)?(0|[1-9][0-9]*))(\\.[0-9]{3,5})?([eE][+-][0-9]+)?[ ]?\\}', - [ - ('{ "count": 1.05 }', False), - ('{ "count": 1.005 }', True), - ('{ "count": 1.00005 }', True), - ('{ "count": 1.000005 }', False), - ], - ), - # number with min and max exponent digits - ( - { - "title": "Foo", - "type": "object", - "properties": { - "count": { - "title": "Count", - "type": "number", - "minDigitsExponent": 3, - "maxDigitsExponent": 5, - } - }, - "required": ["count"], - }, - '\\{[ ]?"count"[ ]?:[ ]?((-)?(0|[1-9][0-9]*))(\\.[0-9]+)?([eE][+-][0-9]{3,5})?[ ]?\\}', - [ - ('{ "count": 1.05e1 }', False), - ('{ "count": 1.05e+001 }', True), - ('{ "count": 1.05e-00001 }', True), - ('{ "count": 1.05e0000001 }', False), - ], - ), - # number with min and max integer, fraction and exponent digits - ( - { - "title": "Foo", - "type": "object", - "properties": { - "count": { - "title": "Count", - "type": "number", - "minDigitsInteger": 3, - "maxDigitsInteger": 5, - "minDigitsFraction": 3, - "maxDigitsFraction": 5, - "minDigitsExponent": 3, - "maxDigitsExponent": 5, - } - }, - "required": ["count"], - }, - '\\{[ ]?"count"[ ]?:[ ]?((-)?(0|[1-9][0-9]{2,4}))(\\.[0-9]{3,5})?([eE][+-][0-9]{3,5})?[ ]?\\}', - [ - ('{ "count": 1.05e1 }', False), - ('{ "count": 100.005e+001 }', True), - ('{ "count": 10000.00005e-00001 }', True), - ('{ "count": 100000.000005e0000001 }', False), - ], - ), - # array - ( - {"title": "Foo", "type": "array", "items": {"type": "number"}}, - rf"\[{WHITESPACE}(({NUMBER})(,{WHITESPACE}({NUMBER})){{0,}})?{WHITESPACE}\]", - [("[1e+9,1.3]", True), ("[]", True), ("[1", False)], - ), - # array with a set length of 1 - ( - { - "title": "Foo", - "type": "array", - "items": {"type": "integer"}, - "minItems": 1, - "maxItems": 1, - }, - rf"\[{WHITESPACE}(({INTEGER})(,{WHITESPACE}({INTEGER})){{0,0}}){WHITESPACE}\]", - [("[1]", True), ("[1,2]", False), ('["a"]', False), ("[]", False)], - ), - # array with a set length greather than 1 - ( - { - "title": "Foo", - "type": "array", - "items": {"type": "integer"}, - "minItems": 3, - "maxItems": 3, - }, - rf"\[{WHITESPACE}(({INTEGER})(,{WHITESPACE}({INTEGER})){{2,2}}){WHITESPACE}\]", - [("[1]", False), ("[]", False), ("[1,2,3]", True), ("[1,2,3,4]", False)], - ), - # array with length 0 - ( - { - "title": "Foo", - "type": "array", - "items": {"type": "integer"}, - "minItems": 0, - "maxItems": 0, - }, - rf"\[{WHITESPACE}\]", - [("[1]", False), ("[]", True), ("[1,2,3]", False), ("[1,2,3,4]", False)], - ), - # object - ( - { - "title": "TestSchema", - "type": "object", - "properties": { - "test_dict": { - "title": "Test Dict", - "additionalProperties": {"type": "string"}, - "type": "object", - } - }, - "required": ["test_dict"], - }, - rf"""\{{{WHITESPACE}"test_dict"{WHITESPACE}:{WHITESPACE}\{{{WHITESPACE}({STRING}{WHITESPACE}:{WHITESPACE}{STRING}({WHITESPACE},{WHITESPACE}{STRING}{WHITESPACE}:{WHITESPACE}{STRING}){{0,}})?{WHITESPACE}\}}{WHITESPACE}\}}""", - [ - ("""{ "test_dict":{"foo":"bar","baz": "bif"}}""", True), - ("""{ "test_dict":{"foo":"bar" }}""", True), - ("""{ "test_dict":{}}""", True), - ("""{ "WRONG_KEY":{}}""", False), - ("""{ "test_dict":{"wrong_type" 1}}""", False), - ], - ), - # object containing object - ( - { - "title": "TestSchema", - "type": "object", - "properties": { - "test_dict": { - "title": "Test Dict", - "additionalProperties": { - "additionalProperties": {"type": "integer"}, - "type": "object", - }, - "type": "object", - } - }, - "required": ["test_dict"], - }, - rf"""\{{{WHITESPACE}"test_dict"{WHITESPACE}:{WHITESPACE}\{{{WHITESPACE}({STRING}{WHITESPACE}:{WHITESPACE}\{{{WHITESPACE}({STRING}{WHITESPACE}:{WHITESPACE}{INTEGER}({WHITESPACE},{WHITESPACE}{STRING}{WHITESPACE}:{WHITESPACE}{INTEGER}){{0,}})?{WHITESPACE}\}}({WHITESPACE},{WHITESPACE}{STRING}{WHITESPACE}:{WHITESPACE}\{{{WHITESPACE}({STRING}{WHITESPACE}:{WHITESPACE}{INTEGER}({WHITESPACE},{WHITESPACE}{STRING}{WHITESPACE}:{WHITESPACE}{INTEGER}){{0,}})?{WHITESPACE}\}}){{0,}})?{WHITESPACE}\}}{WHITESPACE}\}}""", - [ - ( - """{"test_dict": {"foo": {"bar": 123, "apple": 99}, "baz": {"bif": 456}}}""", - True, - ), - ( - """{"test_dict": {"anykey": {"anykey": 123}, "anykey2": {"bif": 456}}}""", - True, - ), - ("""{"test_dict": {}}""", True), - ("""{"test_dict": {"dict of empty dicts are ok": {} }}""", True), - ( - """{"test_dict": {"anykey": {"ONLY Dict[Dict]": 123}, "No Dict[int]" 1: }}""", - False, - ), - ], - ), - # oneOf - ( - { - "title": "Foo", - "oneOf": [{"type": "string"}, {"type": "number"}, {"type": "boolean"}], - }, - rf'((?:"{STRING_INNER}*")|(?:{NUMBER})|(?:{BOOLEAN}))', - [ - ("12.3", True), - ("true", True), - ('"a"', True), - ("null", False), - ("", False), - ("12true", False), - ('1.3"a"', False), - ('12.3true"a"', False), - ], - ), - # anyOf - ( - { - "title": "Foo", - "anyOf": [{"type": "string"}, {"type": "integer"}], - }, - rf"({STRING}|{INTEGER})", - [("12", True), ('"a"', True), ('1"a"', False)], - ), - # allOf - ( - { - "title": "Foo", - "allOf": [{"type": "string"}, {"type": "integer"}], - }, - rf"({STRING}{INTEGER})", - [('"a"1', True), ('"a"', False), ('"1"', False)], - ), - # Tuple / prefixItems - ( - { - "title": "Foo", - "prefixItems": [{"type": "string"}, {"type": "integer"}], - }, - rf"\[{WHITESPACE}{STRING}{WHITESPACE},{WHITESPACE}{INTEGER}{WHITESPACE}\]", - [('["a", 1]', True), ('["a", 1, 1]', False), ("[]", False)], - ), - # Nested schema - ( - { - "title": "Bar", - "type": "object", - "properties": { - "fuzz": { - "title": "Foo", - "type": "object", - "properties": {"spam": {"title": "Spam", "type": "integer"}}, - "required": ["spam"], - } - }, - "required": ["fuzz"], - }, - f'\\{{[ ]?"fuzz"[ ]?:[ ]?\\{{[ ]?"spam"[ ]?:[ ]?{INTEGER}[ ]?\\}}[ ]?\\}}', - [('{ "fuzz": { "spam": 100 }}', True)], - ), - # Schema with a reference - ( - { - "title": "User", - "type": "object", - "properties": { - "user_id": {"title": "User Id", "type": "integer"}, - "name": {"title": "Name", "type": "string"}, - "a": {"$ref": "#/properties/name"}, - }, - "required": ["user_id", "name", "a"], - }, - f'\\{{[ ]?"user_id"[ ]?:[ ]?{INTEGER}[ ]?,[ ]?"name"[ ]?:[ ]?{STRING}[ ]?,[ ]?"a"[ ]?:[ ]?{STRING}[ ]?\\}}', - [('{"user_id": 100, "name": "John", "a": "Marc"}', True)], - ), - ( - { - "title": "User", - "type": "object", - "$defs": {"name": {"title": "Name2", "type": "string"}}, - "properties": { - "user_id": {"title": "User Id", "type": "integer"}, - "name": {"title": "Name", "type": "string"}, - "name2": {"$ref": "#/$defs/name"}, - }, - "required": ["user_id", "name", "name2"], - }, - f'\\{{[ ]?"user_id"[ ]?:[ ]?{INTEGER}[ ]?,[ ]?"name"[ ]?:[ ]?{STRING}[ ]?,[ ]?"name2"[ ]?:[ ]?{STRING}[ ]?\\}}', - [('{"user_id": 100, "name": "John", "name2": "Marc"}', True)], - ), - ( - { - "$id": "customer", - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "Customer", - "type": "object", - "properties": { - "name": {"type": "string"}, - "last_name": {"type": "string"}, - "address": {"$ref": "customer#/$defs/address"}, - }, - "required": [ - "name", - "first_name", - "last_name", - "address", - "shipping_address", - "billing_address", - ], - "$defs": { - "address": { - "title": "Address", - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "city": {"type": "string"}, - }, - "required": ["street_address", "city", "state"], - "definitions": { - "state": { - "type": "object", - "title": "State", - "properties": {"name": {"type": "string"}}, - "required": ["name"], - } - }, - } - }, - }, - f'\\{{[ ]?"name"[ ]?:[ ]?{STRING}[ ]?,[ ]?"last_name"[ ]?:[ ]?{STRING}[ ]?,[ ]?"address"[ ]?:[ ]?\\{{[ ]?"city"[ ]?:[ ]?{STRING}[ ]?\\}}[ ]?\\}}', - [ - ( - '{"name": "John", "last_name": "Doe", "address": {"city": "Paris"}}', - True, - ) - ], - ), - # Optional properties - # Last required property in first position - ( - { - "properties": { - "name": {"type": "string"}, - "age": {"anyOf": [{"type": "integer"}, {"type": "null"}]}, - "weapon": {"anyOf": [{"type": "string"}, {"type": "null"}]}, - }, - "required": ["name"], - "title": "Character", - "type": "object", - }, - f'\\{{[ ]?"name"[ ]?:[ ]?{STRING}([ ]?,[ ]?"age"[ ]?:[ ]?({INTEGER}|null))?([ ]?,[ ]?"weapon"[ ]?:[ ]?({STRING}|null))?[ ]?\\}}', - [ - ('{ "name" : "Player" }', True), - ('{ "name" : "Player", "weapon" : "sword" }', True), - ('{ "age" : 10, "weapon" : "sword" }', False), - ], - ), - # Last required property in middle position - ( - { - "properties": { - "name": {"type": "string"}, - "age": {"anyOf": [{"type": "integer"}, {"type": "null"}]}, - "weapon": {"type": "string"}, - "strength": {"anyOf": [{"type": "integer"}, {"type": "null"}]}, - }, - "required": ["name", "weapon"], - "title": "Character", - "type": "object", - }, - f'\\{{[ ]?"name"[ ]?:[ ]?{STRING}[ ]?,([ ]?"age"[ ]?:[ ]?({INTEGER}|null)[ ]?,)?[ ]?"weapon"[ ]?:[ ]?{STRING}([ ]?,[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?[ ]?\\}}', - [ - ('{ "name" : "Player" , "weapon" : "sword" }', True), - ( - '{ "name" : "Player", "age" : 10, "weapon" : "sword" , "strength" : 10 }', - True, - ), - ('{ "weapon" : "sword" }', False), - ], - ), - # Last required property in last position - ( - { - "properties": { - "name": {"anyOf": [{"type": "string"}, {"type": "null"}]}, - "age": {"type": "integer"}, - "armor": {"type": "string"}, - "strength": {"anyOf": [{"type": "integer"}, {"type": "null"}]}, - "weapon": {"title": "Weapon", "type": "string"}, - }, - "required": ["age", "armor", "weapon"], - "title": "Character", - "type": "object", - }, - f'\\{{([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?[ ]?"age"[ ]?:[ ]?{INTEGER}[ ]?,[ ]?"armor"[ ]?:[ ]?{STRING}[ ]?,([ ]?"strength"[ ]?:[ ]?({INTEGER}|null)[ ]?,)?[ ]?"weapon"[ ]?:[ ]?{STRING}[ ]?\\}}', - [ - ( - '{ "name" : "Player", "age" : 10, "armor" : "plate", "strength" : 11, "weapon" : "sword" }', - True, - ), - ('{ "age" : 10, "armor" : "plate", "weapon" : "sword" }', True), - ( - '{ "name" : "Kahlhanbeh", "armor" : "plate", "weapon" : "sword" }', - False, - ), - ], - ), - # All properties are optional - ( - { - "properties": { - "name": {"anyOf": [{"type": "string"}, {"type": "null"}]}, - "age": {"anyOf": [{"type": "integer"}, {"type": "null"}]}, - "strength": {"anyOf": [{"type": "integer"}, {"type": "null"}]}, - }, - "title": "Character", - "type": "object", - }, - f'\\{{([ ]?"name"[ ]?:[ ]?({STRING}|null)([ ]?,[ ]?"age"[ ]?:[ ]?({INTEGER}|null))?([ ]?,[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?[ ]?"age"[ ]?:[ ]?({INTEGER}|null)([ ]?,[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?|([ ]?"name"[ ]?:[ ]?({STRING}|null)[ ]?,)?([ ]?"age"[ ]?:[ ]?({INTEGER}|null)[ ]?,)?[ ]?"strength"[ ]?:[ ]?({INTEGER}|null))?[ ]?\\}}', - [ - ('{ "name" : "Player" }', True), - ('{ "name" : "Player", "age" : 10, "strength" : 10 }', True), - ('{ "age" : 10, "strength" : 10 }', True), - ("{ }", True), - ], - ), - ], -) -def test_match(schema, regex, examples): - interegular.parse_pattern(regex) - schema = json.dumps(schema) - test_regex = build_regex_from_schema(schema) - assert test_regex == regex - - for string, does_match in examples: - match = re.fullmatch(test_regex, string) - if does_match: - if match is None: - raise ValueError(f"Expected match for '{string}'") - assert match[0] == string - assert match.span() == (0, len(string)) - else: - assert match is None - - -@pytest.mark.parametrize( - "schema,regex,examples", - [ - # UUID - ( - {"title": "Foo", "type": "string", "format": "uuid"}, - UUID, - [ - ("123e4567-e89b-12d3-a456-426614174000", False), - ('"123e4567-e89b-12d3-a456-426614174000"', True), - ('"123e4567-e89b-12d3-a456-42661417400"', False), - ('"123e4567-e89b-12d3-a456-42661417400g"', False), - ('"123e4567-e89b-12d3-a456-42661417400-"', False), - ('""', False), - ], - ), - # DATE-TIME - ( - {"title": "Foo", "type": "string", "format": "date-time"}, - DATE_TIME, - [ - ("2018-11-13T20:20:39Z", False), - ('"2018-11-13T20:20:39Z"', True), - ('"2016-09-18T17:34:02.666Z"', True), - ('"2008-05-11T15:30:00Z"', True), - ('"2021-01-01T00:00:00"', True), - ('"2022-01-10 07:19:30"', False), # missing T - ('"2022-12-10T10-04-29"', False), # incorrect separator - ('"2023-01-01"', False), - ], - ), - # DATE - ( - {"title": "Foo", "type": "string", "format": "date"}, - DATE, - [ - ("2018-11-13", False), - ('"2018-11-13"', True), - ('"2016-09-18"', True), - ('"2008-05-11"', True), - ('"2015-13-01"', False), # incorrect month - ('"2022-01"', False), # missing day - ('"2022/12/01"', False), # incorrect separator" - ], - ), - # TIME - ( - {"title": "Foo", "type": "string", "format": "time"}, - TIME, - [ - ("20:20:39Z", False), - ('"20:20:39Z"', True), - ('"15:30:00Z"', True), - ('"25:30:00"', False), # incorrect hour - ('"15:30"', False), # missing seconds - ('"15:30:00.000"', False), # missing Z - ('"15-30-00"', False), # incorrect separator - ('"15:30:00+01:00"', False), # incorrect separator - ], - ), - ], -) -def test_format(schema, regex, examples): - interegular.parse_pattern(regex) - schema = json.dumps(schema) - test_regex = build_regex_from_schema(schema) - assert test_regex == regex - - for string, does_match in examples: - match = re.fullmatch(test_regex, string) - if does_match: - assert match[0] == string - assert match.span() == (0, len(string)) - else: - assert match is None - - -@pytest.mark.parametrize( - "schema,examples", - [ - # NESTED UUID - ( - { - "title": "Foo", - "type": "object", - "properties": {"uuid": {"type": "string", "format": "uuid"}}, - }, - [ - ('{"uuid": "123e4567-e89b-12d3-a456-426614174000"}', True), - ('{"uuid":"123e4567-e89b-12d3-a456-42661417400"}', False), - ('{"uuid":"123e4567-e89b-12d3-a456-42661417400g"}', False), - ('{"uuid":"123e4567-e89b-12d3-a456-42661417400-"}', False), - ( - '{"uuid":123e4567-e89b-12d3-a456-426614174000}', - False, - ), # missing quotes for value - ('{"uuid":""}', False), - ], - ), - # NESTED DATE-TIME - ( - { - "title": "Foo", - "type": "object", - "properties": {"dateTime": {"type": "string", "format": "date-time"}}, - }, - [ - ('{"dateTime": "2018-11-13T20:20:39Z"}', True), - ('{"dateTime":"2016-09-18T17:34:02.666Z"}', True), - ('{"dateTime":"2008-05-11T15:30:00Z"}', True), - ('{"dateTime":"2021-01-01T00:00:00"}', True), - ('{"dateTime":"2022-01-10 07:19:30"}', False), # missing T - ('{"dateTime":"2022-12-10T10-04-29"}', False), # incorrect separator - ( - '{"dateTime":2018-11-13T20:20:39Z}', - False, - ), # missing quotes for value - ('{"dateTime":"2023-01-01"}', False), - ], - ), - # NESTED DATE - ( - { - "title": "Foo", - "type": "object", - "properties": {"date": {"type": "string", "format": "date"}}, - }, - [ - ('{"date": "2018-11-13"}', True), - ('{"date":"2016-09-18"}', True), - ('{"date":"2008-05-11"}', True), - ('{"date":"2015-13-01"}', False), # incorrect month - ('{"date":"2022-01"}', False), # missing day - ('{"date":"2022/12/01"}', False), # incorrect separator" - ('{"date":2018-11-13}', False), # missing quotes for value - ], - ), - # NESTED TIME - ( - { - "title": "Foo", - "type": "object", - "properties": {"time": {"type": "string", "format": "time"}}, - }, - [ - ('{"time": "20:20:39Z"}', True), - ('{"time":"15:30:00Z"}', True), - ('{"time":"25:30:00"}', False), # incorrect hour - ('{"time":"15:30"}', False), # missing seconds - ('{"time":"15:30:00.000"}', False), # missing Z - ('{"time":"15-30-00"}', False), # incorrect separator - ('{"time":"15:30:00+01:00"}', False), # incorrect separator - ('{"time":20:20:39Z}', False), # missing quotes for value - ], - ), - # Unconstrained Object - ( - { - "title": "Foo", - "type": "object", - }, - [ - ("{}", True), - ('{"a": 1, "b": null}', True), - ('{"a": {"z": {"g": 4}}, "b": null}', True), - ("1234", False), # not an object - ('["a", "a"]', False), # not an array - ], - ), - # Unconstrained Array - ( - { - "type": "array", - }, - [ - ("[1, {}, false]", True), - ("[{}]", True), - ('[{"a": {"z": "q"}, "b": null}]', True), - ('[{"a": [1, 2, true], "b": null}]', True), - ('[{"a": [1, 2, true], "b": {"a": "b"}}, 1, true, [1, [2]]]', True), - # too deep, default unconstrained depth limit = 2 - ( - '[{"a": [1, 2, true], "b": {"a": "b"}}, 1, true, [1, [2, [3]]]]', - False, - ), - ('[{"a": {"z": {"g": 4}}, "b": null}]', False), - ("[[[[1]]]]", False), - # not an array - ("{}", False), - ('{"a": 1, "b": null}', False), - ('{"a": {"z": {"g": 4}}, "b": null}', False), - ("1234", False), # not an array - ('{"a": "a"}', False), # not an array - ], - ), - # No schema / unconstrained value - ( - {}, - [ - ('"aaabbuecuh"', True), # string - ("5.554", True), # number - ("true", True), # boolean - ("null", True), # null - ("5999", True), # integer - ('["a", "b"]', True), # array - ('{"key": {"k2": "value"}}', True), # nested object - ("this isnt valid json", False), - ], - ), - ], -) -def test_format_without_regex(schema, examples): - schema = json.dumps(schema) - test_regex = build_regex_from_schema(schema) - for string, does_match in examples: - match = re.fullmatch(test_regex, string) - if does_match: - assert match[0] == string - assert match.span() == (0, len(string)) - else: - assert match is None +from outlines_core.fsm.json_schema import build_regex_from_schema, to_regex +from pydantic import BaseModel, Field @pytest.mark.parametrize("whitespace_pattern", [None, r"[\n ]*", "abc"])