Skip to content

Commit

Permalink
Fix invalid regex in unconstrained arrays for json_schema.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
lapp0 authored and rlouf committed May 27, 2024
1 parent e5c39e2 commit 538f77a
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 8 deletions.
23 changes: 15 additions & 8 deletions outlines/fsm/json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,22 @@ def to_regex(
# When the item schema is not given, any JSON value is a valid element under
# the specification. To keep the expression finite, nested objects and arrays
# are only allowed while the non-standard "depth" limit (default 2) is positive.
types = [
legal_types = [
{"type": "boolean"},
{"type": "null"},
{"type": "number"},
{"type": "integer"},
{"type": "string"},
]
regexes = [to_regex(resolver, t, whitespace_pattern) for t in types]
return rf"\[{whitespace_pattern}({'|'.join(regexes)})(,{whitespace_pattern}({'|'.join(regexes)})){num_repeats}){allow_empty}{whitespace_pattern}\]"
depth = instance.get("depth", 2)
if depth > 0:
legal_types.append({"type": "object", "depth": depth - 1})
legal_types.append({"type": "array", "depth": depth - 1})

regexes = [
to_regex(resolver, t, whitespace_pattern) for t in legal_types
]
return rf"\[{whitespace_pattern}({'|'.join(regexes)})(,{whitespace_pattern}({'|'.join(regexes)})){num_repeats}{allow_empty}{whitespace_pattern}\]"

elif instance_type == "object":
# pattern for json object with values defined by instance["additionalProperties"]
Expand All @@ -328,20 +335,20 @@ def to_regex(
# unset or True, it is unconstrained object.
# We handle this by setting additionalProperties to anyOf: {all types}

legal_values = [
legal_types = [
{"type": "string"},
{"type": "number"},
{"type": "boolean"},
{"type": "null"}
# { "type": "array" }, # TODO: enable arrays within object-types
{"type": "null"},
]

# We set the object depth to 2 to keep the expression finite, but the "depth"
# key is not a true component of the JSON Schema specification.
depth = instance.get("depth", 2)
if depth > 0:
legal_values.append({"type": "object", "depth": depth - 1})
additional_properties = {"anyOf": legal_values}
legal_types.append({"type": "object", "depth": depth - 1})
legal_types.append({"type": "array", "depth": depth - 1})
additional_properties = {"anyOf": legal_types}

value_pattern = to_regex(
resolver, additional_properties, whitespace_pattern
Expand Down
40 changes: 40 additions & 0 deletions tests/fsm/test_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,46 @@ def test_format(schema, regex, examples):
('{"time":20:20:39Z}', False), # missing quotes for value
],
),
# Unconstrained Object
(
{
"title": "Foo",
"type": "object",
},
[
("{}", True),
('{"a": 1, "b": null}', True),
('{"a": {"z": {"g": 4}}, "b": null}', True),
("1234", False), # not an object
('["a", "a"]', False), # an array, not an object
],
),
# Unconstrained Array
(
{
"type": "array",
},
[
("[1, {}, false]", True),
("[{}]", True),
('[{"a": {"z": "q"}, "b": null}]', True),
('[{"a": [1, 2, true], "b": null}]', True),
('[{"a": [1, 2, true], "b": {"a": "b"}}, 1, true, [1, [2]]]', True),
# too deep, default unconstrained depth limit = 2
(
'[{"a": [1, 2, true], "b": {"a": "b"}}, 1, true, [1, [2, [3]]]]',
False,
),
('[{"a": {"z": {"g": 4}}, "b": null}]', False),
("[[[[1]]]]", False),
# not an array
("{}", False),
('{"a": 1, "b": null}', False),
('{"a": {"z": {"g": 4}}, "b": null}', False),
("1234", False), # not an array
('{"a": "a"}', False), # not an array
],
),
],
)
def test_format_without_regex(schema, examples):
Expand Down

0 comments on commit 538f77a

Please sign in to comment.