diff --git a/outlines/fsm/json_schema.py b/outlines/fsm/json_schema.py index dbd2baa40..8f3c4815d 100644 --- a/outlines/fsm/json_schema.py +++ b/outlines/fsm/json_schema.py @@ -199,6 +199,15 @@ def to_regex( return rf"({'|'.join(xor_patterns)})" + # Create pattern for Tuples, per JSON Schema spec, `prefixItems` determines types at each idx + elif "prefixItems" in instance: + element_patterns = [ + to_regex(resolver, t, whitespace_pattern) for t in instance["prefixItems"] + ] + comma_split_pattern = rf"{whitespace_pattern},{whitespace_pattern}" + tuple_inner = comma_split_pattern.join(element_patterns) + return rf"\[{whitespace_pattern}{tuple_inner}{whitespace_pattern}\]" + # The enum keyword is used to restrict a value to a fixed set of values. It # must be an array with at least one element, where each element is unique. elif "enum" in instance: diff --git a/tests/fsm/test_json_schema.py b/tests/fsm/test_json_schema.py index b12f9576e..8f8b5d88f 100644 --- a/tests/fsm/test_json_schema.py +++ b/tests/fsm/test_json_schema.py @@ -354,6 +354,15 @@ def test_match_number(pattern, does_match): rf"({STRING}{INTEGER})", [('"a"1', True), ('"a"', False), ('"1"', False)], ), + # Tuple / prefixItems + ( + { + "title": "Foo", + "prefixItems": [{"type": "string"}, {"type": "integer"}], + }, + rf"\[{WHITESPACE}{STRING}{WHITESPACE},{WHITESPACE}{INTEGER}{WHITESPACE}\]", + [('["a", 1]', True), ('["a", 1, 1]', False), ("[]", False)], + ), # Nested schema ( {