Skip to content

Commit

Permalink
Force dates, uuid, datetimes, times to be between quotes
Browse files Browse the repository at this point in the history
  • Loading branch information
navster888 authored and rlouf committed Apr 20, 2024
1 parent 92d79a1 commit 516d7a8
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 29 deletions.
8 changes: 4 additions & 4 deletions outlines/fsm/json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
"null": NULL,
}

# Regex fragments for JSON Schema string formats.
#
# Each name is assigned twice in this span: first as a bare pattern, then as
# a variant wrapped in literal double quotes. Read top to bottom, the quoted
# assignments further down are the ones that take effect.

# Bare (unquoted) variants.
# DATE_TIME: year (optional leading "-", at least four digits), month 01-12,
# day 01-31, a literal "T", HH:MM:SS, an optional fraction of exactly three
# digits, and an optional trailing "Z".
DATE_TIME = r"(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]{3})?(Z)?"
# DATE: four-digit year, month 01-12, day 01-31, separated by "-".
DATE = r"(?:\d{4})-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])"
# TIME: HH:MM:SS with an optional trailing "Z".
# NOTE(review): in a raw string, `\\.` is the regex for a literal backslash
# followed by any character, so a fractional part such as ".000" is never
# matched by the optional group; `\.` may have been intended -- confirm.
TIME = r"(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\.[0-9]+)?(Z)?"
# UUID: lowercase hexadecimal groups of 8-4-4-4-12 characters.
UUID = r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
# Quoted variants: the same patterns wrapped in literal double quotes, so
# only a quoted value (as a JSON string is written) can match.
DATE_TIME = r'"(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]{3})?(Z)?"'
DATE = r'"(?:\d{4})-(?:0[1-9]|1[0-2])-(?:0[1-9]|[1-2][0-9]|3[0-1])"'
# NOTE(review): same `\\.` concern as the unquoted TIME pattern -- the
# fractional-seconds group requires a literal backslash; confirm intent.
TIME = r'"(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\.[0-9]+)?(Z)?"'
UUID = r'"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"'

format_to_regex = {
"uuid": UUID,
Expand Down
146 changes: 121 additions & 25 deletions tests/fsm/test_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,52 +561,56 @@ def test_match(schema, regex, examples):
{"title": "Foo", "type": "string", "format": "uuid"},
UUID,
[
("123e4567-e89b-12d3-a456-426614174000", True),
("123e4567-e89b-12d3-a456-42661417400", False),
("123e4567-e89b-12d3-a456-42661417400g", False),
("123e4567-e89b-12d3-a456-42661417400-", False),
("", False),
('123e4567-e89b-12d3-a456-426614174000', False),
('"123e4567-e89b-12d3-a456-426614174000"', True),
('"123e4567-e89b-12d3-a456-42661417400"', False),
('"123e4567-e89b-12d3-a456-42661417400g"', False),
('"123e4567-e89b-12d3-a456-42661417400-"', False),
('""', False),
],
),
# DATE-TIME
(
{"title": "Foo", "type": "string", "format": "date-time"},
DATE_TIME,
[
("2018-11-13T20:20:39Z", True),
("2016-09-18T17:34:02.666Z", True),
("2008-05-11T15:30:00Z", True),
("2021-01-01T00:00:00", True),
("2022-01-10 07:19:30", False), # missing T
("2022-12-10T10-04-29", False), # incorrect separator
("2023-01-01", False),
('2018-11-13T20:20:39Z', False),
('"2018-11-13T20:20:39Z"', True),
('"2016-09-18T17:34:02.666Z"', True),
('"2008-05-11T15:30:00Z"', True),
('"2021-01-01T00:00:00"', True),
('"2022-01-10 07:19:30"', False), # missing T
('"2022-12-10T10-04-29"', False), # incorrect separator
('"2023-01-01"', False),
],
),
# DATE
(
{"title": "Foo", "type": "string", "format": "date"},
DATE,
[
("2018-11-13", True),
("2016-09-18", True),
("2008-05-11", True),
("2015-13-01", False), # incorrect month
("2022-01", False), # missing day
("2022/12/01", False), # incorrect separator"
('2018-11-13', False),
('"2018-11-13"', True),
('"2016-09-18"', True),
('"2008-05-11"', True),
('"2015-13-01"', False), # incorrect month
('"2022-01"', False), # missing day
('"2022/12/01"', False), # incorrect separator"
],
),
# TIME
(
{"title": "Foo", "type": "string", "format": "time"},
TIME,
[
("20:20:39Z", True),
("15:30:00Z", True),
("25:30:00", False), # incorrect hour
("15:30", False), # missing seconds
("15:30:00.000", False), # missing Z
("15-30-00", False), # incorrect separator
("15:30:00+01:00", False), # incorrect separator
('20:20:39Z', False),
('"20:20:39Z"', True),
('"15:30:00Z"', True),
('"25:30:00"', False), # incorrect hour
('"15:30"', False), # missing seconds
('"15:30:00.000"', False), # missing Z
('"15-30-00"', False), # incorrect separator
('"15:30:00+01:00"', False), # incorrect separator
],
),
],
Expand All @@ -625,6 +629,98 @@ def test_format(schema, regex, examples):
assert match is None
@pytest.mark.parametrize(
    "schema,examples",
    [
        # NESTED UUID
        (
            {
                "title": "Foo",
                "type": "object",
                "properties": {"uuid": {"type": "string", "format": "uuid"}},
            },
            [
                ('{"uuid": "123e4567-e89b-12d3-a456-426614174000"}', True),
                ('{"uuid":"123e4567-e89b-12d3-a456-42661417400"}', False),
                ('{"uuid":"123e4567-e89b-12d3-a456-42661417400g"}', False),
                ('{"uuid":"123e4567-e89b-12d3-a456-42661417400-"}', False),
                (
                    '{"uuid":123e4567-e89b-12d3-a456-426614174000}',
                    False,
                ),  # missing quotes for value
                ('{"uuid":""}', False),
            ],
        ),
        # NESTED DATE-TIME
        (
            {
                "title": "Foo",
                "type": "object",
                "properties": {"dateTime": {"type": "string", "format": "date-time"}},
            },
            [
                ('{"dateTime": "2018-11-13T20:20:39Z"}', True),
                ('{"dateTime":"2016-09-18T17:34:02.666Z"}', True),
                ('{"dateTime":"2008-05-11T15:30:00Z"}', True),
                ('{"dateTime":"2021-01-01T00:00:00"}', True),
                ('{"dateTime":"2022-01-10 07:19:30"}', False),  # missing T
                ('{"dateTime":"2022-12-10T10-04-29"}', False),  # incorrect separator
                (
                    '{"dateTime":2018-11-13T20:20:39Z}',
                    False,
                ),  # missing quotes for value
                ('{"dateTime":"2023-01-01"}', False),  # date only, no time part
            ],
        ),
        # NESTED DATE
        (
            {
                "title": "Foo",
                "type": "object",
                "properties": {"date": {"type": "string", "format": "date"}},
            },
            [
                ('{"date": "2018-11-13"}', True),
                ('{"date":"2016-09-18"}', True),
                ('{"date":"2008-05-11"}', True),
                ('{"date":"2015-13-01"}', False),  # incorrect month
                ('{"date":"2022-01"}', False),  # missing day
                ('{"date":"2022/12/01"}', False),  # incorrect separator
                ('{"date":2018-11-13}', False),  # missing quotes for value
            ],
        ),
        # NESTED TIME
        (
            {
                "title": "Foo",
                "type": "object",
                "properties": {"time": {"type": "string", "format": "time"}},
            },
            [
                ('{"time": "20:20:39Z"}', True),
                ('{"time":"15:30:00Z"}', True),
                ('{"time":"25:30:00"}', False),  # incorrect hour
                ('{"time":"15:30"}', False),  # missing seconds
                ('{"time":"15:30:00.000"}', False),  # fractional seconds (expected to be rejected)
                ('{"time":"15-30-00"}', False),  # incorrect separator
                ('{"time":"15:30:00+01:00"}', False),  # UTC offset instead of Z
                ('{"time":20:20:39Z}', False),  # missing quotes for value
            ],
        ),
    ],
)
def test_format_without_regex(schema, examples):
    """Check format-constrained string properties nested inside an object.

    The schema dict is serialised with ``json.dumps`` and passed to
    ``build_regex_from_schema``. Each ``(string, does_match)`` example must
    then be matched in full by the resulting regex when ``does_match`` is
    true, and not matched at all otherwise.
    """
    schema = json.dumps(schema)
    test_regex = build_regex_from_schema(schema)
    for string, does_match in examples:
        match = re.fullmatch(test_regex, string)
        if does_match:
            # Fail with a readable assertion rather than a TypeError from
            # subscripting None when an expected match is missing.
            assert match is not None, f"{string!r} should match {test_regex!r}"
            assert match[0] == string
            assert match.span() == (0, len(string))
        else:
            assert match is None, f"{string!r} should not match {test_regex!r}"
@pytest.mark.parametrize("whitespace_pattern", [None, r"[\n ]?", "abc"])
def test_json_schema_custom_whitespace_pattern(whitespace_pattern):
"""assert whitespace_pattern setting respected"""
Expand Down

0 comments on commit 516d7a8

Please sign in to comment.