Merge pull request #597 from guardrails-ai/list-support

Top Level List Support
guardrails-ai · Mar 5, 2024 · 5ce1d29 · 5ce1d29
2 parents b0b233b + e05040a
commit 5ce1d29
Show file tree

Hide file tree

Showing 22 changed files with 549 additions and 56 deletions.
diff --git a/docs/how_to_guides/structured_data_with_guardrails.mdx b/docs/how_to_guides/structured_data_with_guardrails.mdx
@@ -0,0 +1,219 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Generate structured data with Guardrails AI
+
+Guardrails AI is effective for generating structured data across a variety of LLMs. This guide contains
+the following:
+1. General instructions on generating structured data from Guardrails using `Pydantic` or Markup (i.e. `RAIL`), and
+2. Examples to generate structured data using `Pydantic` or Markup.
+
+## Syntax for generating structured data
+
+There are two ways to generate structured data with Guardrails AI: using `Pydantic` or Markup (i.e. `RAIL`).
+
+1. **Pydantic**: In order to generate structured data with Pydantic models, create a Pydantic model with the desired fields and types, then create a `Guard` object that uses the Pydantic model to generate structured data, and finally call the LLM of your choice with the `guard` object to generate structured data.
+2. **RAIL**: In order to generate structured data with RAIL specs, create a RAIL spec with the desired fields and types, then create a `Guard` object that uses the RAIL spec to generate structured data, and finally call the LLM of your choice with the `guard` object to generate structured data.
+
+Below is the syntax for generating structured data with Guardrails AI using `Pydantic` or Markup (i.e. `RAIL`).
+
+<Tabs>
+  <TabItem value="pydantic" label="Pydantic" default>
+    In order to generate structured data, first create a Pydantic model with the desired fields and types.
+    ```python
+    from pydantic import BaseModel
+
+    class Person(BaseModel):
+        name: str
+        age: int
+        is_employed: bool
+    ```
+
+    Then, create a `Guard` object that uses the Pydantic model to generate structured data.
+    ```python
+    from guardrails import Guard
+
+    guard = Guard.from_pydantic(Person)
+    ```
+
+    Finally, call the LLM of your choice with the `guard` object to generate structured data.
+    ```python
+    import openai
+
+    res = guard(
+        openai.chat.completions.create,
+        model="gpt-3.5-turbo",
+    )
+    ```
+  </TabItem>
+  <TabItem value="rail" label="RAIL">
+    In order to generate structured data, first create a RAIL spec with the desired fields and types.
+    ```xml
+    <rail version="0.1">
+      <output>
+        <string name="name" />
+        <integer name="age" />
+        <boolean name="is_employed" />
+      </output>
+    </rail>
+    ```
+
+    Then, create a `Guard` object that uses the RAIL spec to generate structured data.
+    ```python
+    from guardrails import Guard
+
+    guard = Guard.from_rail_string("""
+      <rail version="0.1">
+        <output>
+          <string name="name" />
+          <integer name="age" />
+          <boolean name="is_employed" />
+        </output>
+      </rail>
+    """)
+    ```
+
+    Finally, call the LLM of your choice with the `guard` object to generate structured data.
+    ```python
+    import openai
+
+    res = guard(
+        openai.chat.completions.create,
+        model="gpt-3.5-turbo",
+    )
+    ```
+  </TabItem>
+</Tabs>
+
+## Generate a JSON object with simple types
+
+<Tabs>
+  <TabItem value="json" label="JSON" default>
+    ```json
+    {
+      "name": "John Doe",
+      "age": 30,
+      "is_employed": true
+    }
+    ```
+  </TabItem>
+  <TabItem value="pydantic" label="Pydantic">
+    ```python
+    from pydantic import BaseModel
+
+    class Person(BaseModel):
+        name: str
+        age: int
+        is_employed: bool
+    ```
+  </TabItem>
+  <TabItem value="rail" label="Markup">
+    ```xml
+    <rail version="0.1">
+      <output>
+        <string name="name" />
+        <integer name="age" />
+        <boolean name="is_employed" />
+      </output>
+    </rail>
+    ```
+  </TabItem>
+</Tabs>
+
+
+## Generate a dictionary of nested types
+
+<Tabs>
+  <TabItem value="json" label="JSON" default>
+    ```json
+    {
+      "name": "John Doe",
+      "age": 30,
+      "is_employed": true,
+      "address": {
+        "street": "123 Main St",
+        "city": "Anytown",
+        "zip": "12345"
+      }
+    }
+    ```
+  </TabItem>
+  <TabItem value="pydantic" label="Pydantic">
+    ```python
+    from pydantic import BaseModel
+
+    class Address(BaseModel):
+        street: str
+        city: str
+        zip: str
+
+    class Person(BaseModel):
+        name: str
+        age: int
+        is_employed: bool
+        address: Address
+    ```
+  </TabItem>
+  <TabItem value="rail" label="Markup">
+    ```xml
+    <rail version="0.1">
+      <output>
+        <string name="name" />
+        <integer name="age" />
+        <boolean name="is_employed" />
+        <object name="address">
+          <string name="street" />
+          <string name="city" />
+          <string name="zip" />
+        </object>
+      </output>
+    </rail>
+    ```
+  </TabItem>
+</Tabs>
+
+
+## Generate a list of types
+
+<Tabs>
+  <TabItem value="json" label="JSON" default>
+    ```json
+    [
+      {
+        "name": "John Doe",
+        "age": 30,
+        "is_employed": true
+      },
+      {
+        "name": "Jane Smith",
+        "age": 25,
+        "is_employed": false
+      }
+    ]
+    ```
+  </TabItem>
+  <TabItem value="pydantic" label="Pydantic">
+    ```python
+    from pydantic import BaseModel
+
+    class Person(BaseModel):
+        name: str
+        age: int
+        is_employed: bool
+
+    people = list[Person]
+    ```
+  </TabItem>
+  <TabItem value="rail" label="Markup">
+    ```xml
+    <rail version="0.1">
+      <output type="list">
+        <object>
+          <string name="name" />
+          <integer name="age" />
+          <boolean name="is_employed" />
+        </object>
+      </output>
+    </rail>
+    ```
+  </TabItem>
+</Tabs>
diff --git a/docusaurus/sidebars.js b/docusaurus/sidebars.js
@@ -44,7 +44,7 @@ const sidebars = {
       type: "category",
       label: "How-to Guides",
       collapsed: true,
-      items: ["how_to_guides/logs", "how_to_guides/streaming", "how_to_guides/llm_api_wrappers", "how_to_guides/rail", "how_to_guides/envvars" ],
+      items: ["how_to_guides/logs", "how_to_guides/streaming", "how_to_guides/llm_api_wrappers", "how_to_guides/rail", "how_to_guides/envvars", "how_to_guides/structured_data_with_guardrails" ],
     },
     "the_guard",
     {

diff --git a/guardrails/classes/history/call.py b/guardrails/classes/history/call.py
@@ -366,3 +366,6 @@ def tree(self) -> Tree:
             )
 
         return tree
+
+    def __str__(self) -> str:
+        return pretty_repr(self)
diff --git a/guardrails/classes/history/iteration.py b/guardrails/classes/history/iteration.py
@@ -189,3 +189,6 @@ def create_msg_history_table(
                     style="on #F0FFF0",
                 ),
             )
+
+    def __str__(self) -> str:
+        return pretty_repr(self)
diff --git a/guardrails/classes/output_type.py b/guardrails/classes/output_type.py
@@ -1,3 +1,3 @@
-from typing import Dict, TypeVar
+from typing import Dict, List, TypeVar
 
-OT = TypeVar("OT", str, Dict)
+OT = TypeVar("OT", str, List, Dict)
diff --git a/guardrails/classes/validation_outcome.py b/guardrails/classes/validation_outcome.py
@@ -1,6 +1,7 @@
 from typing import Generic, Iterator, Optional, Tuple, Union, cast
 
 from pydantic import Field
+from rich.pretty import pretty_repr
 
 from guardrails.classes.history import Call, Iteration
 from guardrails.classes.output_type import OT
@@ -9,7 +10,7 @@
 from guardrails.utils.reask_utils import ReAsk
 
 
-class ValidationOutcome(Generic[OT], ArbitraryModel):
+class ValidationOutcome(ArbitraryModel, Generic[OT]):
     raw_llm_output: Optional[str] = Field(
         description="The raw, unchanged output from the LLM call.", default=None
     )
@@ -83,3 +84,6 @@ def __iter__(
     def __getitem__(self, keys):
         """Get a subset of the ValidationOutcome's fields."""
         return iter(getattr(self, k) for k in keys)
+
+    def __str__(self) -> str:
+        return pretty_repr(self)
diff --git a/guardrails/cli/validate.py b/guardrails/cli/validate.py
@@ -1,13 +1,13 @@
 import json
-from typing import Dict, Union
+from typing import Dict, List, Union
 
 import typer
 
 from guardrails import Guard
 from guardrails.cli.guardrails import guardrails
 
 
-def validate_llm_output(rail: str, llm_output: str) -> Union[str, Dict, None]:
+def validate_llm_output(rail: str, llm_output: str) -> Union[str, Dict, List, None]:
     """Validate guardrails.yml file."""
     guard = Guard.from_rail(rail)
     result = guard.parse(llm_output)

diff --git a/guardrails/guard.py b/guardrails/guard.py
@@ -80,7 +80,9 @@ def __init__(
         self,
         rail: Optional[Rail] = None,
         num_reasks: Optional[int] = None,
-        base_model: Optional[Type[BaseModel]] = None,
+        base_model: Optional[
+            Union[Type[BaseModel], Type[List[Type[BaseModel]]]]
+        ] = None,
         tracer: Optional[Tracer] = None,
     ):
         """Initialize the Guard with optional Rail instance, num_reasks, and
@@ -220,6 +222,10 @@ def from_rail(
             return cast(
                 Guard[str], cls(rail=rail, num_reasks=num_reasks, tracer=tracer)
             )
+        elif rail.output_type == "list":
+            return cast(
+                Guard[List], cls(rail=rail, num_reasks=num_reasks, tracer=tracer)
+            )
         return cast(Guard[Dict], cls(rail=rail, num_reasks=num_reasks, tracer=tracer))
 
     @classmethod
@@ -247,12 +253,16 @@ def from_rail_string(
             return cast(
                 Guard[str], cls(rail=rail, num_reasks=num_reasks, tracer=tracer)
             )
+        elif rail.output_type == "list":
+            return cast(
+                Guard[List], cls(rail=rail, num_reasks=num_reasks, tracer=tracer)
+            )
         return cast(Guard[Dict], cls(rail=rail, num_reasks=num_reasks, tracer=tracer))
 
     @classmethod
     def from_pydantic(
         cls,
-        output_class: Type[BaseModel],
+        output_class: Union[Type[BaseModel], Type[List[Type[BaseModel]]]],
         prompt: Optional[str] = None,
         instructions: Optional[str] = None,
         num_reasks: Optional[int] = None,
@@ -272,6 +282,10 @@ def from_pydantic(
             reask_prompt=reask_prompt,
             reask_instructions=reask_instructions,
         )
+        if rail.output_type == "list":
+            return cast(
+                Guard[List], cls(rail, num_reasks=num_reasks, base_model=output_class)
+            )
         return cast(
             Guard[Dict],
             cls(rail, num_reasks=num_reasks, base_model=output_class, tracer=tracer),