Auto-apply chat template in SequenceGenerator and SequenceGeneratorAdapter, if available #1019

Open · wants to merge 4 commits into base: main
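For context, a minimal usage sketch of the new `apply_chat_template` argument, based on the signatures added in this diff. The model name and prompt are illustrative, and the per-call override assumes the changes below are applied:

    import outlines

    # Illustrative model; any `transformers` model whose tokenizer ships a chat template works.
    model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.2")

    # With this PR, the chat template is applied by default when the generator is built...
    generator = outlines.generate.text(model, apply_chat_template=True)

    # ...and can be overridden for a single call (see `SequenceGenerator.__call__` below).
    answer = generator("What is the capital of France?", apply_chat_template=False)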
59 changes: 58 additions & 1 deletion outlines/generate/api.py
@@ -1,4 +1,5 @@
import datetime
import warnings
from dataclasses import dataclass
from typing import TYPE_CHECKING, Iterator, List, Optional, Union

@@ -20,13 +21,15 @@ def __init__(
model,
sampler,
device,
apply_chat_template: bool = True,
):
self.fsm = fsm
self.model = model
self.sampler = sampler
self.tokenizer = model.tokenizer
self.device = device
self.num_samples = sampler.samples
self.apply_chat_template = apply_chat_template

def get_generated_token_ids(
self,
@@ -132,6 +135,7 @@ def __call__(
max_tokens: Optional[int] = None,
stop_at: Optional[Union[str, List[str]]] = None,
rng: Optional["torch.Generator"] = None,
apply_chat_template: Optional[bool] = None,
) -> Union[FormattedOutput, List[FormattedOutput], List[List[FormattedOutput]]]:
"""Generate the full text sequence.

@@ -153,16 +157,25 @@
rng
The random number generator. Defaults to a non-seeded `torch.Generator`
instance.
apply_chat_template
Whether to apply the chat template to the prompts. Defaults to the value
set at init. Only applies to `TransformerTokenizer` for now.

Returns
-------
The generation(s), potentially cast to another type.
"""
if apply_chat_template is None:
apply_chat_template = self.apply_chat_template

import torch

if isinstance(prompts, str):
prompts = [prompts]

if apply_chat_template:
apply_chat_template_util(self.model, prompts)

if isinstance(stop_at, str):
stop_at = [stop_at]

@@ -250,6 +263,7 @@ def stream(
max_tokens: Optional[int] = None,
stop_at: Optional[Union[str, List[str]]] = None,
rng: Optional["torch.Generator"] = None,
apply_chat_template: Optional[bool] = None,
) -> Iterator[Union[List[str], str, List[List[str]]]]:
"""Generate the text sequence one token at a time.

@@ -270,17 +284,26 @@
rng
The random number generator. Defaults to a non-seeded `torch.Generator`
instance.
apply_chat_template
Whether to apply the chat template to the prompts. Defaults to the value
set at init. Only applies to `TransformerTokenizer` for now.

Returns
-------
A string or list of strings that contain the generated text.

"""
if apply_chat_template is None:
apply_chat_template = self.apply_chat_template

import torch

if isinstance(prompts, str):
prompts = [prompts]

if apply_chat_template:
apply_chat_template_util(self.model, prompts)

if isinstance(stop_at, str):
stop_at = [stop_at]

@@ -423,7 +446,9 @@ class SequenceGeneratorAdapter:

"""

def __init__(self, model, logits_processor, sampler):
def __init__(
self, model, logits_processor, sampler, apply_chat_template: bool = True
):
self.model = model
self.logits_processor = logits_processor

@@ -444,6 +469,8 @@ def __init__(self, model, logits_processor, sampler):
"beam_search", sampler.samples, None, None, 1.0
)

self.apply_chat_template = apply_chat_template

def prepare_generation_parameters(
self,
max_tokens: Optional[int],
@@ -485,9 +512,15 @@ def __call__(
max_tokens: Optional[int] = None,
stop_at: Optional[Union[str, List[str]]] = None,
seed: Optional[int] = None,
apply_chat_template: Optional[bool] = None,
**model_specific_params,
):
"""Generate text from a prompt of list of prompts."""
if apply_chat_template is None:
apply_chat_template = self.apply_chat_template

if apply_chat_template:
apply_chat_template_util(self.model, prompts)

def format(sequences):
"""Apply formatting to every string in a completion."""
@@ -516,9 +549,14 @@ def stream(
max_tokens: Optional[int] = None,
stop_at: Optional[Union[str, List[str]]] = None,
seed: Optional[int] = None,
apply_chat_template: Optional[bool] = None,
**model_specific_params,
):
"""Return a text generator from a prompt or a list of prompts."""
if apply_chat_template is None:
[Collaborator review comment] To make this pythonic, we should have one obvious way of applying a chat template. IMO the argument should only be accepted in the constructor.

apply_chat_template = self.apply_chat_template
if apply_chat_template:
apply_chat_template_util(self.model, prompts)
generation_params = self.prepare_generation_parameters(
max_tokens, stop_at, seed
)
@@ -529,3 +567,22 @@ def stream(
self.sampling_params,
**model_specific_params,
)


def apply_chat_template_util(model, prompts: Union[str, List[str]]) -> List[str]:
from outlines.models.transformers import TransformerTokenizer

if isinstance(prompts, str):
prompts = [prompts]
[Collaborator review comment] I think the signature should be List[str] -> List[str] and raise an error if a list isn't passed. In transformers.py, this function is called after the prompts are normalized to 2D anyways.

if not isinstance(model.tokenizer, TransformerTokenizer):
warnings.warn(
"Chat template is only supported for `Transformer` models for now. The raw prompts will be used instead."
)
return prompts
tokenizer: "TransformerTokenizer" = model.tokenizer
if getattr(tokenizer.tokenizer, "chat_template", None) is None:
warnings.warn(
"The model does not have chat template support. The raw prompts will be used instead. To turn this warning off, either explicitly set the `apply_chat_template` argument to 'False' or assign a value to `model.tokenizer.tokenizer.chat_template`."
)
return prompts
return [tokenizer.apply_chat_template(prompt) for prompt in prompts]
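For reference, a sketch of what the chat-template call can look like on the underlying Hugging Face tokenizer. The `TransformerTokenizer.apply_chat_template` method used above is not shown in this diff; wrapping the raw prompt as a single `user` turn is an assumption made for illustration, and the model name is illustrative:

    from transformers import AutoTokenizer

    # Assumes the model's tokenizer defines a `chat_template`.
    hf_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

    def render_prompt(prompt: str) -> str:
        # Wrap the raw prompt as one user message, render it with the model's
        # chat template as a string (tokenize=False), and append the assistant
        # generation prompt so the model answers as the assistant.
        return hf_tokenizer.apply_chat_template(
            [{"role": "user", "content": prompt}],
            tokenize=False,
            add_generation_prompt=True,
        )

As the second warning above notes, assigning a Jinja template string to `model.tokenizer.tokenizer.chat_template` makes the `getattr` check pass, so the template is applied instead of falling back to the raw prompts.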
14 changes: 11 additions & 3 deletions outlines/generate/cfg.py
@@ -10,7 +10,12 @@


@singledispatch
def cfg(model, cfg_str: str, sampler: Sampler = multinomial()) -> SequenceGenerator:
def cfg(
model,
cfg_str: str,
sampler: Sampler = multinomial(),
apply_chat_template: bool = True,
) -> SequenceGenerator:
"""Generate text in the language of a Context-Free Grammar

Arguments
@@ -29,7 +34,7 @@ def cfg(model, cfg_str: str, sampler: Sampler = multinomial()) -> SequenceGenera
"""
fsm = CFGGuide(cfg_str, model.tokenizer)
device = model.device
generator = SequenceGenerator(fsm, model, sampler, device)
generator = SequenceGenerator(fsm, model, sampler, device, apply_chat_template)

return generator

@@ -40,6 +45,7 @@ def cfg_unimplemented(
model,
cfg_str: str,
sampler: Sampler = multinomial(),
apply_chat_template: bool = True,
):
raise NotImplementedError(
f"The CFG Logits processor is not available for {type(model)}."
@@ -55,7 +61,9 @@ def cfg_llamacpp(
from outlines.integrations.llamacpp import CFGLogitsProcessor

logits_processor = CFGLogitsProcessor(cfg_str, model.model)
return SequenceGeneratorAdapter(model, logits_processor, sampler)
return SequenceGeneratorAdapter(
model, logits_processor, sampler, apply_chat_template=False
)


@cfg.register(OpenAI)
7 changes: 5 additions & 2 deletions outlines/generate/fsm.py
@@ -6,9 +6,12 @@


def fsm(
model, fsm: interegular.fsm.FSM, sampler: Sampler = multinomial()
model,
fsm: interegular.fsm.FSM,
sampler: Sampler = multinomial(),
apply_chat_template: bool = True,
) -> SequenceGenerator:
fsm = RegexGuide.from_interegular_fsm(fsm, model.tokenizer)
device = model.device
generator = SequenceGenerator(fsm, model, sampler, device)
generator = SequenceGenerator(fsm, model, sampler, device, apply_chat_template)
return generator
23 changes: 18 additions & 5 deletions outlines/generate/regex.py
@@ -10,7 +10,12 @@


@singledispatch
def regex(model, regex_str: str, sampler: Sampler = multinomial()):
def regex(
model,
regex_str: str,
sampler: Sampler = multinomial(),
apply_chat_template: bool = True,
):
"""Generate structured text in the language of a regular expression.

Parameters
@@ -33,7 +38,7 @@ def regex(model, regex_str: str, sampler: Sampler = multinomial()):
fsm = RegexGuide(regex_str, model.tokenizer)

device = model.device
generator = SequenceGenerator(fsm, model, sampler, device)
generator = SequenceGenerator(fsm, model, sampler, device, apply_chat_template)

return generator

@@ -43,11 +48,14 @@ def regex_mlxlm(
model: MLXLM,
regex_str: str,
sampler: Sampler = multinomial(),
apply_chat_template: bool = True,
):
from outlines.processors import RegexLogitsProcessor

logits_processor = RegexLogitsProcessor(regex_str, tokenizer=model.tokenizer)
return SequenceGeneratorAdapter(model, logits_processor, sampler)
return SequenceGeneratorAdapter(
model, logits_processor, sampler, apply_chat_template
)


@regex.register(LlamaCpp)
@@ -59,19 +67,24 @@ def regex_llamacpp(
from outlines.integrations.llamacpp import RegexLogitsProcessor

logits_processor = RegexLogitsProcessor(regex_str, llm=model.model)
return SequenceGeneratorAdapter(model, logits_processor, sampler)
return SequenceGeneratorAdapter(
model, logits_processor, sampler, apply_chat_template=False
)


@regex.register(VLLM)
def regex_vllm(
model: VLLM,
regex_str: str,
sampler: Sampler = multinomial(),
apply_chat_template: bool = True,
):
from outlines.integrations.vllm import RegexLogitsProcessor

logits_processor = RegexLogitsProcessor(regex_str, model.model)
return SequenceGeneratorAdapter(model, logits_processor, sampler)
return SequenceGeneratorAdapter(
model, logits_processor, sampler, apply_chat_template
)


@regex.register(OpenAI)
20 changes: 13 additions & 7 deletions outlines/generate/text.py
@@ -7,7 +7,9 @@


@singledispatch
def text(model, sampler: Sampler = multinomial()) -> SequenceGenerator:
def text(
model, sampler: Sampler = multinomial(), apply_chat_template: bool = True
) -> SequenceGenerator:
"""Generate text with a `Transformer` model.

Note
@@ -31,24 +33,28 @@ def text(model, sampler: Sampler = multinomial()) -> SequenceGenerator:
"""
fsm = StopAtEOSGuide(model.tokenizer)
device = model.device
generator = SequenceGenerator(fsm, model, sampler, device)
generator = SequenceGenerator(fsm, model, sampler, device, apply_chat_template)

return generator


@text.register(MLXLM)
def text_mlxlm(model: MLXLM, sampler: Sampler = multinomial()):
return SequenceGeneratorAdapter(model, None, sampler)
def text_mlxlm(
model: MLXLM, sampler: Sampler = multinomial(), apply_chat_template: bool = True
):
return SequenceGeneratorAdapter(model, None, sampler, apply_chat_template)


@text.register(VLLM)
def text_vllm(model: VLLM, sampler: Sampler = multinomial()):
return SequenceGeneratorAdapter(model, None, sampler)
def text_vllm(
model: VLLM, sampler: Sampler = multinomial(), apply_chat_template: bool = True
):
return SequenceGeneratorAdapter(model, None, sampler, apply_chat_template)


@text.register(LlamaCpp)
def text_llamacpp(model: LlamaCpp, sampler: Sampler = multinomial()):
return SequenceGeneratorAdapter(model, None, sampler)
return SequenceGeneratorAdapter(model, None, sampler, apply_chat_template=False)


@text.register(OpenAI)