
Commit d23807b: Add LlamaSequenceGenerator
We currently store the logits processor in the `LlamaCpp` instance. This
causes issues when doing successive generations with different
generators, since each new generator overwrites the processor the
previous one set on the shared model. In this PR we create a new
`LlamaSequenceGenerator` instance every time we create a new generator,
and store the logits processor in this instance, which solves the issue.

Fixes #700.
rlouf committed Feb 22, 2024
1 parent 10871cf commit d23807b
Showing 5 changed files with 31 additions and 48 deletions.
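
Before the per-file diffs, a minimal, library-free sketch of the failure mode described above and the shape of the fix. All names below are illustrative stand-ins, not the actual Outlines API:

# Before: generator state lives on the shared model object.
class Model:
    def __init__(self):
        self.logits_processor = None  # shared, mutable state

def make_generator_old(model, processor):
    model.logits_processor = processor  # clobbers the previous generator's processor
    return model                        # every "generator" is the same object

# After: each generator owns its processor (cf. LlamaSequenceGenerator).
class Generator:
    def __init__(self, processor, model):
        self.logits_processor = processor
        self.model = model

def make_generator_new(model, processor):
    return Generator(processor, model)

model = Model()
a = make_generator_old(model, "regex processor")
b = make_generator_old(model, "cfg processor")
assert a.logits_processor == "cfg processor"  # `a` was silently changed

a2 = make_generator_new(model, "regex processor")
b2 = make_generator_new(model, "cfg processor")
assert a2.logits_processor == "regex processor"  # each generator keeps its own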
10 changes: 7 additions & 3 deletions outlines/generate/cfg.py
@@ -3,7 +3,11 @@
 from outlines.fsm.fsm import CFGFSM
 from outlines.generate.api import SequenceGenerator
 from outlines.models import OpenAI
-from outlines.models.llamacpp import CFGLogitsProcessor, LlamaCpp
+from outlines.models.llamacpp import (
+    CFGLogitsProcessor,
+    LlamaCpp,
+    LlamaSequenceGenerator,
+)
 from outlines.samplers import Sampler, multinomial


@@ -45,9 +49,9 @@ def cfg_llamacpp(
     )

     logits_processor = CFGLogitsProcessor(cfg_str, model.tokenizer)
-    model.logits_processor = logits_processor
+    generator = LlamaSequenceGenerator(logits_processor, model)

-    return model
+    return generator


 @cfg.register(OpenAI)
10 changes: 7 additions & 3 deletions outlines/generate/regex.py
@@ -3,7 +3,11 @@
 from outlines.fsm.fsm import RegexFSM
 from outlines.generate.api import SequenceGenerator
 from outlines.models import OpenAI
-from outlines.models.llamacpp import LlamaCpp, RegexLogitsProcessor
+from outlines.models.llamacpp import (
+    LlamaCpp,
+    LlamaSequenceGenerator,
+    RegexLogitsProcessor,
+)
 from outlines.samplers import Sampler, multinomial


@@ -49,9 +53,9 @@ def regex_llamacpp(
     )

     logits_processor = RegexLogitsProcessor(regex_str, model.tokenizer)
-    model.logits_processor = logits_processor
+    generator = LlamaSequenceGenerator(logits_processor, model)

-    return model
+    return generator


 @regex.register(OpenAI)
5 changes: 4 additions & 1 deletion outlines/generate/text.py
@@ -3,6 +3,7 @@
 from outlines.fsm.fsm import StopAtEosFSM
 from outlines.generate import SequenceGenerator
 from outlines.models import LlamaCpp, OpenAI
+from outlines.models.llamacpp import LlamaSequenceGenerator
 from outlines.samplers import Sampler, multinomial


@@ -44,7 +45,9 @@ def text_llamacpp(model: LlamaCpp, sampler: Sampler = multinomial()):
+ "than the multinomial sampler."
)

return model
generator = LlamaSequenceGenerator(None, model)

return generator


@text.register(OpenAI)
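Note the `None` passed for the logits processor in `text_llamacpp`: unconstrained text generation is simply a `LlamaSequenceGenerator` without a processor, so the cfg, regex, and text entry points all return the same wrapper type.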
21 changes: 13 additions & 8 deletions outlines/models/llamacpp.py
@@ -8,17 +8,12 @@
 from outlines.fsm.fsm import CFGFSM, FSM, FSMState, RegexFSM


-class LlamaCpp:
-    """Represents a `llama_cpp` model."""
-
+class LlamaSequenceGenerator:
     def __init__(
-        self, model_path, logits_processor: Optional["LogitsProcessor"] = None, **kwargs
+        self, logits_processor: Optional["LogitsProcessor"], model: "LlamaCpp"
     ):
-        from llama_cpp import Llama
-
+        self.model = model.model
         self.logits_processor = logits_processor
-        self.model = Llama(model_path, **kwargs)
-        self.tokenizer = LlamaCppTokenizer(self)

     def __call__(
         self,
@@ -89,6 +84,16 @@ def stream(
         )


+class LlamaCpp:
+    """Represents a `llama_cpp` model."""
+
+    def __init__(self, model_path, **kwargs):
+        from llama_cpp import Llama
+
+        self.model = Llama(model_path, **kwargs)
+        self.tokenizer = LlamaCppTokenizer(self)
+
+
 class LlamaCppTokenizer:
     def __init__(self, model, **kwargs):
         self.eos_token_id = model.model.token_eos()
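
Taken together, the llamacpp.py change splits responsibilities: `LlamaCpp` owns the `llama_cpp.Llama` instance and the tokenizer, while each `LlamaSequenceGenerator` pairs one logits processor with a model. A hypothetical usage sketch of the result; the `models.llamacpp` helper and the model path are assumptions, not shown in this diff:

from outlines import generate, models

# Assumed constructor helper; the path is a placeholder for a local GGUF file.
model = models.llamacpp("./model.gguf")

# Each call now returns a fresh LlamaSequenceGenerator carrying its own
# logits processor, so the two generators no longer interfere.
digits = generate.regex(model, r"[0-9]{4}")
free_text = generate.text(model)

print(digits("The year is "))
print(free_text("Tell me a story: "))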
33 changes: 0 additions & 33 deletions tests/models/test_llama_cpp.py

This file was deleted.
