Skip to content

Commit

Permalink
Remove unnecessary vocabulary copies in RegexGuide
Browse files (browse the repository at this point in the history)
  • Loading branch information
rlouf committed Apr 20, 2024
1 parent d548d92 commit 4d6ec1f
Showing 1 changed file with 4 additions and 12 deletions.
16 changes: 4 additions & 12 deletions outlines/fsm/guide.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -111,9 +111,7 @@ class RegexGuide(Guide):

def __init__(self, regex_string: str, tokenizer):
@cache()
def create_states_mapping(
regex_string: str, cacheable_vocabulary: Tuple[Tuple[str, int], ...]
) -> Tuple[dict, set, set]:
def create_states_mapping(regex_string: str) -> Tuple[dict, set, set]:
"""Create the variables related to the mapping between states and tokens
The parameters of the function are used for caching purpose
"""
Expand Down Expand Up @@ -143,10 +141,7 @@ def create_states_mapping(
self.states_to_token_maps,
self.empty_token_ids,
fsm_finals,
) = create_states_mapping(
regex_string, tuple(sorted(tokenizer.vocabulary.items()))
)
self.vocabulary = list(tokenizer.vocabulary.values())
) = create_states_mapping(regex_string)
self.eos_token_id = tokenizer.eos_token_id
self.final_states = fsm_finals | {-1}

Expand Down Expand Up @@ -218,7 +213,7 @@ def from_interegular_fsm(
from_interegular_instance = cls.__new__(cls)

def create_states_mapping_from_interegular_fsm(
fsm: interegular.fsm.FSM, cacheable_vocabulary: Tuple[Tuple[str, int], ...]
fsm: interegular.fsm.FSM,
) -> Tuple[dict, set]:
"""Create the variables related to the mapping between states and tokens
The parameters of the function are used for caching purpose
Expand All @@ -245,10 +240,7 @@ def create_states_mapping_from_interegular_fsm(
(
from_interegular_instance.states_to_token_maps,
from_interegular_instance.empty_token_ids,
) = create_states_mapping_from_interegular_fsm(
interegular_fsm, tuple(sorted(tokenizer.vocabulary.items()))
)
from_interegular_instance.vocabulary = list(tokenizer.vocabulary.values())
) = create_states_mapping_from_interegular_fsm(interegular_fsm)
from_interegular_instance.eos_token_id = tokenizer.eos_token_id
return from_interegular_instance

Expand Down

0 comments on commit 4d6ec1f

Please sign in to comment.