Skip to content

Commit

Permalink
Fix `add_prefix_space` being ignored in the Llama fast tokenizer (#29625) (#30964)
Browse files (browse the repository at this point in the history)
* Fix `add_prefix_space` being ignored in the Llama fast tokenizer (#29625)

* adding test with add_prefix_space=False

* ruff

---------

Co-authored-by: Ita Zaporozhets <itazaporozhets@Itas-MBP.localdomain>
  • Loading branch information
2 people authored and Ita Zaporozhets committed May 24, 2024
1 parent f3dd76b commit 60c790b
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/transformers/models/llama/tokenization_llama_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def __init__(
add_bos_token=add_bos_token,
add_eos_token=add_eos_token,
use_default_system_prompt=use_default_system_prompt,
add_prefix_space=add_prefix_space,
legacy=legacy,
**kwargs,
)
Expand Down
4 changes: 4 additions & 0 deletions tests/models/llama/test_tokenization_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,10 @@ def test_special_token_special_word(self):
self.assertEqual(decoded_tokens, "hello")

def test_no_prefix_space(self):
tokenizer_no_prefix_space = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b", add_prefix_space=False)
no_prefix_space_tokens = tokenizer_no_prefix_space.tokenize("Hey")
self.assertEqual(no_prefix_space_tokens, ["H", "ey"])

tokenizer = LlamaTokenizerFast.from_pretrained(
"huggyllama/llama-7b", legacy=False, from_slow=True, add_prefix_space=False
)
Expand Down

0 comments on commit 60c790b

Please sign in to comment.