Skip to content

Commit

Permalink
fix(tokenizer): update log order after update (axolotl-ai-cloud#806)
Browse files Browse the repository at this point in the history
  • Loading branch information
NanoCode012 committed Oct 31, 2023
1 parent 949fe9f commit d13f04e
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions src/axolotl/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,6 @@ def load_tokenizer(cfg):
# set a pad_token, but use eos_token so we don't add a new token
tokenizer.pad_token = LLAMA_DEFAULT_EOS_TOKEN

LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")

if tokenizer.__class__.__name__ == "GPTNeoXTokenizerFast":
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
os.environ["TOKENIZERS_PARALLELISM"] = "false"
Expand All @@ -98,6 +93,11 @@ def load_tokenizer(cfg):
]
)

LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")
LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}")
LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}")

return tokenizer


Expand Down

0 comments on commit d13f04e

Please sign in to comment.