diff --git a/gitbook/README.md b/gitbook/README.md
index 5c4b4d58a..642bde22a 100644
--- a/gitbook/README.md
+++ b/gitbook/README.md
@@ -1,2 +1 @@
 # Page
-
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index d907a194b..2fbbea6c1 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -406,6 +406,20 @@ def load_model(
         )
         model.config.max_position_embeddings = cfg.sequence_len

+    if (
+        hasattr(model.config, "bos_token_id")
+        and model.config.bos_token_id
+        and model.config.bos_token_id != tokenizer.bos_token_id
+    ):
+        model.config.bos_token_id = tokenizer.bos_token_id
+
+    if (
+        hasattr(model.config, "eos_token_id")
+        and model.config.eos_token_id
+        and model.config.eos_token_id != tokenizer.eos_token_id
+    ):
+        model.config.eos_token_id = tokenizer.eos_token_id
+
     if model.device.type == "cuda":
         log_gpu_memory_usage(LOG, "after model load", model.device)

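For context, a minimal standalone sketch of the sync this hunk performs: when the model config already defines a truthy bos_token_id or eos_token_id that disagrees with the tokenizer, the config is overwritten to match the tokenizer. This sketch assumes a Hugging Face-style config and tokenizer; the checkpoint name is a placeholder, not taken from the patch.

# Illustrative only, not part of the patch.
from transformers import AutoConfig, AutoTokenizer

MODEL = "huggyllama/llama-7b"  # placeholder checkpoint
config = AutoConfig.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Mirror the patched logic for both special tokens: only override when the
# config attribute exists, is truthy, and disagrees with the tokenizer.
for attr in ("bos_token_id", "eos_token_id"):
    config_id = getattr(config, attr, None)
    tokenizer_id = getattr(tokenizer, attr)
    if config_id and config_id != tokenizer_id:
        setattr(config, attr, tokenizer_id)

Note that the truthiness check (`and model.config.bos_token_id`) leaves a config id of None untouched, and, as the patch is written, would also skip an id of 0.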