Skip to content

Commit

Permalink
update descriptions
Browse files Browse the repository at this point in the history
  • Loading branch information
winglian committed Jan 23, 2024
1 parent 8f04f14 commit efe1fb2
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/axolotl/utils/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,23 +143,23 @@ def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer):
add_length,
num_proc=cfg.dataset_processes,
load_from_cache_file=not cfg.is_preprocess,
- desc="Packing (Group By Length)",
+ desc="Group By Length",
)

if cfg.sample_packing:
train_dataset = train_dataset.map(
add_position_ids,
num_proc=cfg.dataset_processes,
load_from_cache_file=not cfg.is_preprocess,
- desc="Packing (Sample Packing)",
+ desc="Add position_id column (Sample Packing)",
)
if cfg.eval_sample_packing is not False:
if eval_dataset:
eval_dataset = eval_dataset.map(
add_position_ids,
num_proc=cfg.dataset_processes,
load_from_cache_file=not cfg.is_preprocess,
- desc="Packing (Sample Packing)",
+ desc="Add position_id column (Sample Packing)",
)

return train_dataset, eval_dataset
Expand All @@ -174,7 +174,7 @@ def process_pretraining_datasets_for_packing(train_dataset, sequence_len):
)
train_dataset = train_dataset.map(
add_position_ids,
- desc="Packing Pretraining Dataset",
+ desc="Add position_id column (Pretraining Sample Packing)",
)
return train_dataset

Expand Down

0 comments on commit efe1fb2

Please sign in to comment.