Skip to content

Commit

Permalink
log warning sooner if not pre-processed before training
Browse files: browse the repository at this point in the history
  • Loading branch information
winglian committed Jan 20, 2024
1 parent 334f02c commit 90d73fc
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 4 additions & 0 deletions src/axolotl/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@ def load_tokenized_prepared_datasets(
else:
LOG.info(f"Unable to find prepared dataset in {prepared_ds_path}")
LOG.info("Loading raw datasets...")
if not cfg.is_preprocess:
LOG.warning(
"Processing datasets during training can lead to VRAM instability. Please pre-process your dataset"
)

if cfg.seed:
seed = cfg.seed
Expand Down
4 changes: 0 additions & 4 deletions src/axolotl/utils/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,6 @@ def drop_long_seq(sample, sequence_len=2048):


def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer):
if cfg.is_preprocess:
LOG.warning(
"Processing datasets during training can lead to VRAM instability. Please pre-process your dataset"
)
drop_long = partial(drop_long_seq, sequence_len=cfg.sequence_len)
with zero_first(is_main_process()):
if cfg.group_by_length:
Expand Down

0 comments on commit 90d73fc

Please sign in to comment.