fix eval_steps to be a sane default (axolotl-ai-cloud#797)
* fix eval_steps to be a sane default

* update docs for fractional eval_steps
winglian committed Oct 28, 2023
1 parent 349de32 commit 4270cec
Showing 23 changed files with 36 additions and 36 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -618,14 +618,14 @@ gradient_accumulation_steps: 1
# The number of samples to include in each batch. This is the number of samples sent to each GPU.
micro_batch_size: 2
eval_batch_size:
-num_epochs: 3
+num_epochs: 4
warmup_steps: 100
learning_rate: 0.00003
lr_quadratic_warmup:
logging_steps:
save_strategy: # Set to `no` to skip checkpoint saves
save_steps: # Leave empty to save at each epoch
-eval_steps: # Leave empty to eval at each epoch
+eval_steps: # Leave empty to eval at each epoch, an integer to eval every N steps, or a decimal for a fraction of the total steps
save_total_limit: # Checkpoints saved at a time
# Maximum number of iterations to train for. It precedes num_epochs which means that
# if both are set, num_epochs will not be guaranteed.
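For context on what the fractional setting means in practice, here is a minimal, hypothetical excerpt of an axolotl training config using the values introduced by this commit. The surrounding keys are illustrative, and the ratio interpretation assumes `eval_steps` is passed through to the underlying Hugging Face Trainer, where a float below 1 is treated as a fraction of the total training steps.

# Hypothetical config excerpt (a sketch, not a complete axolotl config).
num_epochs: 4
warmup_steps: 10
# Fractional form: run evaluation after every 5% of the total training steps
# (roughly 20 evaluations over the whole run, regardless of dataset size).
eval_steps: 0.05
# An integer would instead evaluate every N optimizer steps, e.g. `eval_steps: 20`.
save_steps:   # leave empty to save at the end of each epoch
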
2 changes: 1 addition & 1 deletion examples/cerebras/qlora.yml
@@ -49,7 +49,7 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
save_steps:
debug:
deepspeed:
4 changes: 2 additions & 2 deletions examples/code-llama/13b/lora.yml
@@ -34,7 +34,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -54,7 +54,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
save_steps:
debug:
deepspeed:
4 changes: 2 additions & 2 deletions examples/code-llama/13b/qlora.yml
@@ -36,7 +36,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -56,7 +56,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
save_steps:
debug:
deepspeed:
4 changes: 2 additions & 2 deletions examples/code-llama/34b/lora.yml
@@ -34,7 +34,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -54,7 +54,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
save_steps:
debug:
deepspeed:
4 changes: 2 additions & 2 deletions examples/code-llama/34b/qlora.yml
@@ -36,7 +36,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -56,7 +56,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
save_steps:
debug:
deepspeed:
4 changes: 2 additions & 2 deletions examples/code-llama/7b/lora.yml
@@ -34,7 +34,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -54,7 +54,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
save_steps:
debug:
deepspeed:
4 changes: 2 additions & 2 deletions examples/code-llama/7b/qlora.yml
@@ -36,7 +36,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -56,7 +56,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
save_steps:
debug:
deepspeed:
2 changes: 1 addition & 1 deletion examples/falcon/config-7b-qlora.yml
@@ -53,7 +53,7 @@ output_dir: ./qlora-out
# decrease if OOM, increase for max VRAM utilization
micro_batch_size: 1
gradient_accumulation_steps: 2
-num_epochs: 3
+num_epochs: 4
# Optimizer for QLoRA
optimizer: paged_adamw_32bit
torchdistx_path:
2 changes: 1 addition & 1 deletion examples/gptj/qlora.yml
@@ -46,7 +46,7 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
save_steps:
debug:
deepspeed:
2 changes: 1 addition & 1 deletion examples/jeopardy-bot/config.yml
@@ -24,7 +24,7 @@ wandb_log_model:
output_dir: ./jeopardy-bot-7b
gradient_accumulation_steps: 1
micro_batch_size: 1
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
torchdistx_path:
lr_scheduler: cosine
2 changes: 1 addition & 1 deletion examples/llama-2/gptq-lora.yml
@@ -37,7 +37,7 @@ wandb_log_model:
output_dir: ./model-out
gradient_accumulation_steps: 1
micro_batch_size: 1
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_torch
adam_beta2: 0.95
adam_eps: 0.00001
4 changes: 2 additions & 2 deletions examples/llama-2/lora.yml
@@ -34,7 +34,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -54,7 +54,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
4 changes: 2 additions & 2 deletions examples/llama-2/qlora.yml
@@ -36,7 +36,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: paged_adamw_32bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -56,7 +56,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
eval_table_size:
save_steps:
debug:
4 changes: 2 additions & 2 deletions examples/llama-2/relora.yml
@@ -40,7 +40,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 4
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -60,7 +60,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
save_steps: 50
debug:
deepspeed:
4 changes: 2 additions & 2 deletions examples/llama-2/tiny-llama.yml
@@ -34,7 +34,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002
@@ -54,7 +54,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
eval_table_size:
save_steps:
debug:
4 changes: 2 additions & 2 deletions examples/mistral/config.yml
@@ -26,7 +26,7 @@ wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.000005
@@ -46,7 +46,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
2 changes: 1 addition & 1 deletion examples/mistral/qlora.yml
@@ -63,7 +63,7 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
-eval_steps: 20
+eval_steps: 0.05
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
2 changes: 1 addition & 1 deletion examples/mpt-7b/config.yml
@@ -26,7 +26,7 @@ wandb_log_model:
output_dir: ./mpt-alpaca-7b
gradient_accumulation_steps: 1
micro_batch_size: 1
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
torchdistx_path:
lr_scheduler: cosine
4 changes: 2 additions & 2 deletions examples/pythia/lora.yml
@@ -23,7 +23,7 @@ wandb_log_model:
output_dir: ./lora-alpaca-pythia
gradient_accumulation_steps: 1
micro_batch_size: 4
-num_epochs: 3
+num_epochs: 4
learning_rate: 0.00001
train_on_inputs: false
group_by_length: false
@@ -33,5 +33,5 @@ early_stopping_patience:
resume_from_checkpoint:
local_rank:
weight_decay: 0.1
-eval_steps: 20
+eval_steps: 0.05
logging_steps: 1
2 changes: 1 addition & 1 deletion examples/redpajama/config-3b.yml
@@ -27,7 +27,7 @@ wandb_log_model:
output_dir: ./redpajama-alpaca-3b
batch_size: 4
micro_batch_size: 1
-num_epochs: 3
+num_epochs: 4
optimizer: adamw_bnb_8bit
torchdistx_path:
lr_scheduler: cosine
2 changes: 1 addition & 1 deletion examples/replit-3b/config-lora.yml
@@ -26,7 +26,7 @@ wandb_log_model:
output_dir: ./lora-replit
batch_size: 8
micro_batch_size: 1
-num_epochs: 3
+num_epochs: 4
optimizer:
torchdistx_path:
lr_scheduler:
2 changes: 1 addition & 1 deletion examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -51,7 +51,7 @@ output_dir: ./qlora-out
# decrease if OOM, increase for max VRAM utilization
micro_batch_size: 1
gradient_accumulation_steps: 1
-num_epochs: 3
+num_epochs: 4
# Optimizer for QLoRA
optimizer: paged_adamw_32bit
torchdistx_path:
