Skip to content

Commit

Permalink
new evals_per_epoch and saves_per_epoch to make things cleaner (#944)
Browse files Browse the repository at this point in the history
* new evals_per_epoch and saves_per_epoch to make things cleaner

* update per PR feedback
  • Loading branch information
winglian committed Dec 12, 2023
1 parent f1de29d commit 5f79b82
Show file tree
Hide file tree
Showing 37 changed files with 102 additions and 70 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -691,9 +691,11 @@ warmup_ratio: 0.05 # cannot use with warmup_steps
learning_rate: 0.00003
lr_quadratic_warmup:
logging_steps:
eval_steps: # Leave empty to eval at each epoch; use an integer to eval every N steps, or a decimal for a fraction of total steps
evals_per_epoch: # number of times per epoch to run evals, mutually exclusive with eval_steps
save_strategy: # Set to `no` to skip checkpoint saves
save_steps: # Leave empty to save at each epoch
eval_steps: # Leave empty to eval at each epoch; use an integer to eval every N steps, or a decimal for a fraction of total steps
saves_per_epoch: # number of times per epoch to save a checkpoint, mutually exclusive with save_steps
save_total_limit: # Maximum number of checkpoints kept at a time
# Maximum number of iterations to train for. It takes precedence over num_epochs, which means that
# if both are set, num_epochs will not be guaranteed.
Expand Down
4 changes: 2 additions & 2 deletions examples/cerebras/btlm-ft.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ gptq_groupsize:
gptq_model_v1:

warmup_steps: 32
eval_steps:
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
save_total_limit:

debug:
Expand Down
4 changes: 2 additions & 2 deletions examples/cerebras/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
4 changes: 2 additions & 2 deletions examples/code-llama/13b/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/code-llama/13b/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/code-llama/34b/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/code-llama/34b/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/code-llama/7b/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/code-llama/7b/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/falcon/config-7b-lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 40
eval_steps: 5
save_steps: 43
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/falcon/config-7b-qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 5
save_steps: 10
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.000001
Expand Down
4 changes: 2 additions & 2 deletions examples/falcon/config-7b.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 40
eval_steps: 5
save_steps: 43
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/gptj/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
4 changes: 2 additions & 2 deletions examples/jeopardy-bot/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 110
save_steps: 660
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
4 changes: 2 additions & 2 deletions examples/llama-2/fft_optimized.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ flash_attn_fuse_qkv: false
flash_attn_fuse_mlp: true

warmup_steps: 100
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
save_steps:
saves_per_epoch: 1
debug:
deepspeed: #deepspeed/zero2.json # multi-gpu only
weight_decay: 0.1
Expand Down
4 changes: 2 additions & 2 deletions examples/llama-2/gptq-lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ flash_attention:
sdp_attention:
flash_optimum:
warmup_steps: 100
eval_steps:
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
4 changes: 2 additions & 2 deletions examples/llama-2/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/llama-2/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/llama-2/relora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
save_steps: 50
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/llama-2/tiny-llama.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/mamba/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ xformers_attention:
flash_attention:

warmup_steps: 10
eval_steps:
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps: 0.25
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/mistral/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ xformers_attention:
flash_attention: true

warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/mistral/mixtral.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3

warmup_steps: 10
eval_steps:
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed: deepspeed/zero2.json
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/mistral/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ loss_watchdog_threshold: 5.0
loss_watchdog_patience: 3

warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/mpt-7b/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 110
save_steps: 660
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0001
Expand Down
4 changes: 2 additions & 2 deletions examples/openllama-3b/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ flash_attention: true
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
4 changes: 2 additions & 2 deletions examples/openllama-3b/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ flash_attention: true
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
4 changes: 2 additions & 2 deletions examples/openllama-3b/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ flash_attention: true
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
4 changes: 2 additions & 2 deletions examples/phi/phi-ft.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ xformers_attention:
flash_attention:

warmup_steps: 100
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
4 changes: 2 additions & 2 deletions examples/phi/phi-qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ xformers_attention:
flash_attention:

warmup_steps: 100
eval_steps: 0.05
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/pythia/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ early_stopping_patience:
resume_from_checkpoint:
local_rank:
weight_decay: 0.1
eval_steps: 0.05
evals_per_epoch: 4
logging_steps: 1
4 changes: 2 additions & 2 deletions examples/qwen/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ xformers_attention:
flash_attention:

warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/qwen/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ xformers_attention:
flash_attention:

warmup_steps: 10
eval_steps: 0.05
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
4 changes: 2 additions & 2 deletions examples/redpajama/config-3b.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 110
save_steps: 660
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0001
Expand Down
4 changes: 2 additions & 2 deletions examples/replit-3b/config-lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 50
save_steps:
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0
Expand Down
Loading

0 comments on commit 5f79b82

Please sign in to comment.