Skip to content

Commit

Permalink
update per PR feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
winglian committed Dec 12, 2023
1 parent 0c053ca commit 5467f13
Show file tree
Hide file tree
Showing 27 changed files with 46 additions and 29 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -691,10 +691,11 @@ warmup_ratio: 0.05 # cannot use with warmup_steps
learning_rate: 0.00003
lr_quadratic_warmup:
logging_steps:
eval_steps: # Leave empty to eval at each epoch, integers for every N steps. decimal for fraction of total steps
evals_per_epoch: # number of times per epoch to run evals, mutually exclusive with eval_steps
save_strategy: # Set to `no` to skip checkpoint saves
save_steps: # Leave empty to save at each epoch
saves_per_epoch: # number of times per epoch to save a checkpoint
eval_steps: # Leave empty to eval at each epoch; use an integer for every N steps, or a decimal for a fraction of total steps
saves_per_epoch: # number of times per epoch to save a checkpoint, mutually exclusive with save_steps
save_total_limit: # Maximum number of checkpoints kept at a time
# Maximum number of iterations to train for. It takes precedence over num_epochs, which means that
# if both are set, num_epochs is not guaranteed to be reached.
Expand Down
2 changes: 1 addition & 1 deletion examples/cerebras/btlm-ft.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ gptq_model_v1:

warmup_steps: 32
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
save_total_limit:

debug:
Expand Down
2 changes: 1 addition & 1 deletion examples/cerebras/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/code-llama/13b/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/code-llama/13b/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/code-llama/34b/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/code-llama/34b/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/code-llama/7b/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/code-llama/7b/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ flash_attention: true

warmup_steps: 10
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/gptj/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/llama-2/fft_optimized.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ flash_attn_fuse_mlp: true
warmup_steps: 100
evals_per_epoch: 4
eval_table_size:
save_steps:
saves_per_epoch: 1
debug:
deepspeed: #deepspeed/zero2.json # multi-gpu only
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/llama-2/gptq-lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ sdp_attention:
flash_optimum:
warmup_steps: 100
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/llama-2/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/llama-2/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ flash_attention: true
warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/llama-2/tiny-llama.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ flash_attention: true
warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/mistral/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/mistral/mixtral.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed: deepspeed/zero2.json
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/mistral/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/openllama-3b/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/openllama-3b/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/openllama-3b/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/phi/phi-ft.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ flash_attention:

warmup_steps: 100
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/phi/phi-qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ flash_attention:

warmup_steps: 100
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
Expand Down
2 changes: 1 addition & 1 deletion examples/qwen/lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/qwen/qlora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ warmup_steps: 10
evals_per_epoch: 4
eval_table_size:
eval_table_max_new_tokens: 128
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
Expand Down
2 changes: 1 addition & 1 deletion examples/replit-3b/config-lora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
evals_per_epoch: 4
save_steps:
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0
Expand Down
20 changes: 18 additions & 2 deletions src/axolotl/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,13 @@ def normalize_config(cfg):
cfg.torch_dtype = torch.float32

if cfg.saves_per_epoch:
cfg.save_steps = 1.0 / (cfg.saves_per_epoch * cfg.num_epochs)
save_steps = 1.0 / (cfg.saves_per_epoch * cfg.num_epochs)
if save_steps < 1.0: # prevent saves on every step
cfg.save_steps = save_steps
if cfg.evals_per_epoch:
cfg.eval_steps = 1.0 / (cfg.evals_per_epoch * cfg.num_epochs)
eval_steps = 1.0 / (cfg.evals_per_epoch * cfg.num_epochs)
if eval_steps < 1.0: # prevent evals on every step
cfg.eval_steps = eval_steps

cfg.dataset_processes = cfg.dataset_processes or os.cpu_count()

Expand Down Expand Up @@ -362,10 +366,22 @@ def validate_config(cfg):
raise ValueError(
"save_steps and saves_per_epoch are mutually exclusive and cannot be used together."
)
if cfg.saves_per_epoch and cfg.save_strategy and cfg.save_strategy != "steps":
raise ValueError(
"save_strategy must be empty or set to `steps` when used with saves_per_epoch."
)
if cfg.evals_per_epoch and cfg.eval_steps:
raise ValueError(
"eval_steps and evals_per_epoch are mutually exclusive and cannot be used together."
)
if (
cfg.evals_per_epoch
and cfg.evaluation_strategy
and cfg.evaluation_strategy != "steps"
):
raise ValueError(
"evaluation_strategy must be empty or set to `steps` when used with evals_per_epoch."
)
if cfg.save_strategy and cfg.save_steps and cfg.save_strategy != "steps":
raise ValueError(
"save_strategy and save_steps mismatch. Please set save_strategy to 'steps' or remove save_steps."
Expand Down

0 comments on commit 5467f13

Please sign in to comment.