
Commit

Merge branch 'main' into cosine_min_lr

winglian authored Jan 9, 2024
2 parents b64432e + c3e8165 commit 2eb9f6e
Showing 13 changed files with 289 additions and 126 deletions.
2 changes: 1 addition & 1 deletion .github/FUNDING.yml
@@ -3,7 +3,7 @@
github: OpenAccess-AI-Collective # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
ko_fi: axolotl_ai # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
31 changes: 31 additions & 0 deletions README.md
@@ -43,6 +43,7 @@ Features:
- [Badge](#badge-)
- [Community Showcase](#community-showcase)
- [Contributing](#contributing-)
- [Sponsors](#sponsors-)

</td>
<td>
@@ -1150,3 +1151,33 @@ pre-commit install
# test
pytest tests/
```

## Sponsors 🤝❤

OpenAccess AI Collective is run by volunteer contributors such as [winglian](https://github.com/winglian),
[NanoCode012](https://github.com/NanoCode012), [tmm1](https://github.com/tmm1),
[mhenrichsen](https://github.com/mhenrichsen), [casper-hansen](https://github.com/casper-hansen),
[hamelsmu](https://github.com/hamelsmu), and many more who help us accelerate forward by fixing bugs, answering
community questions, and implementing new features. Axolotl needs donations from sponsors for the compute needed to
run our unit & integration tests, for troubleshooting community issues, and for providing bounties. If you love
axolotl, consider sponsoring the project via [GitHub Sponsors](https://github.com/sponsors/OpenAccess-AI-Collective)
or [Ko-fi](https://ko-fi.com/axolotl_ai), or reach out directly to
[wing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org).

---

#### 💎 Diamond Sponsors - [Contact directly](mailto:wing@openaccessaicollective.org)

---

#### 🥇 Gold Sponsors - $5000/mo

---

#### 🥈 Silver Sponsors - $1000/mo

---

#### 🥉 Bronze Sponsors - $500/mo

---
73 changes: 73 additions & 0 deletions examples/phi/phi2-ft.yml
@@ -0,0 +1,73 @@
base_model: microsoft/phi-2
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
trust_remote_code: true

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: garage-bAInd/Open-Platypus
    type: alpaca

dataset_prepared_path:
val_set_size: 0.05
output_dir: ./phi-sft-out

sequence_len: 2048
sample_packing: false # currently unsupported
pad_to_sequence_len:

adapter:
lora_model_dir:
lora_r: 16
lora_alpha: 32
lora_dropout: 0.1
lora_target_linear: true
lora_fan_in_fan_out:
lora_modules_to_save:
  - embd
  - lm_head

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 4
optimizer: paged_adamw_8bit
adam_beta2: 0.95
adam_epsilon: 0.00001
max_grad_norm: 1.0
lr_scheduler: cosine
learning_rate: 1e-5

train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
tf32: true

gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true

warmup_steps: 100
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
fsdp:
fsdp_config:
resize_token_embeddings_to_32x: true
special_tokens:
  pad_token: "<|endoftext|>"
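
For reference, a config like the new `examples/phi/phi2-ft.yml` above is consumed by axolotl's training entry point. The snippet below is only a sketch of launching it programmatically, assuming axolotl and `accelerate` are installed and the config sits at the path shown; it is not part of this commit.

```python
# Hypothetical launcher for the Phi-2 fine-tune config above (sketch, not in this commit).
import subprocess

subprocess.run(
    [
        "accelerate", "launch",
        "-m", "axolotl.cli.train",
        "examples/phi/phi2-ft.yml",
    ],
    check=True,  # surface a non-zero exit from the training run as an exception
)
```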
1 change: 1 addition & 0 deletions requirements.txt
@@ -12,6 +12,7 @@ fire
PyYAML>=6.0
datasets>=2.15.0
flash-attn==2.3.3
fused-dense-lib @ git+https://github.com/Dao-AILab/flash-attention@v2.3.3#subdirectory=csrc/fused_dense_lib
sentencepiece
wandb
einops
4 changes: 4 additions & 0 deletions setup.py
@@ -17,6 +17,7 @@ def parse_requirements():
_dependency_links.append(url)
elif (
"flash-attn" not in line
and "flash-attention" not in line
and "deepspeed" not in line
and line
and line[0] != "#"
@@ -51,6 +52,9 @@ def parse_requirements():
"flash-attn": [
"flash-attn==2.3.3",
],
"fused-dense-lib": [
"fused-dense-lib @ git+https://github.com/Dao-AILab/flash-attention@v2.3.3#subdirectory=csrc/fused_dense_lib",
],
"deepspeed": [
"deepspeed",
],
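The new `and "flash-attention" not in line` filter keeps the `fused-dense-lib @ git+...flash-attention...` requirement added above out of the base `install_requires`, exposing it only through the new `fused-dense-lib` extra. A rough reconstruction of how the filter in `parse_requirements()` reads after this change is sketched below; everything outside the lines shown in the diff (variable names, the surrounding loop) is an assumption.

```python
# Sketch of setup.py's parse_requirements() after this change; only the elif
# condition is taken verbatim from the diff, the rest is assumed structure.
def parse_requirements():
    _install_requires = []
    _dependency_links = []
    with open("./requirements.txt", encoding="utf-8") as requirements_file:
        lines = [r.strip() for r in requirements_file.readlines()]
        for line in lines:
            if line.startswith("--extra-index-url"):
                # keep extra index URLs as dependency links rather than requirements
                url = line.split()[1]
                _dependency_links.append(url)
            elif (
                "flash-attn" not in line
                and "flash-attention" not in line  # new: skips the fused-dense-lib VCS line
                and "deepspeed" not in line
                and line
                and line[0] != "#"
            ):
                # everything else is a hard install requirement
                _install_requires.append(line)
    return _install_requires, _dependency_links
```

With the matching `extras_require` entry, the fused kernels can then be pulled in on demand, e.g. `pip install -e '.[fused-dense-lib]'`.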
26 changes: 12 additions & 14 deletions src/axolotl/core/trainer_builder.py
@@ -34,9 +34,10 @@
)
from axolotl.utils.collators import (
BatchSamplerDataCollatorForSeq2Seq,
DataCollatorForSeq2Seq,
MambaDataCollator,
)
from axolotl.utils.samplers import MultipackBatchSampler
from axolotl.utils.samplers import MultipackBatchSampler, get_dataset_lengths
from axolotl.utils.schedulers import get_cosine_schedule_with_quadratic_warmup, get_cosine_schedule_with_min_lr

try:
@@ -184,12 +185,7 @@ def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]:
self.args.train_batch_size,
drop_last=True,
batch_max_len=self._train_batch_size * self.args.max_seq_length,
lengths=(
self.train_dataset.data.column("position_ids")
.to_pandas()
.apply(lambda x: x[-1] + 1)
.values
),
lengths=get_dataset_lengths(self.train_dataset),
packing_efficiency_estimate=self.args.sample_packing_efficiency,
)
return super()._get_train_sampler()
@@ -203,12 +199,7 @@ def _get_eval_sampler(
self.args.per_device_eval_batch_size,
drop_last=True,
batch_max_len=self.args.eval_batch_size * self.args.max_seq_length,
lengths=(
eval_dataset.data.column("position_ids")
.to_pandas()
.apply(lambda x: x[-1] + 1)
.values
),
lengths=get_dataset_lengths(eval_dataset),
packing_efficiency_estimate=self.args.sample_packing_efficiency,
)
return super()._get_eval_sampler(eval_dataset)
@@ -859,7 +850,14 @@ def build_collator(self, training_args: AxolotlTrainingArguments, **kwargs):
if self.cfg.model_config_type == "mamba":
return MambaDataCollator(tokenizer=self.tokenizer)

return BatchSamplerDataCollatorForSeq2Seq(
if training_args.sample_packing:
return BatchSamplerDataCollatorForSeq2Seq(
self.tokenizer,
return_tensors="pt",
**kwargs,
)

return DataCollatorForSeq2Seq(
self.tokenizer,
return_tensors="pt",
**kwargs,
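
Two of the pieces referenced in this file are not shown in the diff, so hedged sketches follow. First, `get_dataset_lengths` replaces the inline `position_ids`-based length computation that these hunks delete; assuming the helper simply mirrors that deleted code, it might look like:

```python
# Sketch of get_dataset_lengths, inferred from the inline code it replaces in this
# diff; the real helper in axolotl.utils.samplers may handle more cases.
def get_dataset_lengths(dataset):
    """Per-sample token lengths for a packed dataset."""
    # Each sample's position_ids run 0..length-1, so the last value + 1 is its
    # length, exactly as the removed lambda computed.
    return (
        dataset.data.column("position_ids")
        .to_pandas()
        .apply(lambda x: x[-1] + 1)
        .values
    )
```

Second, the import of `get_cosine_schedule_with_min_lr` comes from the `cosine_min_lr` branch this commit merges `main` into. Its implementation is not part of this diff; a minimal sketch of a cosine schedule that decays to a floor instead of zero (the signature and the `min_lr_ratio` name are assumptions) could be:

```python
# Minimal sketch of a cosine schedule with a minimum-LR floor; the real
# get_cosine_schedule_with_min_lr in axolotl.utils.schedulers may differ.
import math

from torch.optim import Optimizer
from torch.optim.lr_scheduler import LambdaLR


def get_cosine_schedule_with_min_lr(
    optimizer: Optimizer,
    num_warmup_steps: int,
    num_training_steps: int,
    min_lr_ratio: float = 0.1,
) -> LambdaLR:
    def lr_lambda(current_step: int) -> float:
        if current_step < num_warmup_steps:
            # linear warmup from 0 up to the base learning rate
            return current_step / max(1, num_warmup_steps)
        progress = (current_step - num_warmup_steps) / max(
            1, num_training_steps - num_warmup_steps
        )
        cosine = 0.5 * (1.0 + math.cos(math.pi * progress))
        # rescale so the multiplier decays from 1.0 to min_lr_ratio rather than 0
        return min_lr_ratio + (1.0 - min_lr_ratio) * cosine

    return LambdaLR(optimizer, lr_lambda)
```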
(Diffs for the remaining 7 changed files are not included here.)