Commit
make the other patches optional
winglian committed Sep 27, 2023
1 parent efa0178 commit 94f03c1
Showing 2 changed files with 4 additions and 2 deletions.
README.md (2 additions, 0 deletions)
@@ -635,6 +635,8 @@ flash_optimum:
 xformers_attention:
 # whether to use flash attention patch https://github.com/Dao-AILab/flash-attention:
 flash_attention:
+flash_attn_cross_entropy: # Whether to use flash-attention cross entropy implementation - advanced use only
+flash_attn_rms_norm: # Whether to use flash-attention rms norm implementation - advanced use only
 # whether to use scaled-dot-product attention
 # https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
 sdp_attention:
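For context, a hypothetical config excerpt enabling the new opt-in flags might look like the following; the keys come from the README section above, while the values and their combination are purely illustrative, not defaults:

flash_attention: true
flash_attn_cross_entropy: true  # opt-in: flash-attention cross entropy implementation
flash_attn_rms_norm: true       # opt-in: flash-attention rms norm implementation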
src/axolotl/monkeypatch/llama_attn_hijack_flash.py (2 additions, 2 deletions)
@@ -54,7 +54,7 @@ def replace_llama_attn_with_flash_attn(
     )

     # skip only if explicitly disabled
-    if cross_entropy is not False:
+    if cross_entropy:
         try:
             from flash_attn.losses.cross_entropy import CrossEntropyLoss

@@ -68,7 +68,7 @@ def replace_llama_attn_with_flash_attn(
     )

     # skip only if explicitly disabled
-    if rms_norm is not False:
+    if rms_norm:
         try:
             from flash_attn.ops.rms_norm import RMSNorm
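The behavioural shift introduced by these two one-line changes can be seen in a small self-contained sketch; the function name and the None default below are assumptions for illustration, not the project's actual signature:

def apply_patch(flag=None):  # hypothetical stand-in for the cross_entropy / rms_norm arguments
    # old gate: patch applied unless the flag was explicitly set to False
    old_applied = flag is not False   # None -> True, True -> True, False -> False
    # new gate: patch applied only when the flag is explicitly truthy
    new_applied = bool(flag)          # None -> False, True -> True, False -> False
    return old_applied, new_applied

print(apply_patch())       # (True, False): an unset flag no longer applies the patch
print(apply_patch(True))   # (True, True)
print(apply_patch(False))  # (False, False)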
