feat: add config to disable autounwrap
NanoCode012 committed Jan 11, 2024
1 parent 116f34a commit 9f02c8d
Showing 2 changed files with 11 additions and 2 deletions.
9 changes: 9 additions & 0 deletions docs/rlhf.md
@@ -33,3 +33,12 @@ datasets:
 ```yaml
 rl: ipo
 ```
+
+#### TRL autounwrap for PEFT
+
+TRL supports autounwrapping PEFT models, so a separate reference model does not need to be loaded, reducing VRAM usage. This is enabled by default. To turn it off, pass the following config:
+
+```yaml
+# Load a separate reference model when training with an adapter.
+rl_adapter_ref_model: true
+```
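The gating this config introduces can be sketched as a small standalone helper. This is a minimal illustrative sketch, not Axolotl's actual code: the function name `resolve_ref_model` and the use of a plain dict for `cfg` are assumptions; only the keys `rl`, `adapter`, and `rl_adapter_ref_model` come from this commit.

```python
def resolve_ref_model(cfg, load_model):
    """Return the reference model to pass to the RL trainer (illustrative sketch).

    With an adapter and autounwrap left on (rl_adapter_ref_model unset/false),
    TRL reuses the base weights by disabling the adapter, so no second model
    is loaded and model_ref stays None.
    """
    if not cfg.get("rl"):
        return None  # not an RL run; no reference model needed
    if cfg.get("adapter") and not cfg.get("rl_adapter_ref_model"):
        return None  # rely on TRL's built-in autounwrap, saving VRAM
    return load_model()  # load a second copy as the frozen baseline
```

Setting `rl_adapter_ref_model: true` thus trades extra VRAM for an explicitly loaded reference model instead of the autounwrapped base weights.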
4 changes: 2 additions & 2 deletions src/axolotl/train.py
@@ -63,10 +63,10 @@ def train(
     model, peft_config = load_model(cfg, tokenizer, inference=cli_args.inference)
     model_ref = None
     if cfg.rl:
-        if cfg.adapter:
+        if cfg.adapter and not cfg.rl_adapter_ref_model:
             # use built-in trl autounwrap
             LOG.debug("Passing model_ref: None to RL trainer")
-            model_ref = None
+            model_ref = None  # explicit setting to None
         else:
             # load the model again for model_ref/baseline
             model_ref, _ = load_model(
