Support device_map=sequential & max_memory config parameters #903

Merged · 3 commits · Dec 4, 2023
Changes from 2 commits
5 changes: 5 additions & 0 deletions README.md
@@ -612,6 +612,11 @@ eval_sample_packing:
 sample_packing_eff_est:
 total_num_tokens:
 
+# Passed through to transformers when loading the model, when launched without accelerate
+device_map:
+# Defines the max memory usage per gpu on the system. Passed through to transformers when loading the model.
+max_memory:
+
 # If you want to use 'lora' or 'qlora', or leave blank to train all parameters in the original model
 adapter: lora
 # If you already have a lora model trained that you want to load, put that here.
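For context, a minimal sketch (not part of this diff) of how the two new keys might be filled in and what Python values they deserialize to. It assumes PyYAML; the device indices and memory figures are illustrative, not values from this PR:

```python
# A minimal sketch, assuming PyYAML; the device indices and memory figures
# below are illustrative placeholders.
import yaml

cfg_text = """
device_map: sequential
max_memory:
  0: 20GiB
  1: 20GiB
  cpu: 96GiB
"""

cfg = yaml.safe_load(cfg_text)
print(cfg["device_map"])  # sequential
print(cfg["max_memory"])  # {0: '20GiB', 1: '20GiB', 'cpu': '96GiB'}
```

transformers/accelerate accept `max_memory` in exactly this shape: a dict mapping a device id (GPU index or `"cpu"`) to a cap, given either as a byte count or a string like `"20GiB"`.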
2 changes: 1 addition & 1 deletion src/axolotl/utils/config.py
@@ -27,7 +27,7 @@ def get_device():
 
     cfg.device = get_device()
     if cfg.world_size == 1:
-        cfg.device_map = "auto"
+        cfg.device_map = cfg.device_map or "auto"
     else:
         if cfg.device.startswith("cuda"):
             cfg.device_map = {"": torch.cuda.current_device()}
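A minimal sketch of the fallback behavior this one-line change introduces: in the single-process path, a user-supplied `device_map` is now preserved, and only an unset value falls back to `"auto"`. `resolve_device_map` is a hypothetical helper, not a function in axolotl, and a plain integer stands in for `torch.cuda.current_device()` so the example runs without a GPU:

```python
# Hypothetical helper mirroring the changed branch above; not axolotl code.
def resolve_device_map(user_device_map, world_size, current_device=0):
    if world_size == 1:
        # Before this PR the value was always overwritten with "auto";
        # now a user-supplied value (e.g. "sequential") is preserved.
        return user_device_map or "auto"
    # Multi-process launches pin each rank to its own device.
    return {"": current_device}

assert resolve_device_map(None, world_size=1) == "auto"
assert resolve_device_map("sequential", world_size=1) == "sequential"
assert resolve_device_map("sequential", world_size=2) == {"": 0}
```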
1 change: 1 addition & 0 deletions src/axolotl/utils/models.py
@@ -216,6 +216,7 @@ def load_model(
     model_kwargs = {}
 
     model_kwargs["device_map"] = cfg.device_map
+    model_kwargs["max_memory"] = cfg.max_memory
     model_kwargs["torch_dtype"] = cfg.torch_dtype
 
     if cfg.model_revision:
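Finally, a hedged sketch of where these kwargs land: `from_pretrained` forwards `device_map` and `max_memory` to accelerate's weight-placement logic when loading the checkpoint. The model id and memory caps below are placeholders, not values from this PR:

```python
# Illustrative only: the model id and memory caps are placeholders.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "openlm-research/open_llama_3b",          # placeholder model id
    device_map="sequential",                  # fill GPUs in order instead of balancing
    max_memory={0: "20GiB", "cpu": "96GiB"},  # per-device placement caps
)
```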