fix zero3 save problem with minimum change #1457

Merged: 1 commit, Jun 5, 2023

18 changes: 15 additions & 3 deletions fastchat/train/train_lora.py
@@ -145,10 +145,22 @@ def train():
     trainer.train()
     trainer.save_state()
 
-    # Save states. Weights might be a placeholder in zero3 and need a gather
-    state_dict = get_peft_state_maybe_zero_3(
+    # check if zero3 mode is enabled
+    if trainer.hf_deepspeed_config_orig.is_zero3():
+        # use the deepspeed engine's internal function to gather the state dict
+        # state_dict_zero3 contains the whole parameters of the base model and the lora adapters
+        # we do not extract the lora parameters here, since peft's save_pretrained will do that
+        # https://github.com/huggingface/peft/blob/3714aa2fff158fdfa637b2b65952580801d890b2/src/peft/peft_model.py#L125
+        # https://github.com/huggingface/peft/blob/3714aa2fff158fdfa637b2b65952580801d890b2/src/peft/utils/save_and_load.py#L19
+        state_dict_zero3 = trainer.model_wrapped._zero3_consolidated_16bit_state_dict()
+        if training_args.local_rank == 0:
+            state_dict = state_dict_zero3
+    else:
+        # in other modes, keep the original FastChat code path so the change stays minimal
+        state_dict = get_peft_state_maybe_zero_3(
         model.named_parameters(), lora_args.lora_bias
-    )
+        )
 
     if training_args.local_rank == 0:
         model.save_pretrained(training_args.output_dir, state_dict=state_dict)
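One behavior worth noting in the diff: `_zero3_consolidated_16bit_state_dict()` is called on every rank, outside the `local_rank == 0` check, because the gather is a collective operation to which each rank must contribute its parameter shard; only rank 0 then writes the result. Below is a minimal standalone sketch of the same save flow. It is not FastChat's exact code: it assumes a transformers Trainer wrapping a peft model, swaps the private `hf_deepspeed_config_orig` attribute for the public `is_deepspeed_zero3_enabled` helper (whose import path varies across transformers versions), and `save_lora_adapter` is a hypothetical wrapper name; `get_peft_state_maybe_zero_3` is FastChat's own helper, assumed in scope as in train_lora.py.

# sketch of the ZeRO-3-aware LoRA save flow shown in the diff above
from transformers.deepspeed import is_deepspeed_zero3_enabled  # path may differ by transformers version

def save_lora_adapter(trainer, model, training_args, lora_args):
    if is_deepspeed_zero3_enabled():
        # collective gather: every rank must call this, since under ZeRO-3
        # each rank holds only a shard of every parameter
        state_dict = trainer.model_wrapped._zero3_consolidated_16bit_state_dict()
    else:
        # parameters are whole on each rank, so filter the LoRA weights directly
        state_dict = get_peft_state_maybe_zero_3(
            model.named_parameters(), lora_args.lora_bias
        )
    if training_args.local_rank == 0:
        # only rank 0 writes; peft's save_pretrained keeps just the adapter keys
        model.save_pretrained(training_args.output_dir, state_dict=state_dict)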

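The two peft links in the added comments are what make passing the whole consolidated state dict safe: `PeftModel.save_pretrained` calls `get_peft_model_state_dict`, which filters the dict down to adapter weights before writing, so the gathered base-model parameters never reach disk. A rough sketch of that filtering for the LoRA case, simplified from the linked code (`extract_lora_state_dict` is a hypothetical name, not peft's API, and the real function also handles key remapping and prompt-learning methods):

def extract_lora_state_dict(state_dict, bias="none"):
    # simplified stand-in for peft's get_peft_model_state_dict
    if bias == "none":
        # keep only LoRA tensors; any gathered base weights are dropped here
        return {k: v for k, v in state_dict.items() if "lora_" in k}
    if bias == "all":
        return {k: v for k, v in state_dict.items() if "lora_" in k or "bias" in k}
    raise NotImplementedError(f"unsupported bias mode: {bias}")

This filtering is why the else branch can keep pre-extracting with get_peft_state_maybe_zero_3 while the ZeRO-3 branch simply hands the full gathered dict to save_pretrained.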