From d3193beac3073a8245231a4075ac08306519c935 Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Fri, 27 Oct 2023 18:57:02 -0700
Subject: [PATCH] Fix Deepspeed Zero3 Config (#791)

* Update zero3.json

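Remove CPU offload of optimizer state and parameters from the default config (it slows training down horribly; you are better off reducing the batch size), and change the LR scheduler from WarmupLR to WarmupDecayLR so the learning rate properly decays after warmup.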

* Update zero3.json

fix something
---
 deepspeed/zero3.json | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/deepspeed/zero3.json b/deepspeed/zero3.json
index 5da9c07e2..99ecbd4bf 100644
--- a/deepspeed/zero3.json
+++ b/deepspeed/zero3.json
@@ -1,14 +1,6 @@
 {
   "zero_optimization": {
     "stage": 3,
-    "offload_optimizer": {
-      "device": "cpu",
-      "pin_memory": true
-    },
-    "offload_param": {
-      "device": "cpu",
-      "pin_memory": true
-    },
     "overlap_comm": true,
     "contiguous_gradients": true,
     "sub_group_size": 0,
@@ -41,12 +33,13 @@
     }
   },
   "scheduler": {
-    "type": "WarmupLR",
+    "type": "WarmupDecayLR",
     "params": {
       "warmup_min_lr": "auto",
       "warmup_max_lr": "auto",
       "warmup_num_steps": "auto",
-      "warmup_type": "linear"
+      "warmup_type": "linear",
+      "total_num_steps": "auto"
     }
   },
   "gradient_accumulation_steps": "auto",