From d3193beac3073a8245231a4075ac08306519c935 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 27 Oct 2023 18:57:02 -0700 Subject: [PATCH] Fix Deepspeed Zero3 Config (#791) * Update zero3.json Remove CPU offload by default (it slows things down horribly; reducing the batch size is the better option), and change the LR scheduler from WarmupLR to WarmupDecayLR so the learning rate properly decays * Update zero3.json fix something --- deepspeed/zero3.json | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/deepspeed/zero3.json b/deepspeed/zero3.json index 5da9c07e2..99ecbd4bf 100644 --- a/deepspeed/zero3.json +++ b/deepspeed/zero3.json @@ -1,14 +1,6 @@ { "zero_optimization": { "stage": 3, - "offload_optimizer": { - "device": "cpu", - "pin_memory": true - }, - "offload_param": { - "device": "cpu", - "pin_memory": true - }, "overlap_comm": true, "contiguous_gradients": true, "sub_group_size": 0, @@ -41,12 +33,13 @@ } }, "scheduler": { - "type": "WarmupLR", + "type": "WarmupDecayLR", "params": { "warmup_min_lr": "auto", "warmup_max_lr": "auto", "warmup_num_steps": "auto", - "warmup_type": "linear" + "warmup_type": "linear", + "total_num_steps": "auto" } }, "gradient_accumulation_steps": "auto",