From 8ae9ea1fac7e442403e83c719867decfad385b36 Mon Sep 17 00:00:00 2001
From: Glenn Jocher <glenn.jocher@ultralytics.com>
Date: Fri, 18 Jun 2021 16:13:13 +0200
Subject: [PATCH] Try NCCL instead of Gloo as the DDP init_process_group backend

---
 train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/train.py b/train.py
index ae70e237a250..e4838e61b503 100644
--- a/train.py
+++ b/train.py
@@ -533,7 +533,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
         assert torch.cuda.device_count() > LOCAL_RANK, 'too few GPUS for DDP command'
         torch.cuda.set_device(LOCAL_RANK)
         device = torch.device('cuda', LOCAL_RANK)
-        dist.init_process_group(backend="gloo")  # distributed backend
+        dist.init_process_group(backend="nccl")  # distributed backend
         assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count'
         assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
         opt.batch_size = opt.total_batch_size // WORLD_SIZE