Skip to content

Commit

Permalink
try nccl
Browse files (browse the repository at this point in the history)
  • Loading branch information
glenn-jocher committed Jun 18, 2021
1 parent 9c4ac05 commit 8ae9ea1
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion train.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
assert torch.cuda.device_count() > LOCAL_RANK, 'too few GPUS for DDP command'
torch.cuda.set_device(LOCAL_RANK)
device = torch.device('cuda', LOCAL_RANK)
- dist.init_process_group(backend="gloo") # distributed backend
+ dist.init_process_group(backend="nccl") # distributed backend
assert opt.batch_size % WORLD_SIZE == 0, '--batch-size must be multiple of CUDA device count'
assert not opt.image_weights, '--image-weights argument is not compatible with DDP training'
opt.batch_size = opt.total_batch_size // WORLD_SIZE
Expand Down

0 comments on commit 8ae9ea1

Please sign in to comment.