Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix2 select_device() for Multi-GPU #6461

Merged
merged 6 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions utils/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,12 @@
from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
from utils.general import (LOGGER, NUM_THREADS, check_dataset, check_requirements, check_yaml, clean_str,
segments2boxes, xyn2xy, xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
from utils.torch_utils import device_count, torch_distributed_zero_first
from utils.torch_utils import torch_distributed_zero_first

# Parameters
HELP_URL = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
IMG_FORMATS = ['bmp', 'dng', 'jpeg', 'jpg', 'mpo', 'png', 'tif', 'tiff', 'webp'] # include image suffixes
VID_FORMATS = ['asf', 'avi', 'gif', 'm4v', 'mkv', 'mov', 'mp4', 'mpeg', 'mpg', 'wmv'] # include video suffixes
DEVICE_COUNT = max(device_count(), 1) # number of CUDA devices

# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
Expand Down Expand Up @@ -110,7 +109,8 @@ def create_dataloader(path, imgsz, batch_size, stride, single_cls=False, hyp=Non
prefix=prefix)

batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count() // DEVICE_COUNT, batch_size if batch_size > 1 else 0, workers]) # number of workers
nd = torch.cuda.device_count() # number of CUDA devices
nw = min([os.cpu_count() // max(nd, 1), batch_size if batch_size > 1 else 0, workers]) # number of workers
sampler = None if rank == -1 else distributed.DistributedSampler(dataset, shuffle=shuffle)
loader = DataLoader if image_weights else InfiniteDataLoader # only DataLoader allows for attribute updates
return loader(dataset,
Expand Down
8 changes: 4 additions & 4 deletions utils/torch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def git_describe(path=Path(__file__).parent): # path must be a directory


def device_count():
# Returns number of CUDA devices available. Safe version of torch.cuda.device_count().
# Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Only works on Linux.
assert platform.system() == 'Linux', 'device_count() function only works on Linux'
try:
cmd = 'nvidia-smi -L | wc -l'
return int(subprocess.run(cmd, shell=True, capture_output=True, check=True).stdout.decode().split()[-1])
Expand All @@ -70,10 +71,9 @@ def select_device(device='', batch_size=0, newline=True):
if cpu:
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False
elif device: # non-cpu device requested
nd = device_count() # number of CUDA devices
assert nd > int(max(device.split(','))), f'Invalid `--device {device}` request, valid devices are 0 - {nd - 1}'
os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable - must be before assert is_available()
assert torch.cuda.is_available(), 'CUDA is not available, use `--device cpu` or do not pass a --device'
assert torch.cuda.is_available() and torch.cuda.device_count() >= len(device.replace(',', '')), \
f"Invalid CUDA '--device {device}' requested, use '--device cpu' or pass valid CUDA device(s)"

cuda = not cpu and torch.cuda.is_available()
if cuda:
Expand Down