You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
trainer = Trainer(gpus=[6,7],
num_nodes=4,
distributed_backend='ddp',
progress_bar_refresh_rate=10, max_epochs=10)
trainer.fit(model)
INFO:lightning:GPU available: True, used: True
INFO:lightning:CUDA_VISIBLE_DEVICES: [6,7]
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/multiprocessing/spawn.py", line 114, in _main
prepare(preparation_data)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/multiprocessing/spawn.py", line 225, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/multiprocessing/spawn.py", line 277, in _fixup_main_from_path
run_name="__mp_main__")
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home1/liuxinfang/projects/minist/model.py", line 87, in <module>
progress_bar_refresh_rate=10, max_epochs=10)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 438, in __init__
self.data_parallel_device_ids = parse_gpu_ids(self.gpus)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/site-packages/pytorch_lightning/trainer/distrib_parts.py", line 712, in parse_gpu_ids
gpus = sanitize_gpu_ids(gpus)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/site-packages/pytorch_lightning/trainer/distrib_parts.py", line 678, in sanitize_gpu_ids
""")
pytorch_lightning.utilities.exceptions.MisconfigurationException:
You requested GPUs: [6, 7]
But your machine only has: [0, 1]
My machine actually has 8 GPUs. Since GPUs 0 and 1 are in use by other users, I need to use GPUs 6 and 7, which have enough free memory. The code runs normally with a single GPU (6 or 7), but fails when more than one GPU is requested.
The text was updated successfully, but these errors were encountered:
My machine actually has 8 GPUs. Since GPUs 0 and 1 are in use by other users, I need to use GPUs 6 and 7, which have enough free memory. The code runs normally with a single GPU (6 or 7), but fails when more than one GPU is requested.
import os
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
import pytorch_lightning as pl
class CoolSystem(pl.LightningModule):
from pytorch_lightning import Trainer
model = CoolSystem()
# most basic trainer, uses good defaults
trainer = Trainer(gpus=[6,7],
num_nodes=4,
distributed_backend='ddp',
progress_bar_refresh_rate=10, max_epochs=10)
trainer.fit(model)
INFO:lightning:GPU available: True, used: True
INFO:lightning:CUDA_VISIBLE_DEVICES: [6,7]
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/multiprocessing/spawn.py", line 105, in spawn_main
exitcode = _main(fd)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/multiprocessing/spawn.py", line 114, in _main
prepare(preparation_data)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/multiprocessing/spawn.py", line 225, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/multiprocessing/spawn.py", line 277, in _fixup_main_from_path
run_name="__mp_main__")
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/home1/liuxinfang/projects/minist/model.py", line 87, in <module>
progress_bar_refresh_rate=10, max_epochs=10)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 438, in __init__
self.data_parallel_device_ids = parse_gpu_ids(self.gpus)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/site-packages/pytorch_lightning/trainer/distrib_parts.py", line 712, in parse_gpu_ids
gpus = sanitize_gpu_ids(gpus)
File "/home1/liuxinfang/anaconda3/envs/MomentRetrival/lib/python3.7/site-packages/pytorch_lightning/trainer/distrib_parts.py", line 678, in sanitize_gpu_ids
""")
pytorch_lightning.utilities.exceptions.MisconfigurationException:
You requested GPUs: [6, 7]
But your machine only has: [0, 1]
My machine actually has 8 GPUs. Since GPUs 0 and 1 are in use by other users, I need to use GPUs 6 and 7, which have enough free memory. The code runs normally with a single GPU (6 or 7), but fails when more than one GPU is requested.
The text was updated successfully, but these errors were encountered: