From 804836a8d8facd93c678800d43ecd5050b43bef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 16 May 2020 03:16:53 +0200 Subject: [PATCH 1/3] remove unused device attribute --- pytorch_lightning/trainer/distrib_data_parallel.py | 1 - pytorch_lightning/trainer/distrib_parts.py | 5 ----- pytorch_lightning/trainer/trainer.py | 1 - 3 files changed, 7 deletions(-) diff --git a/pytorch_lightning/trainer/distrib_data_parallel.py b/pytorch_lightning/trainer/distrib_data_parallel.py index 37f9fa9194db9..ae55d2447079d 100644 --- a/pytorch_lightning/trainer/distrib_data_parallel.py +++ b/pytorch_lightning/trainer/distrib_data_parallel.py @@ -360,7 +360,6 @@ def ddp_train(self, process_idx, model): # copy model to each gpu if self.on_gpu: self.root_gpu = process_idx - self._device = torch.device('cuda', self.root_gpu) torch.cuda.set_device(self.root_gpu) model.cuda(self.root_gpu) diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index 2825027c83aa1..312940860cc39 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -432,7 +432,6 @@ def copy_trainer_model_properties(self, model): m.use_tpu = self.use_tpu m.tpu_local_core_rank = self.tpu_local_core_rank m.tpu_global_core_rank = self.tpu_global_core_rank - m._device = self._device def transfer_batch_to_tpu(self, batch): return self.__transfer_data_to_device(batch, device='tpu') @@ -488,7 +487,6 @@ def __transfer_data_to_device(self, batch, device, gpu_id=None): def single_gpu_train(self, model): model.cuda(self.root_gpu) - self._device = torch.device('cuda', self.root_gpu) # CHOOSE OPTIMIZER # allow for lr schedulers as well @@ -505,7 +503,6 @@ def single_gpu_train(self, model): def tpu_train(self, tpu_core_idx, model): # put model on tpu model.to(xm.xla_device()) - self._device = xm.xla_device() # get the appropriate tpu ranks self.tpu_local_core_rank = xm.get_local_ordinal() @@ -545,7 +542,6 @@ def dp_train(self, model): self.optimizers, self.lr_schedulers, self.optimizer_frequencies = self.init_optimizers(model) model.cuda(self.root_gpu) - self._device = torch.device('cuda', self.root_gpu) # hack forward to do autocast for the user model_autocast_original_forward = model.forward @@ -585,7 +581,6 @@ def horovod_train(self, model): assert self.root_gpu == hvd.local_rank() torch.cuda.set_device(self.root_gpu) model.cuda(self.root_gpu) - self._device = torch.device('cuda', self.root_gpu) # avoid duplicating progress bar if hvd.rank() != 0 and self.progress_bar_callback is not None: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 361faa9715a89..f7260134e69b3 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -473,7 +473,6 @@ def __init__( # distributed backend choice self.distributed_backend = distributed_backend self.set_distributed_mode(distributed_backend) - self._device = torch.device('cpu') # override dist backend when using tpus if self.on_tpu: From b80a58c078b465b484ac56280444c0d2d6c93784 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 16 May 2020 03:20:16 +0200 Subject: [PATCH 2/3] dtype --- pytorch_lightning/core/lightning.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index de0e2bcb0075f..82488bb4b3e05 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -72,7 +72,8 @@ def __init__(self, *args, **kwargs): self.hparams = None #: Current dtype - self._dtype = torch.FloatTensor + self._dtype = torch.float + #: device reference self._device = torch.device('cpu') From 6af94b532f248e367c1d65e07ce453d1c4b0b66a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Sat, 16 May 2020 06:44:32 +0200 Subject: [PATCH 3/3] move on_gpu to model --- pytorch_lightning/core/lightning.py | 12 ++++++++---- pytorch_lightning/trainer/distrib_parts.py | 1 - 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 82488bb4b3e05..784ae9c3a45fa 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -53,10 +53,6 @@ def __init__(self, *args, **kwargs): self.logger = None self.example_input_array = None - #: True if your model is currently running on GPUs. - #: Useful to set flags around the LightningModule for different CPU vs GPU behavior. - self.on_gpu = False - #: True if using dp self.use_dp = False @@ -77,6 +73,14 @@ def __init__(self, *args, **kwargs): #: device reference self._device = torch.device('cpu') + @property + def on_gpu(self): + """ + True if your model is currently running on GPUs. + Useful to set flags around the LightningModule for different CPU vs GPU behavior. + """ + return self.device.type == 'cuda' + def print(self, *args, **kwargs) -> None: r""" Prints only from process 0. Use this in any distributed mode to log only once. diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index 312940860cc39..31865fc643ecc 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -422,7 +422,6 @@ def copy_trainer_model_properties(self, model): for m in [model, ref_model]: m.trainer = self - m.on_gpu = self.on_gpu m.use_dp = self.use_dp m.use_ddp2 = self.use_ddp2 m.use_ddp = self.use_ddp