PoC: Accelerator refactor #5743

Merged: 314 commits (Feb 12, 2021)
Changes from 179 commits
Commits (314)
259c7f7
restoring the result from subprocess
awaelchli Dec 22, 2020
dfab52a
fix queue.get() order for results
awaelchli Dec 22, 2020
6742488
add missing "block_backward_sync" context manager
awaelchli Dec 22, 2020
8c89932
add missing "block_backward_sync" context manager
awaelchli Dec 22, 2020
0186a0f
fix sync_batchnorm
awaelchli Dec 22, 2020
b2ac1f4
fix supported gpu-ids for tuple
awaelchli Dec 22, 2020
07a41ce
fix clip gradients and inf recursion
awaelchli Dec 22, 2020
63b7eaf
accelerator selection: added cluster_environment plugin
awaelchli Dec 23, 2020
f8344c5
fix torchelastic test
awaelchli Dec 23, 2020
34e3c15
fix reduce early stopping decision for DDP
awaelchli Dec 24, 2020
27a4cff
fix tests: callbacks, conversion to lightning optimizer
awaelchli Dec 24, 2020
df5ac30
fix lightning optimizer does not pickle
awaelchli Dec 24, 2020
dcf917a
fix setting benchmark and deterministic option
awaelchli Dec 24, 2020
272f088
fix slurm amp test
awaelchli Dec 24, 2020
4529476
fix prepare_data test and determine node_rank
awaelchli Dec 27, 2020
5319b0f
fix retrieving last path when testing
awaelchli Dec 27, 2020
3b54cfb
remove obsolete plugin argument
awaelchli Dec 27, 2020
6540b87
fix test: test_trainer_config
awaelchli Dec 27, 2020
6b450e1
fix torchscript tests
awaelchli Dec 27, 2020
4ef539f
fix trainer.model access
awaelchli Dec 27, 2020
1001ccf
move properties
awaelchli Dec 27, 2020
38a1d0f
fix test_transfer_batch_hook
awaelchli Dec 27, 2020
46cf7ef
fix auto_select_gpus
awaelchli Dec 27, 2020
258f50e
fix omegaconf test
awaelchli Dec 27, 2020
a5d69b9
fix test that needs to simulate slurm ddp
awaelchli Dec 27, 2020
88a7ed5
add horovod plugin
awaelchli Dec 29, 2020
40daa41
fix test with named arguments
awaelchli Dec 29, 2020
96fc074
clean up whitespace
awaelchli Dec 29, 2020
210831a
fix datamodules test
awaelchli Dec 29, 2020
98b6dd4
remove old accelerators
justusschock Jan 6, 2021
dfcbba6
fix naming
justusschock Jan 6, 2021
348a1b0
move old plugins
justusschock Jan 6, 2021
14f2f6e
move to plugins
justusschock Jan 6, 2021
2f779c6
create precision subpackage
justusschock Jan 6, 2021
58536f6
create training_type subpackage
justusschock Jan 6, 2021
ee53c90
fix all new import errors
awaelchli Jan 7, 2021
894e604
fix wrong arguments order passed to test
awaelchli Jan 7, 2021
2bdc836
fix LR finder
awaelchli Jan 10, 2021
48b9882
Added sharded training type and amp plugin
Jan 11, 2021
38452b6
Move clip grad to precision plugin
Jan 11, 2021
173b22c
Added sharded spawn, select accelerators based on distributed_backend…
Jan 12, 2021
79803f6
Fix import issue, attempting to fix tests
Jan 12, 2021
a7c0d8f
Fix initial test
Jan 12, 2021
02df0ad
Reflect hook logic from master, should wrap model after move to device
Jan 14, 2021
d0ebcba
Optional state consolidation, since master has optimizers not wrapped
justusschock Jan 22, 2021
319c3e8
change attribute for instance test
justusschock Jan 22, 2021
a34cd15
reset optimizers
justusschock Jan 22, 2021
c95b06a
legacy
Borda Jan 22, 2021
9ff0c64
imports in accel
Borda Jan 22, 2021
67d4e47
legacy2
Borda Jan 22, 2021
577b00d
trainer imports
Borda Jan 22, 2021
aa4858b
fix import errors after rebase
awaelchli Jan 25, 2021
f81a44f
move hook to new setup location
awaelchli Jan 25, 2021
a285665
provide unwrapping logic
awaelchli Jan 25, 2021
bf78d70
fix trainer callback system
awaelchli Jan 25, 2021
34947cf
added ddp2 implementation
awaelchli Jan 25, 2021
49bec53
fix imports .legacy
Borda Jan 25, 2021
ba1c986
move plugins
Borda Jan 25, 2021
45dfbb7
restore legacy
Borda Jan 25, 2021
9b7326a
drop test.py from root
Borda Jan 25, 2021
96bc05d
add tpu accelerator and plugins
justusschock Jan 26, 2021
c5994e5
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Jan 30, 2021
9e46624
fixes
awaelchli Jan 30, 2021
22d2ae8
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Jan 30, 2021
901d392
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Jan 31, 2021
e174b8d
fix lightning optimizer merge
awaelchli Jan 31, 2021
98660de
reset bugreportmodel
awaelchli Jan 31, 2021
4d95b6c
unwrapping
awaelchli Jan 31, 2021
b69d013
step routing forward
awaelchli Jan 31, 2021
cb6676d
model access
awaelchli Jan 31, 2021
a33d27f
unwrap
awaelchli Jan 31, 2021
f7486e2
opt
awaelchli Jan 31, 2021
117f16d
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Jan 31, 2021
3792b72
integrate distrib_type
awaelchli Jan 31, 2021
ef85b81
sync changes
awaelchli Jan 31, 2021
9d9a940
sync
awaelchli Feb 1, 2021
f017a39
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
a190a56
fixes
awaelchli Feb 1, 2021
73bb607
add forgotten generators
awaelchli Feb 1, 2021
c8c74f3
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
ae71997
add missing logic
awaelchli Feb 1, 2021
d89847b
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
0e686c3
update
awaelchli Feb 1, 2021
d6a43ea
import
awaelchli Feb 1, 2021
ceb8f75
missed imports
awaelchli Feb 1, 2021
fbb7c20
import fixes
awaelchli Feb 1, 2021
b610999
isort
awaelchli Feb 1, 2021
9b79924
mv f
awaelchli Feb 1, 2021
9afe54d
changelog
awaelchli Feb 1, 2021
3b63e82
Merge branch 'release/1.2-dev' into ref/update-plugins
awaelchli Feb 1, 2021
ca8cb68
format
awaelchli Feb 1, 2021
0633745
move helper to parallel plugin
awaelchli Feb 1, 2021
a622e0b
d
awaelchli Feb 1, 2021
18c682f
Merge branch 'ref/update-plugins' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
f275803
add world size
awaelchli Feb 1, 2021
4ae008b
clean up
awaelchli Feb 1, 2021
3b3918b
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 1, 2021
d4c6308
duplicate
awaelchli Feb 1, 2021
7eef4a0
Merge branch 'release/1.2-dev' into accelerator-refactor-sharted-4
awaelchli Feb 2, 2021
9949164
activate ddp_sharded and tpu
awaelchli Feb 2, 2021
6d47357
set nvidia flags
awaelchli Feb 2, 2021
a6864ec
remove unused colab var
awaelchli Feb 2, 2021
b4b9724
use_tpu <-> on_tpu attrs
awaelchli Feb 2, 2021
81001e3
make some ddp_cpu and clusterplugin tests pass
awaelchli Feb 2, 2021
cea000d
Ref/accelerator connector (#5742)
justusschock Feb 2, 2021
933e2a1
plugins
awaelchli Feb 2, 2021
ad451d8
manual optimization
justusschock Feb 2, 2021
a30a3cf
update optimizer routing
justusschock Feb 2, 2021
a05b291
add rank to torchelastic
justusschock Feb 2, 2021
4388e73
fix memory mixed precision
awaelchli Feb 2, 2021
be9d029
setstate on trainer for pickling in ddp spawn
awaelchli Feb 2, 2021
a90a160
add predict method
awaelchli Feb 2, 2021
767bee0
add back commented accelerator code
awaelchli Feb 2, 2021
f771a7f
adapt test for sync_batch_norm to new plugin
awaelchli Feb 3, 2021
1a3b04e
fix deprecated tests
awaelchli Feb 3, 2021
a1f4938
fix ddp cpu choice when no num_processes are given
awaelchli Feb 3, 2021
38bc8b7
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
awaelchli Feb 3, 2021
ce6b6de
yapf format
awaelchli Feb 3, 2021
3b7c20b
skip a memory test that cannot pass anymore
awaelchli Feb 3, 2021
f538c75
fix pickle error in spawn plugin
awaelchli Feb 3, 2021
b44d82e
x
awaelchli Feb 3, 2021
3820e77
avoid
awaelchli Feb 3, 2021
08ae327
x
awaelchli Feb 3, 2021
7d0e094
avoid tons of warnings from importing deprecated modules
awaelchli Feb 3, 2021
1028011
fix cyclic import in docs build
awaelchli Feb 3, 2021
11bd0d6
add support for sharded
justusschock Feb 4, 2021
6bf0b60
update typing
justusschock Feb 4, 2021
f94082b
add sharded and sharded_spawn to distributed types
justusschock Feb 4, 2021
7939b99
make unwrap model default
justusschock Feb 4, 2021
9131ffb
refactor LightningShardedDataParallel similar to LightningDistributed…
justusschock Feb 4, 2021
ed7425c
update sharded spawn to reflect changes
justusschock Feb 4, 2021
209a164
update sharded to reflect changes
justusschock Feb 4, 2021
837a070
Merge 1.1.5 changes
awaelchli Feb 4, 2021
136b321
fix merge
awaelchli Feb 4, 2021
ffcb535
fix merge
awaelchli Feb 4, 2021
1edfa73
yapf isort
awaelchli Feb 4, 2021
a689b81
merge 1.1.6
awaelchli Feb 4, 2021
330b14c
fix merge
awaelchli Feb 4, 2021
ef258d5
yapf isort
awaelchli Feb 4, 2021
c85000d
fix indentation in test
awaelchli Feb 4, 2021
5f3a35e
copy over reinit scheduler implementation from dev1.2
awaelchli Feb 4, 2021
fa1c9b7
fix apex tracking calls with dev_debugger
awaelchli Feb 5, 2021
e330a11
reduce diff to dev1.2, clean up
awaelchli Feb 5, 2021
994ac82
fix trainer config test when gpus>0 and num_processes >0 and ddp_cpu
awaelchli Feb 5, 2021
1a78601
sort plugin tests legacy/new
awaelchli Feb 6, 2021
4b76448
fix error handling for amp on cpu
awaelchli Feb 6, 2021
bfd54ab
Merge branch 'release/1.2-dev' into patch117
awaelchli Feb 6, 2021
0574d22
fix merge
awaelchli Feb 6, 2021
6ef6637
Merge branch 'patch117' into accelerator-refactor-sharded
awaelchli Feb 6, 2021
9feda39
[Feat] Resolve manual_backward (#5837)
tchaton Feb 6, 2021
7bb9d9f
fix tests/accelerator tests on cpu
awaelchli Feb 6, 2021
13ae1ff
[BugFix] Resolve manual optimization (#5852)
tchaton Feb 6, 2021
fc3b4db
Merge formatting changes from 1.2 branch
awaelchli Feb 6, 2021
b437642
Remove copy trainer parameters to happen earlier within the loop and …
SeanNaren Feb 7, 2021
8c6aa83
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
Feb 7, 2021
beb980a
resovle a bug
Feb 7, 2021
7a0fd27
Accelerator refactor sharded rpc (#5854)
justusschock Feb 7, 2021
0d0ced5
resolve bug
Feb 7, 2021
1f3ab76
fix assert in rpc test
awaelchli Feb 7, 2021
f1b1121
resolve a test
Feb 7, 2021
cd31fa1
fix docs compilation
awaelchli Feb 8, 2021
f48793e
accelerator refactor - fix for sharded parity test (#5866)
awaelchli Feb 8, 2021
81ff6ea
Remove DDP2 as this does not apply
Feb 8, 2021
20deb46
Add missing pre optimizer hook to ensure lambda closure is called
Feb 8, 2021
be4d1a2
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
Feb 8, 2021
0ac5fc4
fix apex docstring
awaelchli Feb 8, 2021
07fdd95
[accelerator][BugFix] Resolve some test for 1 gpu (#5863)
tchaton Feb 8, 2021
384b791
yapf isort
awaelchli Feb 8, 2021
b1a84b8
resolve flake8
tchaton Feb 8, 2021
a157a29
fix apex doctests
awaelchli Feb 8, 2021
08cfc65
fix apex doctests 2
awaelchli Feb 8, 2021
7888bfd
resolve docs
tchaton Feb 8, 2021
b5b4243
update drone
tchaton Feb 8, 2021
93ceb4c
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
tchaton Feb 8, 2021
d001bcf
clean env
Feb 8, 2021
ad47f47
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
tchaton Feb 8, 2021
60bfb1a
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
tchaton Feb 8, 2021
0608a41
update
Feb 8, 2021
f0120b5
update
Feb 8, 2021
bf8874e
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
Feb 8, 2021
baf7d7f
update
tchaton Feb 8, 2021
9360aad
update
tchaton Feb 8, 2021
b814cdc
merge
justusschock Feb 9, 2021
0d3ea37
Merge branch 'accelerator-refactor-sharded' of github.com:PytorchLigh…
justusschock Feb 9, 2021
f1f90c2
Fix RPC related tests, clean out old API, update for new accelerator …
SeanNaren Feb 9, 2021
6d05881
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
justusschock Feb 10, 2021
d86fdff
Update test_remove_1-4.py
justusschock Feb 10, 2021
5fbc1cf
Expose properties for tpu cores/gpus/num_gpus
Feb 10, 2021
aa9aea0
Add root GPU property
Feb 10, 2021
c35baf1
Move properties to properties.py
Feb 10, 2021
a9c6e21
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
awaelchli Feb 10, 2021
8f3947b
move tests that were previously in drone
awaelchli Feb 10, 2021
50ecc4a
Fix root GPU property (#5908)
SeanNaren Feb 10, 2021
c7d0075
fix best model path transfer when no checkpoint callback available
awaelchli Feb 10, 2021
3f61d15
Merge remote-tracking branch 'original/accelerator-refactor-sharded' …
awaelchli Feb 10, 2021
061ea46
Fix setup hook order [wip] (#5858)
SeanNaren Feb 10, 2021
1fe1f91
rename ddp sequential -> rpc sequential for special test
awaelchli Feb 10, 2021
3683f5a
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
awaelchli Feb 10, 2021
1f01b81
revert
awaelchli Feb 10, 2021
135c236
fix stupid merge problem
awaelchli Feb 10, 2021
222653d
Use property in connector for sampler (#5913)
SeanNaren Feb 10, 2021
f4311cd
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
awaelchli Feb 11, 2021
b210dee
merge the import conflicts
awaelchli Feb 11, 2021
236009e
fix spawning of processes in slurm
awaelchli Feb 11, 2021
aace276
[wip] Fix some bugs for TPU [skip ci] (#5878)
tchaton Feb 11, 2021
68273f5
resolve some tests
Feb 11, 2021
ca77fa4
update
Feb 11, 2021
c35edfd
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
justusschock Feb 11, 2021
8cacef7
fix imports
justusschock Feb 11, 2021
f7bbe48
update
Feb 11, 2021
30d9800
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
Feb 11, 2021
25f7f13
resolve flake8
tchaton Feb 11, 2021
fa28c41
update azure pipeline
tchaton Feb 11, 2021
51c27e6
Merge branch 'release/1.2-dev' into accelerator-refactor-sharded
tchaton Feb 11, 2021
b888d68
skip a sharded test on cpu that requires a gpu
awaelchli Feb 11, 2021
01ca4cd
resolve tpus
Feb 11, 2021
181d143
Merge branch 'master' into accelerator-refactor-sharded
justusschock Feb 11, 2021
946a1e9
resolve bug
Feb 11, 2021
2ad1a6e
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
Feb 11, 2021
6e0aff0
resolve flake8
tchaton Feb 11, 2021
a931791
update
Feb 11, 2021
319d034
Merge branch 'accelerator-refactor-sharded' of https://github.com/PyT…
Feb 11, 2021
4117bec
updat utils
Feb 11, 2021
8d000f7
Merge branch 'master' into accelerator-refactor-sharded
tchaton Feb 11, 2021
0b1ba67
revert permission change on files
awaelchli Feb 11, 2021
cc385b4
suggestions from carlos
awaelchli Feb 11, 2021
e9eb318
remove unrelated formatting changes
awaelchli Feb 11, 2021
7c08400
remove incomplete comment
awaelchli Feb 11, 2021
7c3d184
Update pytorch_lightning/accelerators/__init__.py
awaelchli Feb 11, 2021
503426e
remove unrelated formatting change
awaelchli Feb 11, 2021
c0fbf7a
add types
awaelchli Feb 11, 2021
23a9a10
warn 1.7 ddp manual backward only if ddp kwarg unset
awaelchli Feb 11, 2021
a70ee4a
yapf + isort
awaelchli Feb 11, 2021
b0621c4
pep8 unused imports
awaelchli Feb 11, 2021
18bfe70
Merge branch 'master' into accelerator-refactor-sharded
awaelchli Feb 11, 2021
7b0515d
fix cyclic import in docs
awaelchli Feb 12, 2021
d966057
Apply suggestions from code review
Borda Feb 12, 2021
f636d9d
typer in accelerator.py
Borda Feb 12, 2021
5579ea7
typo
tchaton Feb 12, 2021
f5df88b
Apply suggestions from code review
Borda Feb 12, 2021
233694e
formatting
Borda Feb 12, 2021
a47644a
update on comments
tchaton Feb 12, 2021
80dacb6
update typo
tchaton Feb 12, 2021
99573eb
Update pytorch_lightning/trainer/properties.py
tchaton Feb 12, 2021
ab859d7
update
tchaton Feb 12, 2021
ad5742a
suggestion from code review
awaelchli Feb 12, 2021
5eaec98
suggestion from code review
awaelchli Feb 12, 2021
941cf77
Merge branch 'master' into accelerator-refactor-sharded
mergify[bot] Feb 12, 2021
8491c29
Merge branch 'master' into accelerator-refactor-sharded
mergify[bot] Feb 12, 2021
bd2b23d
Merge branch 'master' into accelerator-refactor-sharded
mergify[bot] Feb 12, 2021
45 changes: 12 additions & 33 deletions benchmarks/test_sharded_parity.py
@@ -15,7 +15,7 @@
import os
import platform
import time
from typing import Type, Union
from typing import Type

import pytest
import torch
@@ -32,10 +32,8 @@
@pytest.mark.skipif(platform.system() == "Windows", reason="Distributed training is not supported on Windows")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_sharded_plugin_correctness_one_gpu():
plugin_parity_test(
sharded_parity_test(
gpus=1,
accelerator='ddp_spawn',
plugin=DDPShardedPlugin(),
model_cls=SeedTrainLoaderModel,
)

@@ -45,11 +43,9 @@ def test_ddp_sharded_plugin_correctness_one_gpu():
@pytest.mark.skipif(platform.system() == "Windows", reason="Distributed training is not supported on Windows")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_sharded_plugin_correctness_amp_one_gpu():
plugin_parity_test(
sharded_parity_test(
gpus=1,
precision=16,
accelerator='ddp_spawn',
plugin=DDPShardedPlugin(),
model_cls=SeedTrainLoaderModel,
)

@@ -59,10 +55,8 @@ def test_ddp_sharded_plugin_correctness_amp_one_gpu():
@pytest.mark.skipif(platform.system() == "Windows", reason="Distributed training is not supported on Windows")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_sharded_plugin_correctness_multi_gpu():
plugin_parity_test(
sharded_parity_test(
gpus=2,
accelerator='ddp_spawn',
plugin=DDPShardedPlugin(),
model_cls=SeedTrainLoaderModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)
@@ -73,11 +67,9 @@ def test_ddp_sharded_plugin_correctness_multi_gpu():
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_sharded_plugin_correctness_amp_multi_gpu():
plugin_parity_test(
sharded_parity_test(
gpus=2,
precision=16,
accelerator='ddp_spawn',
plugin=DDPShardedPlugin(),
model_cls=SeedTrainLoaderModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)
@@ -88,11 +80,9 @@ def test_ddp_sharded_plugin_correctness_amp_multi_gpu():
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
plugin_parity_test(
sharded_parity_test(
gpus=2,
precision=16,
accelerator='ddp_spawn',
plugin='ddp_sharded',
model_cls=SeedTrainLoaderModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)
@@ -105,11 +95,9 @@ def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
)
@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 32")
def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
plugin_parity_test(
sharded_parity_test(
gpus=args.gpus,
precision=args.precision,
accelerator=args.accelerator,
plugin=DDPShardedPlugin(),
model_cls=SeedTrainLoaderModel,
)

@@ -121,11 +109,9 @@ def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
)
@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 16")
def test_ddp_sharded_plugin_correctness_amp_multi_gpu_ddp(tmpdir, args=None):
plugin_parity_test(
sharded_parity_test(
gpus=args.gpus,
precision=args.precision,
accelerator=args.accelerator,
plugin=DDPShardedPlugin(),
model_cls=SeedTrainLoaderModel,
)

@@ -138,10 +124,8 @@ def test_ddp_sharded_plugin_correctness_multi_gpu_multi_optim():
"""
Ensures same results using multiple optimizers across multiple GPUs
"""
plugin_parity_test(
plugin=DDPShardedPlugin(),
sharded_parity_test(
gpus=2,
accelerator='ddp_spawn',
model_cls=SeedTrainLoaderMultipleOptimizersModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)
@@ -155,10 +139,8 @@ def test_ddp_sharded_plugin_correctness_multi_gpu_multi_optim_manual(tmpdir):
"""
Ensures using multiple optimizers across multiple GPUs with manual optimization
"""
plugin_parity_test(
plugin=DDPShardedPlugin(),
sharded_parity_test(
gpus=2,
accelerator='ddp_spawn',
model_cls=SeedTrainLoaderManualModel,
max_percent_speed_diff=0.25, # todo: Increase speed diff since only 2 GPUs sharding 2 optimizers
)
@@ -273,9 +255,7 @@ def plugin_parity_test(

Args:
model_cls: Model class to use for test.
plugin: Plugin to parity test.
seed: Seed for generators. Note that this does not handle the seed for data-loading on multi-process.
accelerator: Accelerator type for test.
gpus: Number of GPUS to enable.
precision: Whether to use AMP or normal FP32 training.
max_percent_speed_diff: The maximum speed difference compared to normal DDP training.
@@ -293,7 +273,7 @@ def plugin_parity_test(
max_epochs=1,
gpus=gpus,
precision=precision,
accelerator=accelerator,
accelerator='ddp_spawn',
)

max_memory_ddp, ddp_time = record_ddp_fit_model_stats(trainer=trainer, model=ddp_model, use_cuda=use_cuda)
@@ -307,8 +287,7 @@ def plugin_parity_test(
max_epochs=1,
gpus=gpus,
precision=precision,
accelerator=accelerator,
plugins=[plugin],
accelerator='ddp_sharded_spawn',
)

max_memory_custom, custom_model_time = record_ddp_fit_model_stats(
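The benchmark changes above drop the explicit plugin/accelerator pair: the sharded run is now requested purely through the accelerator string ('ddp_sharded_spawn', with plain 'ddp_spawn' for the DDP baseline) instead of passing plugins=[DDPShardedPlugin()]. A minimal sketch of the corresponding user-facing Trainer call, assuming a multi-GPU machine with fairscale installed; fit_sharded and model_cls are illustrative names, not part of the benchmark:

import pytorch_lightning as pl


def fit_sharded(model_cls, gpus: int = 2, precision: int = 32):
    # sharded DDP selected by the accelerator string rather than by
    # accelerator='ddp_spawn' plus plugins=[DDPShardedPlugin()]
    model = model_cls()
    trainer = pl.Trainer(
        max_epochs=1,
        gpus=gpus,
        precision=precision,
        accelerator='ddp_sharded_spawn',
    )
    trainer.fit(model)
    return trainer
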
29 changes: 4 additions & 25 deletions pytorch_lightning/accelerators/__init__.py
@@ -1,25 +1,4 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pytorch_lightning.accelerators.legacy.accelerator import Accelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.cpu_accelerator import CPUAccelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.ddp2_accelerator import DDP2Accelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.ddp_accelerator import DDPAccelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.ddp_cpu_hpc_accelerator import DDPCPUHPCAccelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.ddp_cpu_spawn_accelerator import DDPCPUSpawnAccelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.ddp_hpc_accelerator import DDPHPCAccelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.ddp_spawn_accelerator import DDPSpawnAccelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.dp_accelerator import DataParallelAccelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.gpu_accelerator import GPUAccelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.horovod_accelerator import HorovodAccelerator # noqa: F401
from pytorch_lightning.accelerators.legacy.tpu_accelerator import TPUAccelerator # noqa: F401
from pytorch_lightning.accelerators.accelerator import Accelerator
from pytorch_lightning.accelerators.cpu import CPUAccelerator
from pytorch_lightning.accelerators.gpu import GPUAccelerator
from pytorch_lightning.accelerators.tpu import TPUAccelerator
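
The trimmed __init__.py narrows what the package root re-exports: only the new device-centric Accelerator, CPUAccelerator, GPUAccelerator and TPUAccelerator remain, while the backend-specific classes stay reachable only through their pytorch_lightning.accelerators.legacy modules (paths taken from the removed import lines above). A small sketch of both import styles:

# New root-level imports: exactly the four classes re-exported in the diff
from pytorch_lightning.accelerators import Accelerator, CPUAccelerator, GPUAccelerator, TPUAccelerator

# Backend-specific accelerators are no longer re-exported at the root and
# need their full legacy path, for example:
from pytorch_lightning.accelerators.legacy.ddp_accelerator import DDPAccelerator
from pytorch_lightning.accelerators.legacy.horovod_accelerator import HorovodAccelerator
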
52 changes: 31 additions & 21 deletions pytorch_lightning/accelerators/accelerator.py
@@ -204,6 +204,23 @@ def validation_step_end(self, output):
"""
return output

def predict(self, args):
"""The prediction step.

Args:
args: the arguments for the models predict step. Can consist of the following:
batch (:class:`~torch.Tensor` | (:class:`~torch.Tensor`, ...) | [:class:`~torch.Tensor`, ...]):
The output of your :class:`~torch.utils.data.DataLoader`. A tensor, tuple or list.
batch_idx (int): Integer displaying index of this batch
optimizer_idx (int): When using multiple optimizers, this argument will also be present.
hiddens(:class:`~torch.Tensor`): Passed in if
:paramref:`~pytorch_lightning.trainer.trainer.Trainer.truncated_bptt_steps` > 0.

"""
batch = self.to_device(args[0])
args[0] = batch
return self.training_type_plugin.predict(*args)

def process_dataloader(
self, dataloader: Union[Iterable, torch.utils.data.DataLoader]
) -> Union[Iterable, torch.utils.data.DataLoader]:
@@ -244,45 +261,35 @@ def backward(
def optimizer_step(
self,
optimizer: torch.optim.Optimizer,
current_epoch: int,
batch_idx: int,
opt_idx: int,
lambda_closure: Callable,
**kwargs
):
"""performs the actual optimizer step.

Args:
optimizer: the optimizer performing the step
current_epoch: current training epoch
batch_idx: index of the current batch
opt_idx: index of the current optimizer
lambda_closure: closure calculating the loss value

"""
model_ref = self.lightning_module
is_lbfgs = isinstance(optimizer, torch.optim.LBFGS)
native_amp = (
isinstance(self.precision_plugin, MixedPrecisionPlugin) and self.precision_plugin.backend == AMPType.NATIVE
)

self.precision_plugin.pre_optimizer_step(optimizer, opt_idx)
self.training_type_plugin.pre_optimizer_step(optimizer, opt_idx)

# model hook
res = model_ref.optimizer_step(
epoch=current_epoch,
batch_idx=batch_idx,
optimizer=optimizer,
optimizer_idx=opt_idx,
optimizer_closure=lambda_closure,
on_tpu=False, # TPUAccelerator class sets this as True
using_native_amp=native_amp,
using_lbfgs=is_lbfgs,
)
optimizer.step(closure=lambda_closure, **kwargs)

self.precision_plugin.post_optimizer_step(optimizer, opt_idx)
self.training_type_plugin.post_optimizer_step(optimizer, opt_idx)
return res

if self.rpc_enabled and self.training_type_plugin.is_main_rpc_process:

# Initialize optimizer step on main process
self.training_type_plugin.worker_optimizer_step(
model=self.lightning_module,
opt_idx=opt_idx,
**kwargs
)

def optimizer_zero_grad(
self, current_epoch: int, batch_idx: int, optimizer: torch.optim.Optimizer, opt_idx: int
@@ -374,3 +381,6 @@ def optimizer_state(self, optimizer: Optimizer) -> dict:

def on_save(self, checkpoint):
return checkpoint

def barrier(self, name: Optional[str] = None) -> None:
self.training_type_plugin.barrier(name=name)
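
Taken together, the accelerator.py hunks show the delegation pattern this refactor is built around: optimizer_step no longer assembles epoch/batch arguments or special-cases LBFGS and native AMP itself, but brackets a plain optimizer.step(closure=...) with precision- and training-type-plugin hooks, and collectives such as barrier() are forwarded to the training-type plugin. A stripped-down sketch of that pattern, with placeholder plugin objects rather than the real Lightning classes:

from typing import Callable, Optional

import torch


class SketchAccelerator:
    """Illustrative stand-in for the refactored Accelerator, not the real class."""

    def __init__(self, precision_plugin, training_type_plugin) -> None:
        # both plugins are expected to expose pre/post_optimizer_step;
        # the training-type plugin additionally owns collectives such as barrier()
        self.precision_plugin = precision_plugin
        self.training_type_plugin = training_type_plugin

    def optimizer_step(self, optimizer: torch.optim.Optimizer, opt_idx: int,
                       lambda_closure: Callable, **kwargs) -> None:
        self.precision_plugin.pre_optimizer_step(optimizer, opt_idx)
        self.training_type_plugin.pre_optimizer_step(optimizer, opt_idx)
        optimizer.step(closure=lambda_closure, **kwargs)  # closure recomputes the loss when needed
        self.precision_plugin.post_optimizer_step(optimizer, opt_idx)
        self.training_type_plugin.post_optimizer_step(optimizer, opt_idx)

    def barrier(self, name: Optional[str] = None) -> None:
        self.training_type_plugin.barrier(name=name)

Keeping device-, precision- and distribution-specific behaviour inside plugins, with the accelerator only orchestrating them, is the design choice the rest of the PR follows.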