From 41aeb7713422d34b2db1463d9cf6168637cf5bc8 Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Tue, 2 Mar 2021 11:56:21 +0000
Subject: [PATCH 1/6] Ensure we check deepspeed/sharded in multinode

---
 .../connectors/accelerator_connector.py      | 10 +++++-----
 .../test_accelerator_connector.py            | 19 +++++++++++++++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index 30bfbe2d963db..e60cb5abd0ab2 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -536,12 +536,12 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
         if self.distributed_backend == "horovod":
             self._set_horovod_backend()
 
-        # throw error to force user ddp or ddp2 choice
-        _ddp = (DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2)
-        if (self.num_nodes > 1 and self._distrib_type not in _ddp):
+        using_valid_distributed = self.use_ddp or self.use_ddp2
+        if self.num_nodes > 1 and not using_valid_distributed:
+            # throw error to force user to choose a supported distributed type such as ddp or ddp2
             raise MisconfigurationException(
-                'DataParallel does not support num_nodes > 1. Switching to DistributedDataParallel for you. '
-                'To silence this warning set `accelerator="ddp"` or `accelerator="ddp2"`'
+                'Your chosen distributed type does not support num_nodes > 1. '
+                'Please set accelerator=ddp or accelerator=ddp2.'
             )
 
         rank_zero_info(f'GPU available: {torch.cuda.is_available()}, used: {self._device_type == DeviceType.GPU}')
diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 50c9ccd47dfed..42eaa0089a37f 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -28,6 +28,8 @@
     DDPPlugin,
     DDPShardedPlugin,
     DDPSpawnPlugin,
+    DDPSpawnShardedPlugin,
+    DeepSpeedPlugin,
     PrecisionPlugin,
     SingleDevicePlugin,
 )
@@ -415,3 +417,20 @@ def test_plugin_accelerator_choice(accelerator, plugin):
     trainer = Trainer(plugins=plugin, num_processes=2)
 
     assert isinstance(trainer.accelerator.training_type_plugin, DDPShardedPlugin)
+
+
+@pytest.mark.parametrize(
+    ["accelerator", "plugin"],
+    [('ddp', DDPPlugin), ('ddp_spawn', DDPSpawnPlugin), ('ddp_sharded', DDPShardedPlugin),
+     ('ddp_sharded_spawn', DDPSpawnShardedPlugin), ('deepspeed', DeepSpeedPlugin)],
+)
+@mock.patch('torch.cuda.is_available', return_value=True)
+@mock.patch('torch.cuda.device_count', return_value=2)
+def test_accelerator_choice_multi_node_gpu(mock_available, mock_device_count, accelerator, plugin, tmpdir):
+    trainer = Trainer(
+        accelerator=accelerator,
+        default_root_dir=tmpdir,
+        num_nodes=2,
+        gpus=2,
+    )
+    assert isinstance(trainer.training_type_plugin, plugin)
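
Aside (illustration only, not part of the patch series): with the connector change in PATCH 1/6, asking for a distributed mode other than ddp/ddp2 together with num_nodes > 1 fails fast instead of silently switching backends. A minimal sketch of how that surfaces to a user, assuming the 1.2-era API as patched and a machine with 2 visible GPUs (on a GPU-less machine the GPU parsing raises a different error first); "dp" stands in for any backend that cannot span nodes:

    import pytest

    from pytorch_lightning import Trainer
    from pytorch_lightning.utilities.exceptions import MisconfigurationException

    # DataParallel ("dp") cannot span nodes, so the connector now raises
    # instead of switching the backend behind the user's back.
    with pytest.raises(MisconfigurationException, match="does not support num_nodes > 1"):
        Trainer(accelerator="dp", gpus=2, num_nodes=2)
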
From d6a56c73c9d6f8252c7280cf22fb6447f3087be0 Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Tue, 2 Mar 2021 11:59:22 +0000
Subject: [PATCH 2/6] Add CHANGELOG.md

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d98b646d0e6f1..502da63a4903f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -86,6 +86,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed duplicate logs appearing in console when using the python logging module ([#5509](https://github.com/PyTorchLightning/pytorch-lightning/pull/5509), [#6275](https://github.com/PyTorchLightning/pytorch-lightning/pull/6275))
 
+- Fix error thrown when using valid distributed mode in multi node ([#6297](https://github.com/PyTorchLightning/pytorch-lightning/pull/6297)
+
+
 ## [1.2.1] - 2021-02-23
 
 ### Fixed

From 9f0300803f9e3c631e7b23734f8658d16bd36622 Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Tue, 2 Mar 2021 11:59:32 +0000
Subject: [PATCH 3/6] Add CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 502da63a4903f..679fa375bff81 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -86,7 +86,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed duplicate logs appearing in console when using the python logging module ([#5509](https://github.com/PyTorchLightning/pytorch-lightning/pull/5509), [#6275](https://github.com/PyTorchLightning/pytorch-lightning/pull/6275))
 
-- Fix error thrown when using valid distributed mode in multi node ([#6297](https://github.com/PyTorchLightning/pytorch-lightning/pull/6297)
+- Fixed error thrown when using valid distributed mode in multi node ([#6297](https://github.com/PyTorchLightning/pytorch-lightning/pull/6297)
 
 
 ## [1.2.1] - 2021-02-23

From a9ae51967ddfafcf719fe68a27f629155d506bcb Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Tue, 2 Mar 2021 12:44:39 +0000
Subject: [PATCH 4/6] Drop mock, use actual multi-gpu node

---
 tests/accelerators/test_accelerator_connector.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 42eaa0089a37f..949e577f88bb3 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -34,6 +34,7 @@
     SingleDevicePlugin,
 )
 from pytorch_lightning.plugins.environments import ClusterEnvironment, SLURMEnvironment, TorchElasticEnvironment
+from pytorch_lightning.utilities import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
 from tests.helpers.runif import RunIf
@@ -424,9 +425,8 @@ def test_plugin_accelerator_choice(accelerator, plugin):
     [('ddp', DDPPlugin), ('ddp_spawn', DDPSpawnPlugin), ('ddp_sharded', DDPShardedPlugin),
      ('ddp_sharded_spawn', DDPSpawnShardedPlugin), ('deepspeed', DeepSpeedPlugin)],
 )
-@mock.patch('torch.cuda.is_available', return_value=True)
-@mock.patch('torch.cuda.device_count', return_value=2)
-def test_accelerator_choice_multi_node_gpu(mock_available, mock_device_count, accelerator, plugin, tmpdir):
+@RunIf(pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available."), min_gpus=2, skip_windows=True)
+def test_accelerator_choice_multi_node_gpu(accelerator, plugin, tmpdir):
     trainer = Trainer(
         accelerator=accelerator,
         default_root_dir=tmpdir,
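
Aside (illustration only, not part of the patch series): PATCH 5/6 below replaces the whole-test DeepSpeed gate with a per-case mark, so only the deepspeed case is skipped when the optional dependency is missing while the other cases still run. A minimal, self-contained sketch of that pytest pattern, using a hypothetical availability flag:

    import pytest

    _DEEPSPEED_AVAILABLE = False  # hypothetical stand-in for the real import guard

    @pytest.mark.parametrize("backend", [
        "ddp",
        "ddp_sharded",
        # only this case is skipped when DeepSpeed is missing;
        # the other parametrized cases still run
        pytest.param(
            "deepspeed",
            marks=pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
        ),
    ])
    def test_backend_name(backend):
        assert isinstance(backend, str)
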
From 38d7f8783f694253cc63d203d84d36e9a3311d25 Mon Sep 17 00:00:00 2001
From: Carlos Mocholi
Date: Tue, 2 Mar 2021 13:55:55 +0100
Subject: [PATCH 5/6] Address comment

---
 .../accelerators/test_accelerator_connector.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 949e577f88bb3..31bbb8a0b878d 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -420,12 +420,18 @@ def test_plugin_accelerator_choice(accelerator, plugin):
     assert isinstance(trainer.accelerator.training_type_plugin, DDPShardedPlugin)
 
 
-@pytest.mark.parametrize(
-    ["accelerator", "plugin"],
-    [('ddp', DDPPlugin), ('ddp_spawn', DDPSpawnPlugin), ('ddp_sharded', DDPShardedPlugin),
-     ('ddp_sharded_spawn', DDPSpawnShardedPlugin), ('deepspeed', DeepSpeedPlugin)],
-)
-@RunIf(pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available."), min_gpus=2, skip_windows=True)
+@pytest.mark.parametrize(["accelerator", "plugin"], [
+    ('ddp', DDPPlugin),
+    ('ddp_spawn', DDPSpawnPlugin),
+    ('ddp_sharded', DDPShardedPlugin),
+    ('ddp_sharded_spawn', DDPSpawnShardedPlugin),
+    pytest.param(
+        'deepspeed',
+        DeepSpeedPlugin,
+        marks=pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+    ),
+])
+@RunIf(min_gpus=2, skip_windows=True)
 def test_accelerator_choice_multi_node_gpu(accelerator, plugin, tmpdir):
     trainer = Trainer(
         accelerator=accelerator,

From 4e6396ab4055e0aa2aa94919dbc894ffb0a1008f Mon Sep 17 00:00:00 2001
From: SeanNaren
Date: Tue, 2 Mar 2021 13:01:23 +0000
Subject: [PATCH 6/6] Add back mock

---
 tests/accelerators/test_accelerator_connector.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py
index 31bbb8a0b878d..cd2b3041e7673 100644
--- a/tests/accelerators/test_accelerator_connector.py
+++ b/tests/accelerators/test_accelerator_connector.py
@@ -431,8 +431,9 @@ def test_plugin_accelerator_choice(accelerator, plugin):
         marks=pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
     ),
 ])
-@RunIf(min_gpus=2, skip_windows=True)
-def test_accelerator_choice_multi_node_gpu(accelerator, plugin, tmpdir):
+@mock.patch('torch.cuda.is_available', return_value=True)
+@mock.patch('torch.cuda.device_count', return_value=2)
+def test_accelerator_choice_multi_node_gpu(mock_is_available, mock_device_count, accelerator, plugin, tmpdir):
     trainer = Trainer(
         accelerator=accelerator,
         default_root_dir=tmpdir,
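
Closing aside (illustration only, not part of the patches): PATCH 6/6 restores the CUDA mocks so the multi-node choice test can run on CPU-only CI. A self-contained sketch of how stacked mock.patch decorators pair with injected arguments, since patches are applied bottom-up and the decorator closest to the function supplies the first mock argument:

    import torch
    from unittest import mock

    @mock.patch('torch.cuda.is_available', return_value=True)   # applied last, injected second
    @mock.patch('torch.cuda.device_count', return_value=2)      # applied first, injected first
    def fake_two_gpu_env(mock_device_count, mock_is_available):
        # inside the function both CUDA queries report the mocked 2-GPU machine
        assert torch.cuda.is_available() and torch.cuda.device_count() == 2

    fake_two_gpu_env()
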