From 10b16dbfabcca473c41249ccaaa1afefb3f72f4d Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Tue, 12 May 2020 06:54:23 -0400
Subject: [PATCH] made ddp the default if no backend specified with multiple
 GPUs (#1789)

* made ddp the default if no backend specified with multiple GPUs

* fix

* spawn

Co-authored-by: Jirka
---
 docs/source/multi_gpu.rst                          | 2 ++
 pytorch_lightning/trainer/distrib_data_parallel.py | 4 ++--
 tests/models/test_gpu.py                           | 1 +
 tests/trainer/test_trainer.py                      | 2 +-
 4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/source/multi_gpu.rst b/docs/source/multi_gpu.rst
index 9e32b2b0bba96..b7ebcce15687a 100644
--- a/docs/source/multi_gpu.rst
+++ b/docs/source/multi_gpu.rst
@@ -132,6 +132,8 @@ Lightning allows multiple ways of training
 - Horovod (`distributed_backend='horovod'`) (multi-machine, multi-gpu, configured at runtime)
 - TPUs (`num_tpu_cores=8|x`) (tpu or TPU pod)
 
+.. note:: If you request multiple GPUs without setting a mode, ddp will be automatically used.
+
 Data Parallel (dp)
 ^^^^^^^^^^^^^^^^^^
 `DataParallel `_ splits a batch across k GPUs. That is, if you have a batch of 32 and use dp with 2 gpus,
diff --git a/pytorch_lightning/trainer/distrib_data_parallel.py b/pytorch_lightning/trainer/distrib_data_parallel.py
index 4bf0c7ff4d56f..bd97d5ca339b0 100644
--- a/pytorch_lightning/trainer/distrib_data_parallel.py
+++ b/pytorch_lightning/trainer/distrib_data_parallel.py
@@ -203,8 +203,8 @@ def set_distributed_mode(self, distributed_backend):
             elif self.num_gpus > 1:
                 rank_zero_warn('You requested multiple GPUs but did not specify a backend, e.g.'
                                ' Trainer(distributed_backend=dp) (or ddp, ddp2).'
-                               ' Setting distributed_backend=dp for you.')
-                self.use_dp = True
+                               ' Setting distributed_backend=ddp for you.')
+                self.use_ddp = True
         elif distributed_backend == "dp":
             # do nothing if num_gpus == 0
             if self.num_gpus == 1:
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 6eafc19d863ee..f75b0a1f1a582 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -130,6 +130,7 @@ def assert_pred_same():
     trainer.fit(model)
 
 
+@pytest.mark.spawn
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_none_backend(tmpdir):
     """Make sure when using multiple GPUs the user can't use `distributed_backend = None`."""
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 40783b1ad84e2..e6cdc65338f2c 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -712,7 +712,7 @@ def test_gpu_choice(tmpdir):
         ),
         pytest.param(
             dict(distributed_backend=None, gpus=2),
-            dict(use_dp=True, use_ddp=False, use_ddp2=False, num_gpus=2, on_gpu=True, single_gpu=False, num_processes=1),
+            dict(use_dp=False, use_ddp=True, use_ddp2=False, num_gpus=2, on_gpu=True, single_gpu=False, num_processes=1),
            marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")]
        ),
        pytest.param(
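
Note: the sketch below is not part of the patch; it only illustrates the user-facing effect of the change, assuming the 0.7-era Trainer arguments (`gpus`, `distributed_backend`) that appear in the diff above.

    import pytorch_lightning as pl

    # With this patch, requesting multiple GPUs without naming a backend
    # warns and selects DistributedDataParallel (ddp) instead of the old
    # default, DataParallel (dp).
    trainer = pl.Trainer(gpus=2, distributed_backend=None)  # now resolves to ddp

    # The previous behaviour is still available by asking for dp explicitly.
    trainer_dp = pl.Trainer(gpus=2, distributed_backend='dp')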