From 4d2b081968f57b2c0dfb2b70a5a24d5c2975e1ec Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 21:03:20 -0400 Subject: [PATCH 001/168] r --- tests/base/model_valid_epoch_ends.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/base/model_valid_epoch_ends.py b/tests/base/model_valid_epoch_ends.py index 5170527397548..a7295aa9caef0 100644 --- a/tests/base/model_valid_epoch_ends.py +++ b/tests/base/model_valid_epoch_ends.py @@ -35,7 +35,6 @@ def validation_epoch_end__multiple_dataloaders(self, outputs): Args: outputs: list of individual outputs of each validation step """ - # if returned a scalar from validation_step, outputs is a list of tensor scalars # we return just the average in this case (if we want) def _mean(res, key): From 4513eb33ebfba692248691006d1da4fb77ee25f1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jul 2020 07:19:24 -0400 Subject: [PATCH 002/168] r --- pytorch_lightning/core/step_result.py | 177 ++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 pytorch_lightning/core/step_result.py diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py new file mode 100644 index 0000000000000..ff347974b25e5 --- /dev/null +++ b/pytorch_lightning/core/step_result.py @@ -0,0 +1,177 @@ +from typing import Optional, Dict +from torch import Tensor +import torch + + +class Result(Dict): + + def __init__( + self, + minimize: Optional[Tensor] = None, + early_stop_on: Tensor = None, + checkpoint_on: Tensor = None, + hiddens: Optional[Tensor] = None + ): + + super().__init__() + + self.early_stop_on = early_stop_on + self.checkpoint_on = checkpoint_on + + self.hiddens = hiddens + self.minimize = minimize + + def log( + self, + name, + value, + prog_bar=False, + logger=True, + reduce_on_batch_end=False, + reduce_on_epoch_end=True, + reduce_fx=torch.mean + ): + if 'meta' not in self: + self.__setitem__('meta', {}) + self.__set_meta(name, value, prog_bar, logger, reduce_on_batch_end, reduce_on_epoch_end, reduce_fx) + + # set the value + self.__setitem__(name, value) + + def __set_meta(self, name, value, prog_bar, logger, reduce_on_batch_end, reduce_on_epoch_end, reduce_fx): + # set the meta for the item + meta_value = value + if isinstance(meta_value, torch.Tensor): + meta_value = meta_value.detach() + meta = dict( + prog_bar=prog_bar, + logger=logger, + reduce_on_batch_end=reduce_on_batch_end, + reduce_on_epoch_end=reduce_on_epoch_end, + reduce_fx=reduce_fx, + value=meta_value + ) + self['meta'][name] = meta + + @property + def hiddens(self): + return self._hiddens + + @hiddens.setter + def hiddens(self, x): + if x is not None: + assert isinstance(x, Tensor), 'hiddens must be a torch.Tensor' + self._hiddens = x + self.__setitem__('hiddens', x) + + @property + def checkpoint_on(self): + # use minimize as default if no checkpoint_on is passed + if 'checkpoint_on' not in self: + minimize = self.__getitem__('minimize') + self.__setitem__('checkpoint_on', minimize) + + return self.__getitem__('checkpoint_on') + + @checkpoint_on.setter + def checkpoint_on(self, x): + if x is not None: + assert isinstance(x, Tensor), 'checkpoint_on must be a torch.Tensor' + self.__setitem__('checkpoint_on', x.detach()) + + @property + def early_stop_on(self): + # use minimize as default if no checkpoint_on is passed + if 'early_stop_on' not in self: + minimize = self.__getitem__('minimize') + self.__setitem__('early_stop_on', minimize) + + return self.__getitem__('early_stop_on') + + @early_stop_on.setter + def 
early_stop_on(self, x): + if x is not None: + assert isinstance(x, Tensor), 'early_stop_on must be a torch.Tensor' + self.__setitem__('early_stop_on', x.detach()) + + @property + def minimize(self): + return self.__getitem__('minimize') + + @minimize.setter + def minimize(self, x): + if x is not None: + assert isinstance(x, Tensor), 'metric to minimize must be a torch.Tensor' + m = 'the metric to minimize must have a computational graph. Minimize ' \ + 'can only be used in training_end, training_step_end, training_epoch_end' + assert x.grad_fn is not None, m + self.__setitem__('minimize', x) + + def __repr__(self): + copy = self.copy() + del copy['meta'] + + return str(copy) + + def __str__(self): + copy = self.copy() + del copy['meta'] + + return str(copy) + + +class TrainResult(Result): + + def __init__( + self, + minimize: Optional[Tensor] = None, + early_stop_on: Tensor = None, + checkpoint_on: Tensor = None, + hiddens: Optional[Tensor] = None + ): + + super().__init__(minimize, early_stop_on, checkpoint_on, hiddens) + + def log( + self, + name, + value, + prog_bar=False, + logger=True, + reduce_on_batch_end=True, + reduce_on_epoch_end=False, + reduce_fx=torch.mean + ): + super().log(name, value, prog_bar, logger, reduce_on_batch_end, reduce_on_epoch_end, reduce_fx) + + +class EvalResult(Result): + + def __init__( + self, + early_stop_on: Tensor = None, + checkpoint_on: Tensor = None, + hiddens: Optional[Tensor] = None + ): + + super().__init__(None, early_stop_on, checkpoint_on, hiddens) + + def log( + self, + name, + value, + prog_bar=False, + logger=True, + reduce_on_batch_end=False, + reduce_on_epoch_end=True, + reduce_fx=torch.mean + ): + super().log(name, value, prog_bar, logger, reduce_on_batch_end, reduce_on_epoch_end, reduce_fx) + + +if __name__ == '__main__': + import torch + result = EvalResult() + result.log('some', 123) + print(result) + result.minimize = torch.tensor(1) \ No newline at end of file From 5cc01ffc36dc011389af2c6e17232e46a81131a8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jul 2020 12:26:04 -0400 Subject: [PATCH 003/168] r --- pytorch_lightning/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/__init__.py b/pytorch_lightning/__init__.py index 1413d8d62cc27..09783e6d18382 100644 --- a/pytorch_lightning/__init__.py +++ b/pytorch_lightning/__init__.py @@ -55,6 +55,7 @@ from pytorch_lightning.trainer import Trainer from pytorch_lightning.utilities.seed import seed_everything from pytorch_lightning import metrics + from pytorch_lightning.core.step_result import TrainResult, EvalResult __all__ = [ 'Trainer', @@ -62,7 +63,9 @@ 'Callback', 'data_loader', 'seed_everything', - 'metrics' + 'metrics', + 'EvalResult', + 'TrainResult' ] # necessary for regular bolts imports. 
Skip exception since bolts is not always installed From c747f802b138f0fbf95e0706d3cef5557610f38b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jul 2020 13:12:28 -0400 Subject: [PATCH 004/168] patched optimizer closure with sr --- pytorch_lightning/core/step_result.py | 14 +++++++- pytorch_lightning/trainer/training_loop.py | 42 +++++++++++++++------- tests/base/deterministic_model.py | 12 +++++++ 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index ff347974b25e5..6e4f4552c5a23 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -1,6 +1,7 @@ from typing import Optional, Dict from torch import Tensor import torch +from copy import copy class Result(Dict): @@ -18,7 +19,7 @@ def __init__( self.early_stop_on = early_stop_on self.checkpoint_on = checkpoint_on - self.hiddens = hiddens + self._hiddens = hiddens self.minimize = minimize def log( @@ -107,6 +108,11 @@ def minimize(self, x): assert x.grad_fn is not None, m self.__setitem__('minimize', x) + def detach(self): + for k, v in self.items(): + if isinstance(v, torch.Tensor) and v.grad_fn is not None: + self.__setitem__(k, v.detach()) + def __repr__(self): copy = self.copy() del copy['meta'] @@ -119,6 +125,12 @@ def __str__(self): return str(copy) + def __copy__(self): + newone = type(self)() + for k, v in self.items(): + newone[k] = copy(v) + return newone + class TrainResult(Result): diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index fa493f2e1b09a..dc069aad1b553 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -153,6 +153,7 @@ def training_step(self, batch, batch_idx): import torch from torch.utils.data import DataLoader import torch.distributed as torch_distrib +from copy import copy from pytorch_lightning import _logger as log from pytorch_lightning.callbacks.base import Callback @@ -164,6 +165,7 @@ def training_step(self, batch, batch_idx): from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.parsing import AttributeDict from pytorch_lightning.utilities.memory import recursive_detach +from pytorch_lightning.core.step_result import EvalResult, TrainResult, Result try: from apex import amp @@ -780,26 +782,38 @@ def optimizer_closure(self, split_batch, batch_idx, opt_idx, optimizer, hiddens) # ---------------------------- # format and reduce outputs accordingly training_step_output_for_epoch_end = training_step_output - training_step_output = self.process_output(training_step_output, train=True) - - # TODO: temporary part of structured results PR - training_step_output = AttributeDict( - batch_loss=training_step_output[0], - pbar_on_batch_end=training_step_output[1], - log_metrics=training_step_output[2], - callback_metrics=training_step_output[3], - hiddens=training_step_output[4], - ) + is_result_obj = isinstance(training_step_output, Result) + + # don't allow EvalResult in the training_step + if isinstance(training_step_output, EvalResult): + raise MisconfigurationException('training_step cannot return EvalResult, ' + 'use a dict or TrainResult instead') + + # handle regular dicts + if not is_result_obj: + training_step_output = self.process_output(training_step_output, train=True) + + training_step_output = AttributeDict( + batch_loss=training_step_output[0], + pbar_on_batch_end=training_step_output[1], + 
log_metrics=training_step_output[2], + callback_metrics=training_step_output[3], + hiddens=training_step_output[4], + ) # if the user decides to finally reduce things in epoch_end, save raw output without graphs if isinstance(training_step_output_for_epoch_end, torch.Tensor): training_step_output_for_epoch_end = training_step_output_for_epoch_end.detach() + elif is_result_obj: + training_step_output_for_epoch_end = copy(training_step_output) + training_step_output_for_epoch_end.detach() else: training_step_output_for_epoch_end = recursive_detach(training_step_output_for_epoch_end) # accumulate loss # (if accumulate_grad_batches = 1 no effect) - closure_loss = training_step_output.batch_loss / self.accumulate_grad_batches + closure_loss = training_step_output.minimize if is_result_obj else training_step_output.batch_loss + closure_loss = closure_loss / self.accumulate_grad_batches # the loss will get scaled for amp. avoid any modifications to it untouched_loss = closure_loss.detach().clone() @@ -829,7 +843,11 @@ def optimizer_closure(self, split_batch, batch_idx, opt_idx, optimizer, hiddens) # once backward has been applied, release graph closure_loss = closure_loss.detach() - training_step_output.batch_loss = training_step_output.batch_loss.detach() + + if is_result_obj: + training_step_output.detach() + else: + training_step_output.batch_loss = training_step_output.batch_loss.detach() if self.use_horovod: # Synchronize Horovod to ensure gradient manipulations (e.g., loss scaling) are valid diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index a4988673c60a4..9315ef625a5ff 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -2,6 +2,7 @@ import torch from torch import nn from torch.utils.data import Dataset, DataLoader +from pytorch_lightning import TrainResult from pytorch_lightning.core.lightning import LightningModule @@ -97,6 +98,17 @@ def training_epoch_end_scalar(self, outputs): prototype_loss = outputs[0] return prototype_loss + # -------------------------- + # Result returns + # -------------------------- + def training_step_result_return(self, batch, batch_idx): + acc = self.step(batch, batch_idx) + + result = TrainResult(minimize=acc) + result.log('log_acc1', torch.tensor(12).type_as(acc), reduce_on_epoch_end=True) + + return result + # -------------------------- # dictionary returns # -------------------------- From ed9b4f8d1341c4795b98aa2f398548977265c446 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jul 2020 13:47:51 -0400 Subject: [PATCH 005/168] patched optimizer closure with sr --- pytorch_lightning/core/step_result.py | 35 ++++++++++++++++++++++ pytorch_lightning/trainer/training_loop.py | 20 ++++++++++--- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 6e4f4552c5a23..bd2265f49cb6a 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -108,6 +108,41 @@ def minimize(self, x): assert x.grad_fn is not None, m self.__setitem__('minimize', x) + @property + def callback_metrics(self): + result = { + 'early_stop_on': self.early_stop_on, + 'checkpoint_on': self.checkpoint_on + } + + return result + + @property + def batch_log_metrics(self): + """ + Gets the metrics to log at the end of the batch step + """ + result = {} + + meta = self['meta'] + for k, options in meta.items(): + if options['logger']: + result[k] = options['value'] + return result + + 
@property + def batch_pbar_metrics(self): + """ + Gets the metrics to log at the end of the batch step + """ + result = {} + + meta = self['meta'] + for k, options in meta.items(): + if options['prog_bar']: + result[k] = options['value'] + return result + def detach(self): for k, v in self.items(): if isinstance(v, torch.Tensor) and v.grad_fn is not None: diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index dc069aad1b553..a061c55fb69a6 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -624,7 +624,7 @@ def run_training_batch(self, batch, batch_idx): param.requires_grad = True # ------------------- - # calculate loss + # calculate loss (train step + train step end) # ------------------- opt_closure_result = self.optimizer_closure( split_batch, @@ -633,14 +633,26 @@ def run_training_batch(self, batch, batch_idx): optimizer, self.hiddens ) + is_result_obj = isinstance(opt_closure_result.training_step_output, Result) # ------------------------------ # POST forward bookkeeping # ------------------------------ batch_callback_metrics.append(opt_closure_result.training_step_output.callback_metrics) - batch_log_metrics.append(opt_closure_result.training_step_output.log_metrics) - self.add_progress_bar_metrics(opt_closure_result.training_step_output.pbar_on_batch_end) + # add metrics to loggers + if is_result_obj: + metrics_to_log = opt_closure_result.training_step_output.batch_log_metrics + else: + metrics_to_log = opt_closure_result.training_step_output.log_metrics + batch_log_metrics.append(metrics_to_log) + + # add metrics to progress bar + if is_result_obj: + metrics_for_pbar = opt_closure_result.training_step_output.batch_pbar_metrics + else: + metrics_for_pbar = opt_closure_result.training_step_output.pbar_on_batch_end + self.add_progress_bar_metrics(metrics_for_pbar) # track hiddens self.hiddens = opt_closure_result.hiddens @@ -766,7 +778,7 @@ def optimizer_closure(self, split_batch, batch_idx, opt_idx, optimizer, hiddens) wrap the forward step in a closure so second order methods work """ # --------------------------- - # FORWARD + # FORWARD (TRAINING STEP + TRAIN STEP END) # --------------------------- with self.profiler.profile('model_forward'): if self.use_amp and NATIVE_AMP_AVALAIBLE and not self.use_tpu: From 3f98d18e7258d9951118677350da96548510d3f0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jul 2020 13:49:20 -0400 Subject: [PATCH 006/168] patched optimizer closure with sr --- .../test_trainer_steps_result_return.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/trainer/test_trainer_steps_result_return.py diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py new file mode 100644 index 0000000000000..f610c64e1cef9 --- /dev/null +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -0,0 +1,49 @@ +""" +Tests to ensure that the training loop works with a dict +""" +from pytorch_lightning import Trainer +from tests.base.deterministic_model import DeterministicModel + + +def test_training_step_result(tmpdir): + """ + Tests that only training_step can be used + """ + model = DeterministicModel() + model.training_step = model.training_step_result_return + model.val_dataloader = None + + trainer = Trainer( + default_root_dir=tmpdir, + fast_dev_run=True, + weights_summary=None, + ) + trainer.fit(model) + + # make sure correct steps were called + assert 
model.training_step_called + assert not model.training_step_end_called + assert not model.training_epoch_end_called + + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.run_training_batch(batch, batch_idx) + assert out.signal == 0 + assert out.batch_log_metrics['log_acc1'] == 12.0 + assert out.batch_log_metrics['log_acc2'] == 7.0 + + train_step_out = out.training_step_output_for_epoch_end + pbar_metrics = train_step_out['progress_bar'] + assert 'log' in train_step_out + assert 'progress_bar' in train_step_out + assert train_step_out['train_step_test'] == 549 + assert pbar_metrics['pbar_acc1'] == 17.0 + assert pbar_metrics['pbar_acc2'] == 19.0 + + # make sure the optimizer closure returns the correct things + opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) + assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) + +test_training_step_result('') \ No newline at end of file From 8352a56bab9c0b827864ce9e1c26b058b9395d92 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jul 2020 14:11:01 -0400 Subject: [PATCH 007/168] added train step structured result --- pytorch_lightning/trainer/logging.py | 4 ++ pytorch_lightning/trainer/trainer.py | 3 ++ tests/base/deterministic_model.py | 5 ++- .../test_trainer_steps_result_return.py | 37 ++++++++++++++----- 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 35f5d5d35b9ca..17b48aeface66 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -1,3 +1,4 @@ +import os from abc import ABC from typing import Union, Iterable @@ -73,6 +74,9 @@ def log_metrics(self, metrics, grad_norm_dic, step=None): self.logger.agg_and_log_metrics(scalar_metrics, step=step) self.logger.save() + if 'PL_DEV_DEBUG' in os.environ: + self.debug_logged_metrics.append(scalar_metrics) + def add_progress_bar_metrics(self, metrics): for k, v in metrics.items(): if isinstance(v, torch.Tensor): diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 1f611ab7ac57c..b626280f17b5e 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -616,6 +616,9 @@ def __init__( self.on_colab_kaggle = os.getenv('COLAB_GPU') or os.getenv('KAGGLE_URL_BASE') + # for debugging purposes only, track the logged metrics + self.debug_logged_metrics = [] + # Callback system self.on_init_end() diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 9315ef625a5ff..e55d2dff33385 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -105,8 +105,11 @@ def training_step_result_return(self, batch, batch_idx): acc = self.step(batch, batch_idx) result = TrainResult(minimize=acc) - result.log('log_acc1', torch.tensor(12).type_as(acc), reduce_on_epoch_end=True) + result.log('log_and_pbar_acc1', torch.tensor(12).type_as(acc), reduce_on_epoch_end=True, prog_bar=True) + result.log('log_acc2', torch.tensor(7).type_as(acc), reduce_on_epoch_end=True) + result.log('pbar_acc3', torch.tensor(17).type_as(acc), reduce_on_epoch_end=True, logger=False, prog_bar=True) + self.training_step_called = True return result # -------------------------- diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index f610c64e1cef9..891a4a4311747 100644 --- 
a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -1,14 +1,19 @@ """ Tests to ensure that the training loop works with a dict """ +import os from pytorch_lightning import Trainer from tests.base.deterministic_model import DeterministicModel +from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult def test_training_step_result(tmpdir): """ Tests that only training_step can be used """ + # enable internal debugging actions + os.environ['PL_DEV_DEBUG'] = '1' + model = DeterministicModel() model.training_step = model.training_step_result_return model.val_dataloader = None @@ -31,19 +36,33 @@ def test_training_step_result(tmpdir): out = trainer.run_training_batch(batch, batch_idx) assert out.signal == 0 - assert out.batch_log_metrics['log_acc1'] == 12.0 + assert out.batch_log_metrics['log_and_pbar_acc1'] == 12.0 assert out.batch_log_metrics['log_acc2'] == 7.0 train_step_out = out.training_step_output_for_epoch_end - pbar_metrics = train_step_out['progress_bar'] - assert 'log' in train_step_out - assert 'progress_bar' in train_step_out - assert train_step_out['train_step_test'] == 549 - assert pbar_metrics['pbar_acc1'] == 17.0 - assert pbar_metrics['pbar_acc2'] == 19.0 + assert isinstance(train_step_out, TrainResult) + + assert 'minimize' in train_step_out + assert 'log_and_pbar_acc1' in train_step_out + assert 'log_acc2' in train_step_out + + # make sure we are using the correct metrics for callbacks + assert trainer.callback_metrics['early_stop_on'] == 171 + assert trainer.callback_metrics['checkpoint_on'] == 171 + + # make sure pbar metrics are correct + assert trainer.progress_bar_metrics['log_and_pbar_acc1'] == 12 + assert trainer.progress_bar_metrics['pbar_acc3'] == 17 + assert 'log_acc2' not in trainer.progress_bar_metrics + + # make sure correct metrics are logged + assert len(trainer.debug_logged_metrics) == 1 + logged_metrics = trainer.debug_logged_metrics[0] + assert logged_metrics['log_and_pbar_acc1'] == 12.0 + assert logged_metrics['log_acc2'] == 7.0 + assert 'pbar_acc3' not in logged_metrics + assert len(logged_metrics) == 3 # make sure the optimizer closure returns the correct things opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) - -test_training_step_result('') \ No newline at end of file From 7d453d4f3234e7144bffbdf18bb11d6fa37b9c01 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jul 2020 14:11:31 -0400 Subject: [PATCH 008/168] added train step structured result --- tests/trainer/test_trainer_steps_result_return.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 891a4a4311747..961f5adfc17d0 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -9,7 +9,8 @@ def test_training_step_result(tmpdir): """ - Tests that only training_step can be used + Tests that only training_step can be used with TrainResult + Makes sure that things are routed to pbar, loggers and loss accordingly """ # enable internal debugging actions os.environ['PL_DEV_DEBUG'] = '1' From 23403ce18430b1781b8feb80e4332beb26dc7e4a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jul 2020 14:12:31 -0400 Subject: [PATCH 009/168] added train step structured result --- 
tests/trainer/test_trainer_steps_result_return.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 961f5adfc17d0..ada6ffe1916fb 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -67,3 +67,11 @@ def test_training_step_result(tmpdir): # make sure the optimizer closure returns the correct things opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) + + +def test_training_step_epoch_end_result(tmpdir): + """ + Makes sure training_step and epoch_end can be used with Results (without batch_end) + """ + # TODO: implement + pass From 9bc77ac10fbad6a15c24a2447c08f9908b0a74cb Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jul 2020 14:27:08 -0400 Subject: [PATCH 010/168] added train step structured result --- tests/trainer/test_trainer_steps_result_return.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index ada6ffe1916fb..9db8fae8893d7 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -7,6 +7,12 @@ from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult +# TODOs: +# make checkpoint and early stopping use the correct metrics +# make sure step_ends receive a plain dict +# same for epoch_end +# make sure to auto-reduce when no epoch_end is implemented + def test_training_step_result(tmpdir): """ Tests that only training_step can be used with TrainResult From 9cdaf8fb396e5133bea13c4503d9ac16bc09f5b4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jul 2020 06:54:15 -0400 Subject: [PATCH 011/168] added train step structured result --- pytorch_lightning/core/step_result.py | 54 +++++++++++++++++-- pytorch_lightning/trainer/training_loop.py | 4 ++ tests/base/deterministic_model.py | 19 +++++++ .../test_trainer_steps_result_return.py | 25 ++++++++- 4 files changed, 97 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index bd2265f49cb6a..5847cce9cb184 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -22,6 +22,15 @@ def __init__( self._hiddens = hiddens self.minimize = minimize + def __getattr__(self, key): + try: + return self[key] + except KeyError: + raise AttributeError(f'Missing attribute "{key}"') + + def __setattr__(self, key, val): + self[key] = val + def log( self, name, @@ -149,10 +158,12 @@ def detach(self): self.__setitem__(k, v.detach()) def __repr__(self): - copy = self.copy() - del copy['meta'] + self_copy = self.copy() - return str(copy) + if 'meta' in self_copy: + del self_copy['meta'] + + return str(self_copy) def __str__(self): copy = self.copy() @@ -166,6 +177,42 @@ def __copy__(self): newone[k] = copy(v) return newone + @classmethod + def gather(cls, outputs): + meta = outputs[0]['meta'] + result = Result() + result = recursive_gather(outputs, result) + recursive_stack(result) + result['meta'] = meta + return result + + +def recursive_gather(outputs, result=None): + for out in outputs: + if 'meta' in out: + del out['meta'] + + for k, v in out.items(): + if isinstance(v, dict): + v = recursive_gather([v], result) + + if k not in result: + 
result[k] = [] + + result[k].append(v) + + return result + + +def recursive_stack(result): + for k, v in result.items(): + if isinstance(v, dict): + recursive_stack(v) + + if isinstance(v, list) and len(v) > 0 and isinstance(v[0], torch.Tensor): + v = torch.stack(v) + result[k] = v + class TrainResult(Result): @@ -219,6 +266,7 @@ def log( if __name__ == '__main__': import torch result = EvalResult() + result.minimize = 2 result.log('some', 123) print(result) result.minimize = torch.tensor(1) \ No newline at end of file diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index a061c55fb69a6..58540af8d1d85 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -531,6 +531,10 @@ def run_training_epoch_end(self, epoch_output): model = self.get_model() if self.is_overridden('training_epoch_end', model=model): self.global_step += 1 + + if isinstance(epoch_output[0], Result): + epoch_output = Result.gather(epoch_output) + epoch_output = model.training_epoch_end(epoch_output) _processed_outputs = self.process_output(epoch_output) log_epoch_metrics = _processed_outputs[2] diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index e55d2dff33385..b46f18e7b9c4d 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -112,6 +112,25 @@ def training_step_result_return(self, batch, batch_idx): self.training_step_called = True return result + def training_epoch_end_return(self, outputs): + """ + There should be an array of scalars without graphs that are all 171 (4 of them) + """ + self.training_epoch_end_called = True + + if self.use_dp or self.use_ddp2: + pass + else: + # only saw 4 batches + assert len(outputs) == 4 + for batch_out in outputs: + assert batch_out == 171 + assert batch_out.grad_fn is None + assert isinstance(batch_out, torch.Tensor) + + prototype_loss = outputs[0] + return prototype_loss + # -------------------------- # dictionary returns # -------------------------- diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 9db8fae8893d7..b532ed7729e7c 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -74,10 +74,31 @@ def test_training_step_result(tmpdir): opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) + # TODO: test that it gets reduced on epoch end + # TODO: test that on batch end gets reduced + def test_training_step_epoch_end_result(tmpdir): """ Makes sure training_step and epoch_end can be used with Results (without batch_end) """ - # TODO: implement - pass + os.environ['PL_DEV_DEBUG'] = '1' + + model = DeterministicModel() + model.training_step = model.training_step_result_return + model.training_epoch_end = model.training_epoch_end_return + model.val_dataloader = None + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + weights_summary=None, + ) + trainer.fit(model) + + # make sure correct steps were called + assert model.training_step_called + assert not model.training_step_end_called + assert model.training_epoch_end_called + +test_training_step_epoch_end_result('') \ No newline at end of file From 9309b9e1cd115941940f62323e1f5ad9d9d99395 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jul 2020 07:19:36 -0400 Subject: [PATCH 012/168] 
added train step structured result --- pytorch_lightning/core/step_result.py | 104 +++++++++++--------------- 1 file changed, 43 insertions(+), 61 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 5847cce9cb184..3178f4c9f86ed 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -19,18 +19,57 @@ def __init__( self.early_stop_on = early_stop_on self.checkpoint_on = checkpoint_on - self._hiddens = hiddens + self.hiddens = hiddens self.minimize = minimize + if minimize is not None and early_stop_on is None: + self.early_stop_on = minimize.detach() + if minimize is not None and checkpoint_on is None: + self.checkpoint_on = minimize.detach() + def __getattr__(self, key): try: - return self[key] + if key == 'callback_metrics': + return self.callback_metrics() + elif key == 'batch_log_metrics': + return self.batch_log_metrics() + elif key == 'batch_pbar_metrics': + return self.batch_pbar_metrics() + else: + return self[key] except KeyError: raise AttributeError(f'Missing attribute "{key}"') def __setattr__(self, key, val): + # ensure reserve keys are tensors and detached + if key in {'hiddens', 'checkpoint_on', 'early_stop_on'}: + self._assert_tensor_metric(key, val) + val = val.detach() + + # ensure minimize is a tensor and has grads + elif key == 'minimize': + err = 'Minimize can only be used in training_end, training_step_end, training_epoch_end' + self._assert_grad_tensor_metric(key, val, err) + + # ensure anything else that is a tensor is detached + elif isinstance(val, torch.Tensor): + val = val.detach() + self[key] = val + def _assert_tensor_metric(self, name, x): + if x is not None: + assert isinstance(x, Tensor), f'{name} must be a torch.Tensor' + + def _assert_grad_tensor_metric(self, name, x, additional_err: str = None): + if x is not None: + assert isinstance(x, Tensor), f'{name} must be a torch.Tensor' + m = f'{name} must have a computational graph.' 
+ + if additional_err: + m += f' {additional_err}' + assert x.grad_fn is not None, m + def log( self, name, @@ -63,61 +102,6 @@ def __set_meta(self, name, value, prog_bar, logger, reduce_on_batch_end, reduce_ ) self['meta'][name] = meta - @property - def hiddens(self): - return self._hiddens - - @hiddens.setter - def hiddens(self, x): - if x is not None: - assert isinstance(x, Tensor), 'hiddens must be a torch.Tensor' - self._hiddens = x - self.__setitem__('hiddens', x) - - @property - def checkpoint_on(self): - # use minimize as default if no checkpoint_on is passed - if 'checkpoint_on' not in self: - minimize = self.__getitem__('minimize') - self.__setitem__('checkpoint_on', minimize) - - return self.__getitem__('checkpoint_on') - - @checkpoint_on.setter - def checkpoint_on(self, x): - if x is not None: - assert isinstance(x, Tensor), 'checkpoint_on must be a torch.Tensor' - self.__setitem__('checkpoint_on', x.detach()) - - @property - def early_stop_on(self): - # use minimize as default if no checkpoint_on is passed - if 'early_stop_on' not in self: - minimize = self.__getitem__('minimize') - self.__setitem__('early_stop_on', minimize) - - return self.__getitem__('early_stop_on') - - @early_stop_on.setter - def early_stop_on(self, x): - if x is not None: - assert isinstance(x, Tensor), 'early_stop_on must be a torch.Tensor' - self.__setitem__('early_stop_on', x.detach()) - - @property - def minimize(self): - return self.__getitem__('minimize') - - @minimize.setter - def minimize(self, x): - if x is not None: - assert isinstance(x, Tensor), 'metric to minimize must be a torch.Tensor' - m = 'the metric to minimize must have a computational graph. Minimize ' \ - 'can only be used in training_end, training_step_end, training_epoch_end' - assert x.grad_fn is not None, m - self.__setitem__('minimize', x) - - @property def callback_metrics(self): result = { 'early_stop_on': self.early_stop_on, @@ -126,7 +110,6 @@ def callback_metrics(self): return result - @property def batch_log_metrics(self): """ Gets the metrics to log at the end of the batch step @@ -139,7 +122,6 @@ def batch_log_metrics(self): result[k] = options['value'] return result - @property def batch_pbar_metrics(self): """ Gets the metrics to log at the end of the batch step @@ -265,8 +247,8 @@ def log( if __name__ == '__main__': import torch - result = EvalResult() - result.minimize = 2 + result = TrainResult() + result.hiddens = torch.tensor(1) result.log('some', 123) print(result) result.minimize = torch.tensor(1) \ No newline at end of file From 824113054835b3e0c865103301da700a8e56abd6 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jul 2020 07:29:04 -0400 Subject: [PATCH 013/168] added train step structured result --- pytorch_lightning/core/step_result.py | 30 +++++++++++++++------------ 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 3178f4c9f86ed..ec8e8459224de 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -16,11 +16,14 @@ def __init__( super().__init__() - self.early_stop_on = early_stop_on - self.checkpoint_on = checkpoint_on - - self.hiddens = hiddens - self.minimize = minimize + if early_stop_on is not None: + self.early_stop_on = early_stop_on + if checkpoint_on is not None: + self.checkpoint_on = checkpoint_on + if hiddens is not None: + self.hiddens = hiddens + if minimize is not None: + self.minimize = minimize if minimize is not None and early_stop_on is 
None: self.early_stop_on = minimize.detach() @@ -30,21 +33,22 @@ def __init__( def __getattr__(self, key): try: if key == 'callback_metrics': - return self.callback_metrics() + return self.get_callback_metrics() elif key == 'batch_log_metrics': - return self.batch_log_metrics() + return self.get_batch_log_metrics() elif key == 'batch_pbar_metrics': - return self.batch_pbar_metrics() + return self.get_batch_pbar_metrics() else: return self[key] except KeyError: - raise AttributeError(f'Missing attribute "{key}"') + return None def __setattr__(self, key, val): # ensure reserve keys are tensors and detached if key in {'hiddens', 'checkpoint_on', 'early_stop_on'}: self._assert_tensor_metric(key, val) - val = val.detach() + if val is not None: + val = val.detach() # ensure minimize is a tensor and has grads elif key == 'minimize': @@ -102,7 +106,7 @@ def __set_meta(self, name, value, prog_bar, logger, reduce_on_batch_end, reduce_ ) self['meta'][name] = meta - def callback_metrics(self): + def get_callback_metrics(self): result = { 'early_stop_on': self.early_stop_on, 'checkpoint_on': self.checkpoint_on @@ -110,7 +114,7 @@ def callback_metrics(self): return result - def batch_log_metrics(self): + def get_batch_log_metrics(self): """ Gets the metrics to log at the end of the batch step """ @@ -122,7 +126,7 @@ def batch_log_metrics(self): result[k] = options['value'] return result - def batch_pbar_metrics(self): + def get_batch_pbar_metrics(self): """ Gets the metrics to log at the end of the batch step """ From ceeedc21767bbb4a9be42b77917518bf9ff1d6ef Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jul 2020 07:33:52 -0400 Subject: [PATCH 014/168] added train step structured result --- pytorch_lightning/core/step_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index ec8e8459224de..7883335a02aba 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -140,7 +140,7 @@ def get_batch_pbar_metrics(self): def detach(self): for k, v in self.items(): - if isinstance(v, torch.Tensor) and v.grad_fn is not None: + if isinstance(v, torch.Tensor): self.__setitem__(k, v.detach()) def __repr__(self): From 6bbe6d83e84c455ce1b2e199fc3a17f8913ab3fa Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jul 2020 08:08:15 -0400 Subject: [PATCH 015/168] added train step structured result --- pytorch_lightning/core/step_result.py | 2 +- pytorch_lightning/trainer/training_loop.py | 22 +++++++++++++++------- tests/base/deterministic_model.py | 19 ++++++++++--------- 3 files changed, 26 insertions(+), 17 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 7883335a02aba..ece9d34c30187 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -166,7 +166,7 @@ def __copy__(self): @classmethod def gather(cls, outputs): meta = outputs[0]['meta'] - result = Result() + result = cls() result = recursive_gather(outputs, result) recursive_stack(result) result['meta'] = meta diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 58540af8d1d85..1375e848ad24b 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -533,21 +533,29 @@ def run_training_epoch_end(self, epoch_output): self.global_step += 1 if isinstance(epoch_output[0], Result): - epoch_output = 
Result.gather(epoch_output) + epoch_output = epoch_output[0].__class__.gather(epoch_output) epoch_output = model.training_epoch_end(epoch_output) - _processed_outputs = self.process_output(epoch_output) - log_epoch_metrics = _processed_outputs[2] - callback_epoch_metrics = _processed_outputs[3] + if isinstance(epoch_output, Result): + epoch_log_metrics = epoch_output.epoch_log_metrics + epoch_progress_bar_metrics = epoch_output.epoch_progress_bar_metrics + epoch_callback_metrics = epoch_output.epoch_callback_metrics + else: + _processed_outputs = self.process_output(epoch_output) + epoch_progress_bar_metrics = _processed_outputs[1] + epoch_log_metrics = _processed_outputs[2] + epoch_callback_metrics = _processed_outputs[3] + + # TODO: do all of this for the user when no training_epoch end is defined and they used a result # add the metrics to the loggers - self.log_metrics(log_epoch_metrics, {}) + self.log_metrics(epoch_log_metrics, {}) # add metrics to callbacks - self.callback_metrics.update(callback_epoch_metrics) + self.callback_metrics.update(epoch_callback_metrics) # add metrics to progress_bar - self.add_progress_bar_metrics(_processed_outputs[1]) + self.add_progress_bar_metrics(epoch_progress_bar_metrics) def sync_horovod(self): if self.use_horovod: diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index b46f18e7b9c4d..bc70652dd49f8 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -112,7 +112,7 @@ def training_step_result_return(self, batch, batch_idx): self.training_step_called = True return result - def training_epoch_end_return(self, outputs): + def training_epoch_end_return(self, result): """ There should be an array of scalars without graphs that are all 171 (4 of them) """ @@ -122,14 +122,15 @@ def training_epoch_end_return(self, outputs): pass else: # only saw 4 batches - assert len(outputs) == 4 - for batch_out in outputs: - assert batch_out == 171 - assert batch_out.grad_fn is None - assert isinstance(batch_out, torch.Tensor) - - prototype_loss = outputs[0] - return prototype_loss + assert isinstance(result, TrainResult) + assert len(result.minimize) == 4 + assert self.count_num_graphs(result) == 0 + assert result.minimize.mean() == 171 + + result.log_acc2 = result.log_acc2.mean() + result.log_and_pbar_acc1 = result.log_and_pbar_acc1.mean() + result.pbar_acc3 = result.pbar_acc3.mean() + return result # -------------------------- # dictionary returns From c56ea84c0dd7d8b25e87a8ae693f4c58dc0ef1a2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jul 2020 22:03:44 -0400 Subject: [PATCH 016/168] added train step structured result --- pytorch_lightning/core/step_result.py | 23 ++++++++++++++++------ pytorch_lightning/trainer/training_loop.py | 12 +++++++++++ tests/base/deterministic_model.py | 3 --- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index ece9d34c30187..58cb7d75da61a 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -23,6 +23,8 @@ def __init__( if hiddens is not None: self.hiddens = hiddens if minimize is not None: + err = 'Minimize can only be used in training_end, training_step_end, training_epoch_end' + self._assert_grad_tensor_metric('minimize', minimize, err) self.minimize = minimize if minimize is not None and early_stop_on is None: @@ -38,6 +40,8 @@ def __getattr__(self, key): return self.get_batch_log_metrics() elif key == 
'batch_pbar_metrics': return self.get_batch_pbar_metrics() + elif key == 'epoch_log_metrics': + return self.get_epoch_log_metrics() else: return self[key] except KeyError: @@ -50,13 +54,8 @@ def __setattr__(self, key, val): if val is not None: val = val.detach() - # ensure minimize is a tensor and has grads - elif key == 'minimize': - err = 'Minimize can only be used in training_end, training_step_end, training_epoch_end' - self._assert_grad_tensor_metric(key, val, err) - # ensure anything else that is a tensor is detached - elif isinstance(val, torch.Tensor): + elif isinstance(val, torch.Tensor) and key != 'minimize': val = val.detach() self[key] = val @@ -126,6 +125,18 @@ def get_batch_log_metrics(self): result[k] = options['value'] return result + def get_epoch_log_metrics(self): + """ + Gets the metrics to log at the end of the batch step + """ + result = {} + + meta = self['meta'] + for k, options in meta.items(): + if options['logger']: + result[k] = options['value'] + return result + def get_batch_pbar_metrics(self): """ Gets the metrics to log at the end of the batch step diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 1375e848ad24b..f1128062ff153 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -532,11 +532,23 @@ def run_training_epoch_end(self, epoch_output): if self.is_overridden('training_epoch_end', model=model): self.global_step += 1 + # remove the protected keys so the user doesn't have to deal with them if isinstance(epoch_output[0], Result): epoch_output = epoch_output[0].__class__.gather(epoch_output) + minimize = epoch_output.minimize + early_stop_on = epoch_output.early_stop_on + checkpoint_on = epoch_output.checkpoint_on + del epoch_output['minimize'] + del epoch_output['early_stop_on'] + del epoch_output['checkpoint_on'] epoch_output = model.training_epoch_end(epoch_output) + if isinstance(epoch_output, Result): + epoch_output.minimize = minimize.mean() + epoch_output.early_stop_on = early_stop_on.mean() + epoch_output.checkpoint_on = checkpoint_on.mean() + if isinstance(epoch_output, Result): epoch_log_metrics = epoch_output.epoch_log_metrics epoch_progress_bar_metrics = epoch_output.epoch_progress_bar_metrics diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index bc70652dd49f8..b617935417bdc 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -123,9 +123,6 @@ def training_epoch_end_return(self, result): else: # only saw 4 batches assert isinstance(result, TrainResult) - assert len(result.minimize) == 4 - assert self.count_num_graphs(result) == 0 - assert result.minimize.mean() == 171 result.log_acc2 = result.log_acc2.mean() result.log_and_pbar_acc1 = result.log_and_pbar_acc1.mean() From 9df0e16e19392e6805c69ac51291e20022543ce5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 10:53:08 -0400 Subject: [PATCH 017/168] added train step structured result --- pytorch_lightning/trainer/training_loop.py | 41 ++++++++++++++++------ 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index f1128062ff153..9fff1618243e4 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -529,11 +529,20 @@ def run_on_epoch_end_hook(self, model): def run_training_epoch_end(self, epoch_output): model = self.get_model() + is_result_obj = 
isinstance(epoch_output[0], Result) + + epoch_log_metrics = {} + epoch_callback_metrics = {} + epoch_progress_bar_metrics = {} + + # -------------------------- + # EPOCH END STEP IF DEFINED + # -------------------------- if self.is_overridden('training_epoch_end', model=model): self.global_step += 1 # remove the protected keys so the user doesn't have to deal with them - if isinstance(epoch_output[0], Result): + if is_result_obj: epoch_output = epoch_output[0].__class__.gather(epoch_output) minimize = epoch_output.minimize early_stop_on = epoch_output.early_stop_on @@ -542,8 +551,10 @@ def run_training_epoch_end(self, epoch_output): del epoch_output['early_stop_on'] del epoch_output['checkpoint_on'] + # run training_epoch_end epoch_output = model.training_epoch_end(epoch_output) + # with a result we put back the main metrics and compute means if isinstance(epoch_output, Result): epoch_output.minimize = minimize.mean() epoch_output.early_stop_on = early_stop_on.mean() @@ -559,15 +570,25 @@ def run_training_epoch_end(self, epoch_output): epoch_log_metrics = _processed_outputs[2] epoch_callback_metrics = _processed_outputs[3] - # TODO: do all of this for the user when no training_epoch end is defined and they used a result - # add the metrics to the loggers - self.log_metrics(epoch_log_metrics, {}) - - # add metrics to callbacks - self.callback_metrics.update(epoch_callback_metrics) - - # add metrics to progress_bar - self.add_progress_bar_metrics(epoch_progress_bar_metrics) + # -------------------------- + # Structured Result (auto epoch end) + # -------------------------- + elif is_result_obj: + # TODO: reduce outputs for user + pass + + # -------------------------- + # track results + # -------------------------- + # TODO: do all of this for the user when no training_epoch end is defined and they used a result + # add the metrics to the loggers + self.log_metrics(epoch_log_metrics, {}) + + # add metrics to callbacks + self.callback_metrics.update(epoch_callback_metrics) + + # add metrics to progress_bar + self.add_progress_bar_metrics(epoch_progress_bar_metrics) def sync_horovod(self): if self.use_horovod: From 331fe558338f62f88abf8d4c061b8de8218d7f82 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 11:01:52 -0400 Subject: [PATCH 018/168] added train step structured result --- pytorch_lightning/core/step_result.py | 14 ++++++++++++++ pytorch_lightning/trainer/training_loop.py | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 58cb7d75da61a..73560dcc74da9 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -42,6 +42,8 @@ def __getattr__(self, key): return self.get_batch_pbar_metrics() elif key == 'epoch_log_metrics': return self.get_epoch_log_metrics() + elif key == 'epoch_pbar_metrics': + return self.get_epoch_pbar_metrics() else: return self[key] except KeyError: @@ -137,6 +139,18 @@ def get_epoch_log_metrics(self): result[k] = options['value'] return result + def get_epoch_pbar_metrics(self): + """ + Gets the metrics to log at the end of the batch step + """ + result = {} + + meta = self['meta'] + for k, options in meta.items(): + if options['prog_bar']: + result[k] = options['value'] + return result + def get_batch_pbar_metrics(self): """ Gets the metrics to log at the end of the batch step diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 
9fff1618243e4..4a0c34366bf2d 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -562,8 +562,8 @@ def run_training_epoch_end(self, epoch_output): if isinstance(epoch_output, Result): epoch_log_metrics = epoch_output.epoch_log_metrics - epoch_progress_bar_metrics = epoch_output.epoch_progress_bar_metrics - epoch_callback_metrics = epoch_output.epoch_callback_metrics + epoch_progress_bar_metrics = epoch_output.epoch_pbar_metrics + epoch_callback_metrics = epoch_output.callback_metrics else: _processed_outputs = self.process_output(epoch_output) epoch_progress_bar_metrics = _processed_outputs[1] From 0c8afc08966233ba9a290670aab8ea4ff2524427 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 11:15:44 -0400 Subject: [PATCH 019/168] added train step structured result --- pytorch_lightning/core/step_result.py | 8 ++++---- tests/base/deterministic_model.py | 9 ++++++--- tests/trainer/test_trainer_steps_result_return.py | 12 ++++++++++++ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 73560dcc74da9..6ca4944cc6e57 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -124,7 +124,7 @@ def get_batch_log_metrics(self): meta = self['meta'] for k, options in meta.items(): if options['logger']: - result[k] = options['value'] + result[k] = self[k] return result def get_epoch_log_metrics(self): @@ -136,7 +136,7 @@ def get_epoch_log_metrics(self): meta = self['meta'] for k, options in meta.items(): if options['logger']: - result[k] = options['value'] + result[k] = self[k] return result def get_epoch_pbar_metrics(self): @@ -148,7 +148,7 @@ def get_epoch_pbar_metrics(self): meta = self['meta'] for k, options in meta.items(): if options['prog_bar']: - result[k] = options['value'] + result[k] = self[k] return result def get_batch_pbar_metrics(self): @@ -160,7 +160,7 @@ def get_batch_pbar_metrics(self): meta = self['meta'] for k, options in meta.items(): if options['prog_bar']: - result[k] = options['value'] + result[k] = self[k] return result def detach(self): diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index b617935417bdc..cc5c366c7c8ec 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -124,9 +124,12 @@ def training_epoch_end_return(self, result): # only saw 4 batches assert isinstance(result, TrainResult) - result.log_acc2 = result.log_acc2.mean() - result.log_and_pbar_acc1 = result.log_and_pbar_acc1.mean() - result.pbar_acc3 = result.pbar_acc3.mean() + result.log_acc2 = result.log_acc2.mean() + 11 + result.log_and_pbar_acc1 = result.log_and_pbar_acc1.mean() + 11 + result.pbar_acc3 = result.pbar_acc3.mean() + 11 + result.log('epoch_end_log_acc', torch.tensor(1212).type_as(result.pbar_acc3), logger=True) + result.log('epoch_end_pbar_acc', torch.tensor(1213).type_as(result.pbar_acc3), logger=False, prog_bar=True) + result.log('epoch_end_log_pbar_acc', torch.tensor(1214).type_as(result.pbar_acc3), logger=True, prog_bar=True) return result # -------------------------- diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index b532ed7729e7c..8c83fa83276ba 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -101,4 +101,16 @@ def test_training_step_epoch_end_result(tmpdir): assert not 
model.training_step_end_called assert model.training_epoch_end_called + # make sure correct metrics were logged + logged_metrics = trainer.debug_logged_metrics[-1] + assert logged_metrics['log_and_pbar_acc1'] == 23.0 + assert logged_metrics['log_acc2'] == 18.0 + assert logged_metrics['epoch_end_log_acc'] == 1212.0 + assert logged_metrics['epoch_end_log_pbar_acc'] == 1214.0 + assert 'epoch_end_pbar_acc' not in logged_metrics + + assert trainer.callback_metrics['early_stop_on'] == 171 + assert trainer.callback_metrics['checkpoint_on'] == 171 + + test_training_step_epoch_end_result('') \ No newline at end of file From 7f8d72d9fa145a9e82b5fd3a86f5bfdb9fe4612f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 11:17:58 -0400 Subject: [PATCH 020/168] added train step structured result --- tests/trainer/test_trainer_steps_result_return.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 8c83fa83276ba..69003a40244f3 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -109,6 +109,15 @@ def test_training_step_epoch_end_result(tmpdir): assert logged_metrics['epoch_end_log_pbar_acc'] == 1214.0 assert 'epoch_end_pbar_acc' not in logged_metrics + # make sure pbar metrics are correct + assert trainer.progress_bar_metrics['log_and_pbar_acc1'] == 23.0 + assert trainer.progress_bar_metrics['pbar_acc3'] == 28.0 + assert trainer.progress_bar_metrics['epoch_end_pbar_acc'] == 1213.0 + assert trainer.progress_bar_metrics['epoch_end_log_pbar_acc'] == 1214.0 + assert 'epoch_end_log_acc' not in trainer.progress_bar_metrics + assert 'log_acc2' not in trainer.progress_bar_metrics + + # make sure callback metrics didn't change assert trainer.callback_metrics['early_stop_on'] == 171 assert trainer.callback_metrics['checkpoint_on'] == 171 From 8254f8e382e0c7826c9323539ac2389c02b61d7a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 11:18:20 -0400 Subject: [PATCH 021/168] added train step structured result --- tests/trainer/test_trainer_steps_result_return.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 69003a40244f3..335981314c13f 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -120,6 +120,3 @@ def test_training_step_epoch_end_result(tmpdir): # make sure callback metrics didn't change assert trainer.callback_metrics['early_stop_on'] == 171 assert trainer.callback_metrics['checkpoint_on'] == 171 - - -test_training_step_epoch_end_result('') \ No newline at end of file From 7c8a32e72d1be64047d1e6b62b12602de4794cba Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 12:38:04 -0400 Subject: [PATCH 022/168] added train step structured result --- pytorch_lightning/core/step_result.py | 64 ++++++++++++++---- pytorch_lightning/trainer/training_loop.py | 16 +++-- tests/base/deterministic_model.py | 31 +++++++-- .../test_trainer_steps_result_return.py | 65 ++++++++++++------- 4 files changed, 132 insertions(+), 44 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 6ca4944cc6e57..b13bffa29fe25 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -32,6 +32,12 @@ def __init__( if minimize is not None and 
checkpoint_on is None: self.checkpoint_on = minimize.detach() + self['meta'] = { + '_internal': { + '_reduce_on_epoch': False + } + } + def __getattr__(self, key): try: if key == 'callback_metrics': @@ -81,18 +87,18 @@ def log( value, prog_bar=False, logger=True, - reduce_on_batch_end=False, - reduce_on_epoch_end=True, + on_step=False, + on_epoch=True, reduce_fx=torch.mean ): if 'meta' not in self: self.__setitem__('meta', {}) - self.__set_meta(name, value, prog_bar, logger, reduce_on_batch_end, reduce_on_epoch_end, reduce_fx) + self.__set_meta(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx) # set the value self.__setitem__(name, value) - def __set_meta(self, name, value, prog_bar, logger, reduce_on_batch_end, reduce_on_epoch_end, reduce_fx): + def __set_meta(self, name, value, prog_bar, logger, on_step, on_epoch, reduce_fx): # set the meta for the item meta_value = value if isinstance(meta_value, torch.Tensor): @@ -100,13 +106,17 @@ def __set_meta(self, name, value, prog_bar, logger, reduce_on_batch_end, reduce_ meta = dict( prog_bar=prog_bar, logger=logger, - reduce_on_batch_end=reduce_on_batch_end, - reduce_on_epoch_end=reduce_on_epoch_end, + on_step=on_step, + on_epoch=on_epoch, reduce_fx=reduce_fx, value=meta_value ) self['meta'][name] = meta + # track whether any input requires reduction on epoch end + internal = self['meta']['_internal'] + internal['_reduce_on_epoch'] = max(internal['_reduce_on_epoch'], on_epoch) + def get_callback_metrics(self): result = { 'early_stop_on': self.early_stop_on, @@ -123,6 +133,8 @@ def get_batch_log_metrics(self): meta = self['meta'] for k, options in meta.items(): + if k == '_internal': + continue if options['logger']: result[k] = self[k] return result @@ -135,6 +147,8 @@ def get_epoch_log_metrics(self): meta = self['meta'] for k, options in meta.items(): + if k == '_internal': + continue if options['logger']: result[k] = self[k] return result @@ -147,6 +161,8 @@ def get_epoch_pbar_metrics(self): meta = self['meta'] for k, options in meta.items(): + if k == '_internal': + continue if options['prog_bar']: result[k] = self[k] return result @@ -159,6 +175,8 @@ def get_batch_pbar_metrics(self): meta = self['meta'] for k, options in meta.items(): + if k == '_internal': + continue if options['prog_bar']: result[k] = self[k] return result @@ -197,6 +215,28 @@ def gather(cls, outputs): result['meta'] = meta return result + @classmethod + def reduce_on_epoch_end(cls, outputs): + meta = outputs[0]['meta'] + result = cls() + result = recursive_gather(outputs, result) + recursive_stack(result) + + for k, option in meta.items(): + if k == '_internal': + continue + + if option['on_epoch']: + fx = option['reduce_fx'] + result[k] = fx(result[k]) + + result['meta'] = meta + return result + + @property + def should_reduce_on_epoch_end(self): + return self['meta']['_internal']['_reduce_on_epoch'] + def recursive_gather(outputs, result=None): for out in outputs: @@ -243,11 +283,11 @@ def log( value, prog_bar=False, logger=True, - reduce_on_batch_end=True, - reduce_on_epoch_end=False, + on_step=True, + on_epoch=False, reduce_fx=torch.mean ): - super().log(name, value, prog_bar, logger, reduce_on_batch_end, reduce_on_epoch_end, reduce_fx) + super().log(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx) class EvalResult(Result): @@ -267,11 +307,11 @@ def log( value, prog_bar=False, logger=True, - reduce_on_batch_end=False, - reduce_on_epoch_end=True, + on_step=False, + on_epoch=True, reduce_fx=torch.mean ): - super().log(name, value, prog_bar, 
logger, reduce_on_batch_end, reduce_on_epoch_end, reduce_fx) + super().log(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx) if __name__ == '__main__': diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 4a0c34366bf2d..b2831859dfd1b 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -455,7 +455,9 @@ def run_training_epoch(self): # only track outputs when user implements training_epoch_end # otherwise we will build up unnecessary memory - if self.is_overridden('training_epoch_end', model=self.get_model()): + step_out = batch_output.training_step_output_for_epoch_end + should_auto_reduce_train_result = isinstance(step_out, Result) and step_out.should_reduce_on_epoch_end + if self.is_overridden('training_epoch_end', model=self.get_model()) or should_auto_reduce_train_result: epoch_output.append(batch_output.training_step_output_for_epoch_end) # update LR schedulers @@ -529,7 +531,7 @@ def run_on_epoch_end_hook(self, model): def run_training_epoch_end(self, epoch_output): model = self.get_model() - is_result_obj = isinstance(epoch_output[0], Result) + is_result_obj = len(epoch_output) > 0 and isinstance(epoch_output[0], Result) epoch_log_metrics = {} epoch_callback_metrics = {} @@ -574,13 +576,17 @@ def run_training_epoch_end(self, epoch_output): # Structured Result (auto epoch end) # -------------------------- elif is_result_obj: - # TODO: reduce outputs for user - pass + epoch_output = epoch_output[0].__class__.reduce_on_epoch_end(epoch_output) + epoch_output.minimize = epoch_output.minimize.mean() + epoch_output.early_stop_on = epoch_output.early_stop_on.mean() + epoch_output.checkpoint_on = epoch_output.checkpoint_on.mean() + epoch_log_metrics = epoch_output.epoch_log_metrics + epoch_progress_bar_metrics = epoch_output.epoch_pbar_metrics + epoch_callback_metrics = epoch_output.callback_metrics # -------------------------- # track results # -------------------------- - # TODO: do all of this for the user when no training_epoch end is defined and they used a result # add the metrics to the loggers self.log_metrics(epoch_log_metrics, {}) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index cc5c366c7c8ec..61e63cb569703 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -101,13 +101,36 @@ def training_epoch_end_scalar(self, outputs): # -------------------------- # Result returns # -------------------------- - def training_step_result_return(self, batch, batch_idx): + def training_step_result_log_step_only(self, batch, batch_idx): acc = self.step(batch, batch_idx) + result = TrainResult(minimize=acc) + + # step only metrics + result.log('step_log_and_pbar_acc1', torch.tensor(11).type_as(acc), prog_bar=True) + result.log('step_log_acc2', torch.tensor(12).type_as(acc)) + result.log('step_pbar_acc3', torch.tensor(13).type_as(acc), logger=False, prog_bar=True) + + self.training_step_called = True + return result + + def training_step_result_log_epoch_only(self, batch, batch_idx): + acc = self.step(batch, batch_idx) + result = TrainResult(minimize=acc) + + result.log('epoch_log_and_pbar_acc1', torch.tensor(14).type_as(acc), on_epoch=True, prog_bar=True) + result.log('epoch_log_acc2', torch.tensor(15).type_as(acc), on_epoch=True) + result.log('epoch_pbar_acc3', torch.tensor(16).type_as(acc), on_epoch=True, logger=False, prog_bar=True) + self.training_step_called = True + return result + + def 
training_step_result_log_epoch_and_step(self, batch, batch_idx): + acc = self.step(batch, batch_idx) result = TrainResult(minimize=acc) - result.log('log_and_pbar_acc1', torch.tensor(12).type_as(acc), reduce_on_epoch_end=True, prog_bar=True) - result.log('log_acc2', torch.tensor(7).type_as(acc), reduce_on_epoch_end=True) - result.log('pbar_acc3', torch.tensor(17).type_as(acc), reduce_on_epoch_end=True, logger=False, prog_bar=True) + + result.log('step_epoch_log_and_pbar_acc1', torch.tensor(17).type_as(acc), on_epoch=True, prog_bar=True) + result.log('step_epoch_log_acc2', torch.tensor(18).type_as(acc), on_epoch=True) + result.log('step_epoch_pbar_acc3', torch.tensor(19).type_as(acc), on_epoch=True, logger=False, prog_bar=True) self.training_step_called = True return result diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 335981314c13f..db49147beca80 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -13,7 +13,7 @@ # same for epoch_end # make sure to auto-reduce when no epoch_end is implemented -def test_training_step_result(tmpdir): +def test_training_step_result_log_step_only(tmpdir): """ Tests that only training_step can be used with TrainResult Makes sure that things are routed to pbar, loggers and loss accordingly @@ -22,7 +22,9 @@ def test_training_step_result(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' model = DeterministicModel() - model.training_step = model.training_step_result_return + model.training_step = model.training_step_result_log_step_only + model.training_step_end = None + model.training_epoch_end = None model.val_dataloader = None trainer = Trainer( @@ -37,46 +39,63 @@ def test_training_step_result(tmpdir): assert not model.training_step_end_called assert not model.training_epoch_end_called + # make sure correct metrics are logged + assert len(trainer.debug_logged_metrics) == 2 + logged_metrics = trainer.debug_logged_metrics[0] + assert logged_metrics['step_log_and_pbar_acc1'] == 11.0 + assert logged_metrics['step_log_acc2'] == 12.0 + assert 'step_pbar_acc3' not in logged_metrics + assert len(logged_metrics) == 3 + + # make sure we are using the correct metrics for callbacks + assert trainer.callback_metrics['early_stop_on'] == 171 + assert trainer.callback_metrics['checkpoint_on'] == 171 + + # make sure pbar metrics are correct ang log metrics did not leak + assert trainer.progress_bar_metrics['step_log_and_pbar_acc1'] == 11 + assert trainer.progress_bar_metrics['step_pbar_acc3'] == 13 + assert 'step_log_acc2' not in trainer.progress_bar_metrics + # make sure training outputs what is expected for batch_idx, batch in enumerate(model.train_dataloader()): break out = trainer.run_training_batch(batch, batch_idx) assert out.signal == 0 - assert out.batch_log_metrics['log_and_pbar_acc1'] == 12.0 - assert out.batch_log_metrics['log_acc2'] == 7.0 + assert out.batch_log_metrics['step_log_and_pbar_acc1'] == 11.0 + assert out.batch_log_metrics['step_log_acc2'] == 12.0 train_step_out = out.training_step_output_for_epoch_end assert isinstance(train_step_out, TrainResult) assert 'minimize' in train_step_out - assert 'log_and_pbar_acc1' in train_step_out - assert 'log_acc2' in train_step_out - - # make sure we are using the correct metrics for callbacks - assert trainer.callback_metrics['early_stop_on'] == 171 - assert trainer.callback_metrics['checkpoint_on'] == 171 - - # make sure pbar metrics are correct - assert 
trainer.progress_bar_metrics['log_and_pbar_acc1'] == 12 - assert trainer.progress_bar_metrics['pbar_acc3'] == 17 - assert 'log_acc2' not in trainer.progress_bar_metrics - - # make sure correct metrics are logged - assert len(trainer.debug_logged_metrics) == 1 - logged_metrics = trainer.debug_logged_metrics[0] - assert logged_metrics['log_and_pbar_acc1'] == 12.0 - assert logged_metrics['log_acc2'] == 7.0 - assert 'pbar_acc3' not in logged_metrics - assert len(logged_metrics) == 3 + assert 'step_log_and_pbar_acc1' in train_step_out + assert 'step_log_acc2' in train_step_out # make sure the optimizer closure returns the correct things opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) +test_training_step_result_log_step_only('') + +def test_training_step_auto_reduce(tmpdir): # TODO: test that it gets reduced on epoch end # TODO: test that on batch end gets reduced + os.environ['PL_DEV_DEBUG'] = '1' + + model = DeterministicModel() + model.training_step = model.training_step_result_return + model.val_dataloader = None + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + weights_summary=None, + ) + trainer.fit(model) + + def test_training_step_epoch_end_result(tmpdir): """ From 692731342d69dde45d3fc8c102cf69ad6e85dcd1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 12:46:56 -0400 Subject: [PATCH 023/168] added train step structured result --- tests/base/deterministic_model.py | 6 +-- .../test_trainer_steps_result_return.py | 37 +++++++++++-------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 61e63cb569703..334c322c2d6a8 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -106,9 +106,9 @@ def training_step_result_log_step_only(self, batch, batch_idx): result = TrainResult(minimize=acc) # step only metrics - result.log('step_log_and_pbar_acc1', torch.tensor(11).type_as(acc), prog_bar=True) - result.log('step_log_acc2', torch.tensor(12).type_as(acc)) - result.log('step_pbar_acc3', torch.tensor(13).type_as(acc), logger=False, prog_bar=True) + result.log(f'step_log_and_pbar_acc1_b{batch_idx}', torch.tensor(11).type_as(acc), prog_bar=True) + result.log(f'step_log_acc2_b{batch_idx}', torch.tensor(12).type_as(acc)) + result.log(f'step_pbar_acc3_b{batch_idx}', torch.tensor(13).type_as(acc), logger=False, prog_bar=True) self.training_step_called = True return result diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index db49147beca80..1b7a700a75944 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -17,6 +17,8 @@ def test_training_step_result_log_step_only(tmpdir): """ Tests that only training_step can be used with TrainResult Makes sure that things are routed to pbar, loggers and loss accordingly + + Makes sure pbar and logs happen on step only when requested """ # enable internal debugging actions os.environ['PL_DEV_DEBUG'] = '1' @@ -27,9 +29,13 @@ def test_training_step_result_log_step_only(tmpdir): model.training_epoch_end = None model.val_dataloader = None + batches = 3 trainer = Trainer( default_root_dir=tmpdir, - fast_dev_run=True, + limit_train_batches=batches, + limit_val_batches=batches, + row_log_interval=1, + max_epochs=1, weights_summary=None, ) trainer.fit(model) @@ -39,22 
+45,23 @@ def test_training_step_result_log_step_only(tmpdir): assert not model.training_step_end_called assert not model.training_epoch_end_called - # make sure correct metrics are logged - assert len(trainer.debug_logged_metrics) == 2 - logged_metrics = trainer.debug_logged_metrics[0] - assert logged_metrics['step_log_and_pbar_acc1'] == 11.0 - assert logged_metrics['step_log_acc2'] == 12.0 - assert 'step_pbar_acc3' not in logged_metrics - assert len(logged_metrics) == 3 + # make sure correct metrics are logged (one per batch step as requested) + assert len(trainer.debug_logged_metrics) == batches + 1 + for batch_idx, logged_metrics in enumerate(trainer.debug_logged_metrics[:-1]): + assert logged_metrics[f'step_log_and_pbar_acc1_b{batch_idx}'] == 11.0 + assert logged_metrics[f'step_log_acc2_b{batch_idx}'] == 12.0 + assert f'step_pbar_acc3_b{batch_idx}' not in logged_metrics + assert len(logged_metrics) == 3 # make sure we are using the correct metrics for callbacks assert trainer.callback_metrics['early_stop_on'] == 171 assert trainer.callback_metrics['checkpoint_on'] == 171 # make sure pbar metrics are correct ang log metrics did not leak - assert trainer.progress_bar_metrics['step_log_and_pbar_acc1'] == 11 - assert trainer.progress_bar_metrics['step_pbar_acc3'] == 13 - assert 'step_log_acc2' not in trainer.progress_bar_metrics + for batch_idx in range(batches): + assert trainer.progress_bar_metrics[f'step_log_and_pbar_acc1_b{batch_idx}'] == 11 + assert trainer.progress_bar_metrics[f'step_pbar_acc3_b{batch_idx}'] == 13 + assert f'step_log_acc2_b{batch_idx}' not in trainer.progress_bar_metrics # make sure training outputs what is expected for batch_idx, batch in enumerate(model.train_dataloader()): @@ -62,15 +69,15 @@ def test_training_step_result_log_step_only(tmpdir): out = trainer.run_training_batch(batch, batch_idx) assert out.signal == 0 - assert out.batch_log_metrics['step_log_and_pbar_acc1'] == 11.0 - assert out.batch_log_metrics['step_log_acc2'] == 12.0 + assert out.batch_log_metrics[f'step_log_and_pbar_acc1_b{batch_idx}'] == 11.0 + assert out.batch_log_metrics[f'step_log_acc2_b{batch_idx}'] == 12.0 train_step_out = out.training_step_output_for_epoch_end assert isinstance(train_step_out, TrainResult) assert 'minimize' in train_step_out - assert 'step_log_and_pbar_acc1' in train_step_out - assert 'step_log_acc2' in train_step_out + assert f'step_log_and_pbar_acc1_b{batch_idx}' in train_step_out + assert f'step_log_acc2_b{batch_idx}' in train_step_out # make sure the optimizer closure returns the correct things opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) From 1c78a5b0de21066edb25a65a3ed843cfeeda376a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 13:08:45 -0400 Subject: [PATCH 024/168] added train step structured result --- pytorch_lightning/core/step_result.py | 8 +- tests/base/deterministic_model.py | 6 +- .../test_trainer_steps_result_return.py | 75 +++++++++++++++++++ 3 files changed, 82 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index b13bffa29fe25..ec1f6a3e1014e 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -135,7 +135,7 @@ def get_batch_log_metrics(self): for k, options in meta.items(): if k == '_internal': continue - if options['logger']: + if options['logger'] and options['on_step']: result[k] = self[k] return result @@ -149,7 +149,7 @@ def 
get_epoch_log_metrics(self): for k, options in meta.items(): if k == '_internal': continue - if options['logger']: + if options['logger'] and options['on_epoch']: result[k] = self[k] return result @@ -163,7 +163,7 @@ def get_epoch_pbar_metrics(self): for k, options in meta.items(): if k == '_internal': continue - if options['prog_bar']: + if options['prog_bar'] and options['on_epoch']: result[k] = self[k] return result @@ -177,7 +177,7 @@ def get_batch_pbar_metrics(self): for k, options in meta.items(): if k == '_internal': continue - if options['prog_bar']: + if options['prog_bar'] and options['on_step']: result[k] = self[k] return result diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 334c322c2d6a8..78420a47c94db 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -117,9 +117,9 @@ def training_step_result_log_epoch_only(self, batch, batch_idx): acc = self.step(batch, batch_idx) result = TrainResult(minimize=acc) - result.log('epoch_log_and_pbar_acc1', torch.tensor(14).type_as(acc), on_epoch=True, prog_bar=True) - result.log('epoch_log_acc2', torch.tensor(15).type_as(acc), on_epoch=True) - result.log('epoch_pbar_acc3', torch.tensor(16).type_as(acc), on_epoch=True, logger=False, prog_bar=True) + result.log(f'epoch_log_and_pbar_acc1_e{self.current_epoch}', torch.tensor(14).type_as(acc), on_epoch=True, prog_bar=True, on_step=False) + result.log(f'epoch_log_acc2_e{self.current_epoch}', torch.tensor(15).type_as(acc), on_epoch=True, on_step=False) + result.log(f'epoch_pbar_acc3_e{self.current_epoch}', torch.tensor(16).type_as(acc), on_epoch=True, logger=False, prog_bar=True, on_step=False) self.training_step_called = True return result diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 1b7a700a75944..b317a51a453be 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -83,7 +83,82 @@ def test_training_step_result_log_step_only(tmpdir): opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) + +def test_training_step_result_log_epoch_only(tmpdir): + """ + Tests that only training_step can be used with TrainResult + Makes sure that things are routed to pbar, loggers and loss accordingly + + Makes sure pbar and logs happen on epoch only when requested + """ + # enable internal debugging actions + os.environ['PL_DEV_DEBUG'] = '1' + + model = DeterministicModel() + model.training_step = model.training_step_result_log_epoch_only + model.training_step_end = None + model.training_epoch_end = None + model.val_dataloader = None + + epochs = 3 + batches = 2 + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=batches, + limit_val_batches=batches, + row_log_interval=1, + max_epochs=epochs, + weights_summary=None, + ) + trainer.fit(model) + + # make sure correct steps were called + assert model.training_step_called + assert not model.training_step_end_called + assert not model.training_epoch_end_called + + # make sure correct metrics are logged (one per batch step as requested) + assert len(trainer.debug_logged_metrics) == epochs * (batches + 1) + epoch_metrics = [x for x in trainer.debug_logged_metrics if len(x) > 1] + assert len(epoch_metrics) == epochs + for batch_idx, logged_metrics in enumerate(epoch_metrics): + assert 
logged_metrics[f'epoch_log_and_pbar_acc1_e{batch_idx}'] == 14.0 + assert logged_metrics[f'epoch_log_acc2_e{batch_idx}'] == 15.0 + assert f'epoch_pbar_acc3_e{batch_idx}' not in logged_metrics + assert len(logged_metrics) == 3 + + # make sure we are using the correct metrics for callbacks + assert trainer.callback_metrics['early_stop_on'] == 171 + assert trainer.callback_metrics['checkpoint_on'] == 171 + + # make sure pbar metrics are correct ang log metrics did not leak + for epoch_idx in range(epochs): + assert trainer.progress_bar_metrics[f'epoch_log_and_pbar_acc1_e{epoch_idx}'] == 14 + assert trainer.progress_bar_metrics[f'epoch_pbar_acc3_e{epoch_idx}'] == 16 + assert f'epoch_log_acc2_e{epoch_idx}' not in trainer.progress_bar_metrics + + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.run_training_batch(batch, batch_idx) + assert out.signal == 0 + assert len(out.batch_log_metrics) == 0 + + train_step_out = out.training_step_output_for_epoch_end + assert isinstance(train_step_out, TrainResult) + + assert 'minimize' in train_step_out + assert f'epoch_log_and_pbar_acc1_e{trainer.current_epoch}' in train_step_out + assert f'epoch_log_acc2_e{trainer.current_epoch}' in train_step_out + + # make sure the optimizer closure returns the correct things + opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) + assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) + test_training_step_result_log_step_only('') +test_training_step_result_log_epoch_only('') +print('a') def test_training_step_auto_reduce(tmpdir): # TODO: test that it gets reduced on epoch end From 5c67538811a598efde23def9af60fb6269f56130 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 13:10:58 -0400 Subject: [PATCH 025/168] added train step structured result --- .../test_trainer_steps_result_return.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index b317a51a453be..c32d698fe90ad 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -156,8 +156,28 @@ def test_training_step_result_log_epoch_only(tmpdir): opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) + +def test_training_step_result_log_step_and_epoch(tmpdir): + """ + Tests that only training_step can be used with TrainResult + Makes sure that things are routed to pbar, loggers and loss accordingly + + Makes sure pbar and logs happen on epoch only when requested + """ + # enable internal debugging actions + os.environ['PL_DEV_DEBUG'] = '1' + + model = DeterministicModel() + model.training_step = model.training_step_result_log_epoch_and_step + model.training_step_end = None + model.training_epoch_end = None + model.val_dataloader = None + # TODO + + test_training_step_result_log_step_only('') test_training_step_result_log_epoch_only('') +test_training_step_result_log_step_and_epoch('') print('a') def test_training_step_auto_reduce(tmpdir): From 870259cd026ebfc930685358c9160cff58dc709a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 14:21:16 -0400 Subject: [PATCH 026/168] added train step structured result --- pytorch_lightning/trainer/training_loop.py | 7 +- tests/base/deterministic_model.py 
| 9 ++- .../test_trainer_steps_result_return.py | 66 +++++++++++++++++-- 3 files changed, 70 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index b2831859dfd1b..053c3c166843b 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -588,7 +588,8 @@ def run_training_epoch_end(self, epoch_output): # track results # -------------------------- # add the metrics to the loggers - self.log_metrics(epoch_log_metrics, {}) + if epoch_log_metrics and len(epoch_log_metrics) > 0: + self.log_metrics(epoch_log_metrics, {}) # add metrics to callbacks self.callback_metrics.update(epoch_callback_metrics) @@ -611,7 +612,9 @@ def save_train_loop_metrics_to_loggers(self, batch_idx, batch_output): should_log_metrics = batch_idx % self.row_log_interval == 0 or self.should_stop if should_log_metrics or self.fast_dev_run: # logs user requested information to logger - self.log_metrics(batch_output.batch_log_metrics, batch_output.grad_norm_dic) + metrics = batch_output.batch_log_metrics + if len(metrics) > 0: + self.log_metrics(metrics, batch_output.grad_norm_dic) def save_loggers_in_training_loop(self, batch_idx): # when loggers should save to disk diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 78420a47c94db..fc1d8793ce184 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -128,9 +128,12 @@ def training_step_result_log_epoch_and_step(self, batch, batch_idx): acc = self.step(batch, batch_idx) result = TrainResult(minimize=acc) - result.log('step_epoch_log_and_pbar_acc1', torch.tensor(17).type_as(acc), on_epoch=True, prog_bar=True) - result.log('step_epoch_log_acc2', torch.tensor(18).type_as(acc), on_epoch=True) - result.log('step_epoch_pbar_acc3', torch.tensor(19).type_as(acc), on_epoch=True, logger=False, prog_bar=True) + val_1 = (5 + batch_idx) * (self.current_epoch + 1) + val_2 = (6 + batch_idx) * (self.current_epoch + 1) + val_3 = (7 + batch_idx) * (self.current_epoch + 1) + result.log(f'step_epoch_log_and_pbar_acc1', torch.tensor(val_1).type_as(acc), on_epoch=True, prog_bar=True) + result.log(f'step_epoch_log_acc2', torch.tensor(val_2).type_as(acc), on_epoch=True) + result.log(f'step_epoch_pbar_acc3', torch.tensor(val_3).type_as(acc), on_epoch=True, logger=False, prog_bar=True) self.training_step_called = True return result diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index c32d698fe90ad..fe7d88cde1a44 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -2,6 +2,7 @@ Tests to ensure that the training loop works with a dict """ import os +import torch from pytorch_lightning import Trainer from tests.base.deterministic_model import DeterministicModel from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult @@ -46,8 +47,8 @@ def test_training_step_result_log_step_only(tmpdir): assert not model.training_epoch_end_called # make sure correct metrics are logged (one per batch step as requested) - assert len(trainer.debug_logged_metrics) == batches + 1 - for batch_idx, logged_metrics in enumerate(trainer.debug_logged_metrics[:-1]): + assert len(trainer.debug_logged_metrics) == batches + for batch_idx, logged_metrics in enumerate(trainer.debug_logged_metrics): assert logged_metrics[f'step_log_and_pbar_acc1_b{batch_idx}'] == 11.0 assert 
logged_metrics[f'step_log_acc2_b{batch_idx}'] == 12.0 assert f'step_pbar_acc3_b{batch_idx}' not in logged_metrics @@ -118,8 +119,8 @@ def test_training_step_result_log_epoch_only(tmpdir): assert not model.training_epoch_end_called # make sure correct metrics are logged (one per batch step as requested) - assert len(trainer.debug_logged_metrics) == epochs * (batches + 1) - epoch_metrics = [x for x in trainer.debug_logged_metrics if len(x) > 1] + assert len(trainer.debug_logged_metrics) == epochs + epoch_metrics = trainer.debug_logged_metrics assert len(epoch_metrics) == epochs for batch_idx, logged_metrics in enumerate(epoch_metrics): assert logged_metrics[f'epoch_log_and_pbar_acc1_e{batch_idx}'] == 14.0 @@ -172,11 +173,62 @@ def test_training_step_result_log_step_and_epoch(tmpdir): model.training_step_end = None model.training_epoch_end = None model.val_dataloader = None - # TODO + epochs = 3 + batches = 2 + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=batches, + limit_val_batches=batches, + row_log_interval=1, + max_epochs=epochs, + weights_summary=None, + ) + trainer.fit(model) + + # make sure correct steps were called + assert model.training_step_called + assert not model.training_step_end_called + assert not model.training_epoch_end_called + + # make sure correct metrics are logged (one per batch step as requested) + assert len(trainer.debug_logged_metrics) == (epochs * batches) + epochs + epoch_metrics = trainer.debug_logged_metrics + epoch_idx = -1 + for i_start in range(0, len(epoch_metrics), batches + 1): + epoch_idx += 1 + epoch_outputs = epoch_metrics[i_start: i_start + batches + 1] + mean_vals = { + 'step_epoch_log_and_pbar_acc1': [], + 'step_epoch_log_acc2': [] + } + + # make sure each batch logged the expected value + for batch_idx in range(len(epoch_outputs) - 1): + logged_metrics = epoch_outputs[batch_idx] + + expected_val_1 = (5 + batch_idx) * (epoch_idx + 1) + expected_val_2 = (6 + batch_idx) * (epoch_idx + 1) + mean_vals['step_epoch_log_and_pbar_acc1'].append(torch.tensor(expected_val_1).float()) + mean_vals['step_epoch_log_acc2'].append(torch.tensor(expected_val_2).float()) + assert logged_metrics['step_epoch_log_and_pbar_acc1'] == expected_val_1 + assert logged_metrics['step_epoch_log_acc2'] == expected_val_2 + assert 'step_epoch_pbar_acc3' not in logged_metrics + assert len(logged_metrics) == 3 + + # make sure the metrics for the epoch end are actual means (the default reduce fx) or all the batches + epoch_end_metrics = epoch_outputs[-1] + eval_1 = torch.stack(mean_vals['step_epoch_log_and_pbar_acc1']).mean() + eval_2 = torch.stack(mean_vals['step_epoch_log_acc2']).mean() + assert epoch_end_metrics['step_epoch_log_and_pbar_acc1'] == eval_1 + assert epoch_end_metrics['step_epoch_log_acc2'] == eval_2 + assert 'step_epoch_pbar_acc3' not in epoch_end_metrics + assert len(logged_metrics) == 3 + + print('a') -test_training_step_result_log_step_only('') -test_training_step_result_log_epoch_only('') +# test_training_step_result_log_step_only('') +# test_training_step_result_log_epoch_only('') test_training_step_result_log_step_and_epoch('') print('a') From 4a36ea5dfcd10c16f3e7a7ae5d4635809d3a5b97 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 14:49:03 -0400 Subject: [PATCH 027/168] added autoreduce for train step --- pytorch_lightning/trainer/logging.py | 4 ++ pytorch_lightning/trainer/trainer.py | 1 + .../test_trainer_steps_result_return.py | 67 ++++++++++++++++++- 3 files changed, 69 insertions(+), 3 deletions(-) diff --git 
a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 17b48aeface66..4425da3a20d46 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -84,6 +84,10 @@ def add_progress_bar_metrics(self, metrics): self.progress_bar_metrics[k] = v + if 'PL_DEV_DEBUG' in os.environ: + metrics['debug_epoch'] = self.current_epoch + self.debug_pbar_added_metrics.append(metrics) + def metrics_to_scalars(self, metrics): new_metrics = {} for k, v in metrics.items(): diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b626280f17b5e..3b9ef8572b0f0 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -618,6 +618,7 @@ def __init__( # for debugging purposes only, track the logged metrics self.debug_logged_metrics = [] + self.debug_pbar_added_metrics = [] # Callback system self.on_init_end() diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index fe7d88cde1a44..af3d875a8f575 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -225,10 +225,71 @@ def test_training_step_result_log_step_and_epoch(tmpdir): assert 'step_epoch_pbar_acc3' not in epoch_end_metrics assert len(logged_metrics) == 3 - print('a') + # make sure we are using the correct metrics for callbacks + assert trainer.callback_metrics['early_stop_on'] == 171 + assert trainer.callback_metrics['checkpoint_on'] == 171 + + # ------------------------------- + # VERIFY PBAR METRICS + # ------------------------------- + # make sure pbar metrics are correct ang log metrics did not leak + all_pbar_metrics = trainer.debug_pbar_added_metrics + assert len(all_pbar_metrics) == (epochs * batches) + epochs + + epoch_idx = -1 + for i_start in range(0, len(all_pbar_metrics), batches + 1): + epoch_idx += 1 + epoch_outputs = all_pbar_metrics[i_start: i_start + batches + 1] + mean_vals = { + 'step_epoch_log_and_pbar_acc1': [], + 'step_epoch_pbar_acc3': [] + } + + # make sure each batch logged the expected value + for batch_idx in range(len(epoch_outputs) - 1): + logged_metrics = epoch_outputs[batch_idx] + + expected_val_1 = (5 + batch_idx) * (epoch_idx + 1) + expected_val_2 = (7 + batch_idx) * (epoch_idx + 1) + mean_vals['step_epoch_log_and_pbar_acc1'].append(torch.tensor(expected_val_1).float()) + mean_vals['step_epoch_pbar_acc3'].append(torch.tensor(expected_val_2).float()) + assert logged_metrics['step_epoch_log_and_pbar_acc1'] == expected_val_1 + assert logged_metrics['step_epoch_pbar_acc3'] == expected_val_2 + assert 'step_epoch_log_acc2' not in logged_metrics + assert len(logged_metrics) == 3 + + # make sure the metrics for the epoch end are actual means (the default reduce fx) or all the batches + epoch_end_metrics = epoch_outputs[-1] + eval_1 = torch.stack(mean_vals['step_epoch_log_and_pbar_acc1']).mean() + eval_2 = torch.stack(mean_vals['step_epoch_pbar_acc3']).mean() + assert epoch_end_metrics['step_epoch_log_and_pbar_acc1'] == eval_1 + assert epoch_end_metrics['step_epoch_pbar_acc3'] == eval_2 + assert 'step_epoch_log_acc2' not in epoch_end_metrics + assert len(logged_metrics) == 3 + + # ----------------------------------------- + # make sure training outputs what is expected + # ----------------------------------------- + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.run_training_batch(batch, batch_idx) + assert out.signal == 0 + assert 
len(out.batch_log_metrics) == 2 + + train_step_out = out.training_step_output_for_epoch_end + assert isinstance(train_step_out, TrainResult) + + assert 'minimize' in train_step_out + assert f'step_epoch_log_and_pbar_acc1' in train_step_out + assert f'step_epoch_log_acc2' in train_step_out + + # make sure the optimizer closure returns the correct things + opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) + assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) -# test_training_step_result_log_step_only('') -# test_training_step_result_log_epoch_only('') +test_training_step_result_log_step_only('') +test_training_step_result_log_epoch_only('') test_training_step_result_log_step_and_epoch('') print('a') From 4837cf4461357db6c427e36e12d842ec7aa8c8c9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 15:12:02 -0400 Subject: [PATCH 028/168] added auto reduce on train --- tests/base/deterministic_model.py | 14 ++-- .../test_trainer_steps_result_return.py | 82 +++++++++++-------- 2 files changed, 55 insertions(+), 41 deletions(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index fc1d8793ce184..96e879f921119 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -138,7 +138,7 @@ def training_step_result_log_epoch_and_step(self, batch, batch_idx): self.training_step_called = True return result - def training_epoch_end_return(self, result): + def training_epoch_end_return_for_log_epoch_and_step(self, result): """ There should be an array of scalars without graphs that are all 171 (4 of them) """ @@ -150,12 +150,12 @@ def training_epoch_end_return(self, result): # only saw 4 batches assert isinstance(result, TrainResult) - result.log_acc2 = result.log_acc2.mean() + 11 - result.log_and_pbar_acc1 = result.log_and_pbar_acc1.mean() + 11 - result.pbar_acc3 = result.pbar_acc3.mean() + 11 - result.log('epoch_end_log_acc', torch.tensor(1212).type_as(result.pbar_acc3), logger=True) - result.log('epoch_end_pbar_acc', torch.tensor(1213).type_as(result.pbar_acc3), logger=False, prog_bar=True) - result.log('epoch_end_log_pbar_acc', torch.tensor(1214).type_as(result.pbar_acc3), logger=True, prog_bar=True) + result.step_epoch_log_and_pbar_acc1 = result.step_epoch_log_and_pbar_acc1.prod() + result.step_epoch_log_acc2 = result.step_epoch_log_acc2.prod() + result.step_epoch_pbar_acc3 = result.step_epoch_pbar_acc3.prod() + result.log('epoch_end_log_acc', torch.tensor(1212).type_as(result.step_epoch_log_acc2), logger=True, on_epoch=True) + result.log('epoch_end_pbar_acc', torch.tensor(1213).type_as(result.step_epoch_log_acc2), logger=False, prog_bar=True, on_epoch=True) + result.log('epoch_end_log_pbar_acc', torch.tensor(1214).type_as(result.step_epoch_log_acc2), logger=True, prog_bar=True, on_epoch=True) return result # -------------------------- diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index af3d875a8f575..d402e98ef8371 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -288,29 +288,6 @@ def test_training_step_result_log_step_and_epoch(tmpdir): opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) -test_training_step_result_log_step_only('') -test_training_step_result_log_epoch_only('') 
-test_training_step_result_log_step_and_epoch('') -print('a') - -def test_training_step_auto_reduce(tmpdir): - # TODO: test that it gets reduced on epoch end - # TODO: test that on batch end gets reduced - - os.environ['PL_DEV_DEBUG'] = '1' - - model = DeterministicModel() - model.training_step = model.training_step_result_return - model.val_dataloader = None - - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - weights_summary=None, - ) - trainer.fit(model) - - def test_training_step_epoch_end_result(tmpdir): """ @@ -319,13 +296,17 @@ def test_training_step_epoch_end_result(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' model = DeterministicModel() - model.training_step = model.training_step_result_return - model.training_epoch_end = model.training_epoch_end_return + model.training_step = model.training_step_result_log_epoch_and_step + model.training_epoch_end = model.training_epoch_end_return_for_log_epoch_and_step model.val_dataloader = None + batches = 3 + epochs = 1 trainer = Trainer( default_root_dir=tmpdir, - max_epochs=1, + max_epochs=epochs, + row_log_interval=1, + limit_train_batches=batches, weights_summary=None, ) trainer.fit(model) @@ -336,16 +317,22 @@ def test_training_step_epoch_end_result(tmpdir): assert model.training_epoch_end_called # make sure correct metrics were logged - logged_metrics = trainer.debug_logged_metrics[-1] - assert logged_metrics['log_and_pbar_acc1'] == 23.0 - assert logged_metrics['log_acc2'] == 18.0 - assert logged_metrics['epoch_end_log_acc'] == 1212.0 - assert logged_metrics['epoch_end_log_pbar_acc'] == 1214.0 - assert 'epoch_end_pbar_acc' not in logged_metrics + logged_metrics = trainer.debug_logged_metrics + assert len(logged_metrics) == (epochs * batches) + epochs + last_logged = logged_metrics[-1] + + assert last_logged['step_epoch_log_and_pbar_acc1'] == 210.0 + assert last_logged['step_epoch_log_acc2'] == 336.0 + assert last_logged['epoch_end_log_acc'] == 1212.0 + assert last_logged['epoch_end_log_pbar_acc'] == 1214.0 + assert 'epoch_end_pbar_acc' not in last_logged # make sure pbar metrics are correct - assert trainer.progress_bar_metrics['log_and_pbar_acc1'] == 23.0 - assert trainer.progress_bar_metrics['pbar_acc3'] == 28.0 + logged_pbar = trainer.debug_pbar_added_metrics + assert len(logged_pbar) == (epochs * batches) + epochs + + assert trainer.progress_bar_metrics['step_epoch_log_and_pbar_acc1'] == 210.0 + assert trainer.progress_bar_metrics['step_epoch_pbar_acc3'] == 504.0 assert trainer.progress_bar_metrics['epoch_end_pbar_acc'] == 1213.0 assert trainer.progress_bar_metrics['epoch_end_log_pbar_acc'] == 1214.0 assert 'epoch_end_log_acc' not in trainer.progress_bar_metrics @@ -354,3 +341,30 @@ def test_training_step_epoch_end_result(tmpdir): # make sure callback metrics didn't change assert trainer.callback_metrics['early_stop_on'] == 171 assert trainer.callback_metrics['checkpoint_on'] == 171 + + # ----------------------------------------- + # make sure training outputs what is expected + # ----------------------------------------- + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.run_training_batch(batch, batch_idx) + assert out.signal == 0 + assert len(out.batch_log_metrics) == 2 + + train_step_out = out.training_step_output_for_epoch_end + assert isinstance(train_step_out, TrainResult) + + assert 'minimize' in train_step_out + assert f'step_epoch_log_and_pbar_acc1' in train_step_out + assert f'step_epoch_log_acc2' in train_step_out + + # make sure the optimizer closure returns the correct 
things + opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) + assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) + +test_training_step_result_log_step_only('') +test_training_step_result_log_epoch_only('') +test_training_step_result_log_step_and_epoch('') +test_training_step_epoch_end_result('') +print('a') From 63789856d13ef50d4e332050bd48c91aec1f8b2b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 15:12:14 -0400 Subject: [PATCH 029/168] added auto reduce on train --- tests/trainer/test_trainer_steps_result_return.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index d402e98ef8371..c2e5a770ad69f 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -362,9 +362,3 @@ def test_training_step_epoch_end_result(tmpdir): # make sure the optimizer closure returns the correct things opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) - -test_training_step_result_log_step_only('') -test_training_step_result_log_epoch_only('') -test_training_step_result_log_step_and_epoch('') -test_training_step_epoch_end_result('') -print('a') From f7f654a5e5829ede14818dff7ba4e6fbe359f52b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 15:12:40 -0400 Subject: [PATCH 030/168] added auto reduce on train --- tests/trainer/test_trainer_steps_result_return.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index c2e5a770ad69f..88f20f32a2d08 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -10,9 +10,6 @@ # TODOs: # make checkpoint and early stopping use the correct metrics -# make sure step_ends receive a plain dict -# same for epoch_end -# make sure to auto-reduce when no epoch_end is implemented def test_training_step_result_log_step_only(tmpdir): """ From 73fd54b043dae4cb0d0a4694221028fd6e59486c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 15:13:23 -0400 Subject: [PATCH 031/168] added auto reduce on train --- tests/trainer/test_trainer_steps_result_return.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 88f20f32a2d08..42dafd3864878 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -10,6 +10,7 @@ # TODOs: # make checkpoint and early stopping use the correct metrics +# test with train_step_end def test_training_step_result_log_step_only(tmpdir): """ From b3f38c2594ce2dd507b1eb98fa568acbb85f464a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 15:14:10 -0400 Subject: [PATCH 032/168] added auto reduce on train --- tests/trainer/test_trainer_steps_result_return.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 42dafd3864878..5ee2be98e3a43 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -11,6 +11,7 @@ # TODOs: # make checkpoint and early stopping 
use the correct metrics # test with train_step_end +# add logging + row interval tests def test_training_step_result_log_step_only(tmpdir): """ From 6e64ba95bc5e8c43ac30a79cf1bfd0c66fdddae2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 15:52:49 -0400 Subject: [PATCH 033/168] added auto reduce on train --- pytorch_lightning/callbacks/early_stopping.py | 5 +++ .../callbacks/model_checkpoint.py | 5 +++ pytorch_lightning/core/step_result.py | 2 - pytorch_lightning/trainer/trainer.py | 5 ++- pytorch_lightning/trainer/training_loop.py | 7 ++- tests/base/deterministic_model.py | 19 ++++++++ .../test_trainer_steps_result_return.py | 43 +++++++++++++++++++ 7 files changed, 82 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 544854fa4e983..4248ca2cfabd9 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -142,6 +142,11 @@ def on_validation_end(self, trainer, pl_module): def _run_early_stopping_check(self, trainer, pl_module): logs = trainer.callback_metrics + + # support structured results + if 'early_stop_on' in logs and logs['early_stop_on'] is not None: + self.monitor = 'early_stop_on' + if not self._validate_condition_metric(logs): return # short circuit if metric not present diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index f70d8d8d0a5e1..d3e8544f85217 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -270,6 +270,11 @@ def on_validation_end(self, trainer, pl_module): metrics = trainer.callback_metrics epoch = trainer.current_epoch + + # support structured results + if 'checkpoint_on' in metrics and metrics['checkpoint_on'] is not None: + self.monitor = 'checkpoint_on' + if self.save_top_k == 0: # no models are saved return diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index ec1f6a3e1014e..117e1ea45a325 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -27,8 +27,6 @@ def __init__( self._assert_grad_tensor_metric('minimize', minimize, err) self.minimize = minimize - if minimize is not None and early_stop_on is None: - self.early_stop_on = minimize.detach() if minimize is not None and checkpoint_on is None: self.checkpoint_on = minimize.detach() diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 3b9ef8572b0f0..80896005f33dd 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -616,9 +616,12 @@ def __init__( self.on_colab_kaggle = os.getenv('COLAB_GPU') or os.getenv('KAGGLE_URL_BASE') - # for debugging purposes only, track the logged metrics + # --------------------------- + # only active when debugging PL for dev purposes and tests + # --------------------------- self.debug_logged_metrics = [] self.debug_pbar_added_metrics = [] + self.debug_saved_losses = [] # Callback system self.on_init_end() diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 053c3c166843b..862a42be8d327 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -143,7 +143,7 @@ def training_step(self, batch, batch_idx): trainer = Trainer(terminate_on_nan=True) """ - +import os import subprocess from abc import ABC, abstractmethod from 
typing import Callable @@ -925,6 +925,11 @@ def optimizer_closure(self, split_batch, batch_idx, opt_idx, optimizer, hiddens) with self.profiler.profile('on_after_backward'): model_ref.on_after_backward() + # when in dev debugging track the losses + if 'PL_DEV_DEBUG' in os.environ: + loss_dict = {'batch_idx': batch_idx, 'epoch': self.current_epoch, 'loss': untouched_loss.detach()} + self.debug_saved_losses.append(loss_dict) + result = AttributeDict( loss=untouched_loss, training_step_output=training_step_output, diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 96e879f921119..a21af96da95f8 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -101,6 +101,25 @@ def training_epoch_end_scalar(self, outputs): # -------------------------- # Result returns # -------------------------- + def training_step_no_default_callbacks_for_train_loop(self, batch, batch_idx): + """ + Early stop and checkpoint only on these values + """ + acc = self.step(batch, batch_idx) + result = TrainResult(minimize=acc) + assert 'early_step_on' not in result + assert 'checkpoint_on' in result + return result + + def training_step_result_log_epoch_and_step_for_callbacks(self, batch, batch_idx): + """ + Early stop and checkpoint only on these values + """ + losses = [20, 19, 18, 10, 15, 14, 9, 11, 11, 20, 22] + loss = losses[batch_idx] + result = TrainResult(minimize=loss, early_stop_on=loss, checkpoint_on=loss) + return result + def training_step_result_log_step_only(self, batch, batch_idx): acc = self.step(batch, batch_idx) result = TrainResult(minimize=acc) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 5ee2be98e3a43..aa9ef0765acfd 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -361,3 +361,46 @@ def test_training_step_epoch_end_result(tmpdir): # make sure the optimizer closure returns the correct things opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) + + +def test_no_auto_callbacks_with_train_loop_only(tmpdir): + """ + Make sure early stop + checkpoint work with only a train loop + """ + os.environ['PL_DEV_DEBUG'] = '1' + + model = DeterministicModel() + model.training_step = model.training_step_no_default_callbacks_for_train_loop + model.training_epoch_end = None + model.val_dataloader = None + + batches = 3 + epochs = 3 + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=epochs, + row_log_interval=1, + limit_train_batches=batches, + weights_summary=None, + ) + trainer.fit(model) + + all_losses = trainer.debug_saved_losses + assert len(all_losses) == batches * epochs + + assert trainer.checkpoint_callback.monitor == 'checkpoint_on' + assert trainer.early_stop_callback is None + + trainer = Trainer( + default_root_dir=tmpdir, + early_stop_callback=True, + max_epochs=epochs, + row_log_interval=1, + limit_train_batches=batches, + weights_summary=None, + ) + trainer.fit(model) + + assert trainer.early_stop_callback.monitor == 'val_loss' + +test_no_auto_callbacks_with_train_loop_only('') \ No newline at end of file From 1b24903312f248c8a6b13fbe50413619eb18298f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 16:08:52 -0400 Subject: [PATCH 034/168] added hooks --- pytorch_lightning/callbacks/base.py | 24 +++++++++++++++ 
pytorch_lightning/callbacks/early_stopping.py | 2 ++ pytorch_lightning/trainer/callback_hook.py | 30 +++++++++++++++++++ tests/base/deterministic_model.py | 16 ++++++---- .../test_trainer_steps_result_return.py | 26 +++++++++++++++- 5 files changed, 92 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/callbacks/base.py b/pytorch_lightning/callbacks/base.py index dac8ddc11c093..37ef84c796ec2 100644 --- a/pytorch_lightning/callbacks/base.py +++ b/pytorch_lightning/callbacks/base.py @@ -46,6 +46,30 @@ def on_sanity_check_end(self, trainer, pl_module): """Called when the validation sanity check ends.""" pass + def on_train_epoch_start(self, trainer, pl_module): + """Called when the train epoch begins.""" + pass + + def on_train_epoch_end(self, trainer, pl_module): + """Called when the train epoch ends.""" + pass + + def on_val_epoch_start(self, trainer, pl_module): + """Called when the val epoch begins.""" + pass + + def on_val_epoch_end(self, trainer, pl_module): + """Called when the val epoch ends.""" + pass + + def on_test_epoch_start(self, trainer, pl_module): + """Called when the test epoch begins.""" + pass + + def on_test_epoch_end(self, trainer, pl_module): + """Called when the test epoch ends.""" + pass + def on_epoch_start(self, trainer, pl_module): """Called when the epoch begins.""" pass diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 4248ca2cfabd9..78ca2c31ae9c1 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -140,6 +140,8 @@ def on_sanity_check_end(self, trainer, pl_module): def on_validation_end(self, trainer, pl_module): self._run_early_stopping_check(trainer, pl_module) + def on_epoch_end(self, trainer, pl_module): + def _run_early_stopping_check(self, trainer, pl_module): logs = trainer.callback_metrics diff --git a/pytorch_lightning/trainer/callback_hook.py b/pytorch_lightning/trainer/callback_hook.py index 50ea8bb7ce3c4..6266cccc25f1e 100644 --- a/pytorch_lightning/trainer/callback_hook.py +++ b/pytorch_lightning/trainer/callback_hook.py @@ -51,6 +51,36 @@ def on_sanity_check_end(self): for callback in self.callbacks: callback.on_sanity_check_end(self, self.get_model()) + def on_train_epoch_start(self): + """Called when the epoch begins.""" + for callback in self.callbacks: + callback.on_train_epoch_start(self, self.get_model()) + + def on_train_epoch_end(self): + """Called when the epoch begins.""" + for callback in self.callbacks: + callback.on_train_epoch_end(self, self.get_model()) + + def on_val_epoch_start(self): + """Called when the epoch begins.""" + for callback in self.callbacks: + callback.on_val_epoch_start(self, self.get_model()) + + def on_val_epoch_end(self): + """Called when the epoch begins.""" + for callback in self.callbacks: + callback.on_val_epoch_end(self, self.get_model()) + + def on_test_epoch_start(self): + """Called when the epoch begins.""" + for callback in self.callbacks: + callback.on_test_epoch_start(self, self.get_model()) + + def on_test_epoch_end(self): + """Called when the epoch begins.""" + for callback in self.callbacks: + callback.on_test_epoch_end(self, self.get_model()) + def on_epoch_start(self): """Called when the epoch begins.""" for callback in self.callbacks: diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index a21af96da95f8..77fc30c2c4f70 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -20,6 +20,8 @@ def 
__init__(self, weights=None): self.validation_step_end_called = False self.validation_epoch_end_called = False + self.assert_backward = True + self.l1 = nn.Linear(2, 3, bias=False) if weights is None: weights = torch.tensor([ @@ -115,8 +117,11 @@ def training_step_result_log_epoch_and_step_for_callbacks(self, batch, batch_idx """ Early stop and checkpoint only on these values """ + acc = self.step(batch, batch_idx) + + self.assert_backward = False losses = [20, 19, 18, 10, 15, 14, 9, 11, 11, 20, 22] - loss = losses[batch_idx] + loss = acc + losses[batch_idx] result = TrainResult(minimize=loss, early_stop_on=loss, checkpoint_on=loss) return result @@ -311,10 +316,11 @@ def configure_optimizers(self): return torch.optim.Adam(self.parameters(), lr=0) def backward(self, trainer, loss, optimizer, optimizer_idx): - if self.trainer.precision == 16: - assert loss > 171 * 1000 - else: - assert loss == 171.0 + if self.assert_backward: + if self.trainer.precision == 16: + assert loss > 171 * 1000 + else: + assert loss == 171.0 loss.backward() diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index aa9ef0765acfd..038901339d02e 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -403,4 +403,28 @@ def test_no_auto_callbacks_with_train_loop_only(tmpdir): assert trainer.early_stop_callback.monitor == 'val_loss' -test_no_auto_callbacks_with_train_loop_only('') \ No newline at end of file + +def test_use_callbacks_with_train_loop_only(tmpdir): + os.environ['PL_DEV_DEBUG'] = '1' + + model = DeterministicModel() + model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks + model.training_epoch_end = None + model.val_dataloader = None + + batches = 3 + epochs = 300 + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=epochs, + early_stop_callback=True, + row_log_interval=1, + limit_train_batches=batches, + weights_summary=None, + ) + trainer.fit(model) + + all_losses = trainer.debug_saved_losses + assert len(all_losses) == batches * epochs + +test_use_callbacks_with_train_loop_only('') \ No newline at end of file From eae4d6b5dcf43f561219d9e4028076d68a5e07f8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 18 Jul 2020 16:31:47 -0400 Subject: [PATCH 035/168] added hooks --- pytorch_lightning/callbacks/early_stopping.py | 18 +++++++--- pytorch_lightning/core/hooks.py | 36 +++++++++++++++++++ pytorch_lightning/trainer/training_loop.py | 19 ++++++++++ tests/base/deterministic_model.py | 5 +-- .../test_trainer_steps_result_return.py | 3 +- 5 files changed, 73 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 78ca2c31ae9c1..6fcbeaa1e92e4 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -140,15 +140,23 @@ def on_sanity_check_end(self, trainer, pl_module): def on_validation_end(self, trainer, pl_module): self._run_early_stopping_check(trainer, pl_module) - def on_epoch_end(self, trainer, pl_module): + def on_train_epoch_end(self, trainer, pl_module): + # early stopping can also work in the train loop when there is no val loop and when using structured results + should_check_early_stop = False + if 'early_stop_on' in trainer.callback_metrics and trainer.callback_metrics['early_stop_on'] is not None: + self.monitor = 'early_stop_on' + should_check_early_stop = True + + if 'val_early_stop_on' 
in trainer.callback_metrics and trainer.callback_metrics['val_early_stop_on'] is not None: + self.monitor = 'val_early_stop_on' + should_check_early_stop = True + + if should_check_early_stop: + self._run_early_stopping_check(trainer, pl_module) def _run_early_stopping_check(self, trainer, pl_module): logs = trainer.callback_metrics - # support structured results - if 'early_stop_on' in logs and logs['early_stop_on'] is not None: - self.monitor = 'early_stop_on' - if not self._validate_condition_metric(logs): return # short circuit if metric not present diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index d8c2181251b45..60e93aa275d93 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -115,6 +115,42 @@ def on_epoch_end(self) -> None: """ # do something when the epoch ends + def on_train_epoch_start(self) -> None: + """ + Called in the training loop at the very beginning of the epoch. + """ + # do something when the epoch starts + + def on_train_epoch_end(self) -> None: + """ + Called in the training loop at the very end of the epoch. + """ + # do something when the epoch ends + + def on_val_epoch_start(self) -> None: + """ + Called in the training loop at the very beginning of the epoch. + """ + # do something when the epoch starts + + def on_val_epoch_end(self) -> None: + """ + Called in the training loop at the very end of the epoch. + """ + # do something when the epoch ends + + def on_test_epoch_start(self) -> None: + """ + Called in the training loop at the very beginning of the epoch. + """ + # do something when the epoch starts + + def on_test_epoch_end(self) -> None: + """ + Called in the training loop at the very end of the epoch. + """ + # do something when the epoch ends + def on_pre_performance_check(self) -> None: """ Called at the very beginning of the validation loop. 
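
The module hooks above only take effect once the train loop dispatches to them (the training_loop.py changes that follow). As a rough sketch of how a user module might pick them up, the example below overrides the two train-epoch hooks to time each epoch. The class name, the timing logic, and the toy layer are assumptions made purely for illustration; they are not taken from this patch.

import time

import torch
from torch import nn
from pytorch_lightning.core.lightning import LightningModule


class EpochTimingModel(LightningModule):
    """Toy module that overrides the new train-epoch hooks to time each epoch."""

    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(2, 2)
        self._epoch_start = None

    def forward(self, x):
        return self.layer(x)

    def training_step(self, batch, batch_idx):
        # minimal loss just to make the module trainable
        loss = self(batch).sum()
        return {'loss': loss}

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)

    def on_train_epoch_start(self):
        # invoked by the train loop right after the generic on_epoch_start hook
        self._epoch_start = time.time()

    def on_train_epoch_end(self):
        # invoked by the train loop right after the generic on_epoch_end hook
        print(f'train epoch took {time.time() - self._epoch_start:.2f}s')

Note that the train loop guards the model-side call with is_function_implemented, mirroring how the existing on_epoch_start hook is dispatched in the training_loop.py diff below.
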
diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 862a42be8d327..2327dd6b09234 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -253,6 +253,8 @@ class TrainerTrainLoopMixin(ABC): on_epoch_end: Callable on_validation_end: Callable on_keyboard_interrupt: Callable + on_train_epoch_start: Callable + on_train_epoch_end: Callable @abstractmethod def get_model(self) -> LightningModule: @@ -422,6 +424,15 @@ def run_on_epoch_start_hook(self, model): if self.is_function_implemented('on_epoch_start'): model.on_epoch_start() + # Epoch start events + with self.profiler.profile('on_train_epoch_start'): + # callbacks + self.on_train_epoch_start() + + # model hooks + if self.is_function_implemented('on_train_epoch_start'): + model.on_train_epoch_start() + def run_training_epoch(self): # get model @@ -529,6 +540,14 @@ def run_on_epoch_end_hook(self, model): if self.is_function_implemented('on_epoch_end'): model.on_epoch_end() + with self.profiler.profile('on_train_epoch_end'): + # callbacks + self.on_train_epoch_end() + + # model hooks + if self.is_function_implemented('on_train_epoch_end'): + model.on_train_epoch_end() + def run_training_epoch_end(self, epoch_output): model = self.get_model() is_result_obj = len(epoch_output) > 0 and isinstance(epoch_output[0], Result) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 77fc30c2c4f70..46c0c31039793 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -120,8 +120,9 @@ def training_step_result_log_epoch_and_step_for_callbacks(self, batch, batch_idx acc = self.step(batch, batch_idx) self.assert_backward = False - losses = [20, 19, 18, 10, 15, 14, 9, 11, 11, 20, 22] - loss = acc + losses[batch_idx] + losses = [20, 19, 18, 10, 15, 14, 9, 11, 11, 20, 22, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30] + idx = batch_idx + (self.current_epoch * 3) + loss = acc + losses[idx] result = TrainResult(minimize=loss, early_stop_on=loss, checkpoint_on=loss) return result diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 038901339d02e..b6fb56f5911c2 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -424,7 +424,8 @@ def test_use_callbacks_with_train_loop_only(tmpdir): ) trainer.fit(model) + # TODO: finish test to make sure early stopping happened when expected all_losses = trainer.debug_saved_losses - assert len(all_losses) == batches * epochs + assert len(all_losses) == 12 test_use_callbacks_with_train_loop_only('') \ No newline at end of file From 6102c8a3f0fef8a44cafbcbcc717457b3d3abd5d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 07:45:09 -0400 Subject: [PATCH 036/168] added hooks --- pytorch_lightning/callbacks/early_stopping.py | 14 +++++ pytorch_lightning/trainer/logging.py | 7 +-- pytorch_lightning/trainer/supporters.py | 14 +++++ pytorch_lightning/trainer/trainer.py | 8 +-- pytorch_lightning/trainer/training_loop.py | 52 +++++++++++-------- .../test_trainer_steps_result_return.py | 20 +++---- 6 files changed, 72 insertions(+), 43 deletions(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 6fcbeaa1e92e4..41a6865e61788 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -7,6 +7,7 @@ """ from copy 
import deepcopy +import os import numpy as np import torch import torch.distributed as dist @@ -161,6 +162,19 @@ def _run_early_stopping_check(self, trainer, pl_module): return # short circuit if metric not present current = logs.get(self.monitor) + + # track values for dev debugging + if 'PL_DEV_DEBUG' in os.environ: + debug_dict = { + 'epoch': trainer.current_epoch, + 'global_step': trainer.global_step, + 'rank': trainer.global_rank, + 'current': current, + 'best': self.best_score, + 'patience': self.wait_count + } + trainer.debug_early_stopping_values.append(debug_dict) + if not isinstance(current, torch.Tensor): current = torch.tensor(current, device=pl_module.device) diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 4425da3a20d46..e7c9d7636d9ae 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -74,8 +74,7 @@ def log_metrics(self, metrics, grad_norm_dic, step=None): self.logger.agg_and_log_metrics(scalar_metrics, step=step) self.logger.save() - if 'PL_DEV_DEBUG' in os.environ: - self.debug_logged_metrics.append(scalar_metrics) + self.dev_debugger.track_logged_metrics(scalar_metrics) def add_progress_bar_metrics(self, metrics): for k, v in metrics.items(): @@ -84,9 +83,7 @@ def add_progress_bar_metrics(self, metrics): self.progress_bar_metrics[k] = v - if 'PL_DEV_DEBUG' in os.environ: - metrics['debug_epoch'] = self.current_epoch - self.debug_pbar_added_metrics.append(metrics) + self.dev_debugger.track_pbar_metrics(self, metrics) def metrics_to_scalars(self, metrics): new_metrics = {} diff --git a/pytorch_lightning/trainer/supporters.py b/pytorch_lightning/trainer/supporters.py index fcd21becfeac0..8853d7aaa05b0 100644 --- a/pytorch_lightning/trainer/supporters.py +++ b/pytorch_lightning/trainer/supporters.py @@ -76,3 +76,17 @@ def _agg_memory(self, how: str): return getattr(self.memory, how)() else: return getattr(self.memory[:self.current_idx], how)() + + +class Accumulator(object): + def __init__(self): + self.num_values = 0 + self.total = 0 + + def accumulate(self, x): + with torch.no_grad(): + self.total += x + self.num_values += 1 + + def mean(self): + return self.total / self.num_values diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 80896005f33dd..4b2ccd09a6b60 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -33,6 +33,7 @@ from pytorch_lightning.trainer.lr_finder import TrainerLRFinderMixin from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities import rank_zero_warn, parsing, rank_zero_info, rank_zero_only +from pytorch_lightning.utilities.debugging import InternalDebugger import warnings # warnings to ignore in trainer @@ -616,12 +617,7 @@ def __init__( self.on_colab_kaggle = os.getenv('COLAB_GPU') or os.getenv('KAGGLE_URL_BASE') - # --------------------------- - # only active when debugging PL for dev purposes and tests - # --------------------------- - self.debug_logged_metrics = [] - self.debug_pbar_added_metrics = [] - self.debug_saved_losses = [] + self.dev_debugger = InternalDebugger() # Callback system self.on_init_end() diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 2327dd6b09234..237286d7245af 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -160,7 +160,7 @@ def training_step(self, batch, batch_idx): from 
pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.lightning import LightningModule from pytorch_lightning.loggers import LightningLoggerBase -from pytorch_lightning.trainer.supporters import TensorRunningAccum +from pytorch_lightning.trainer.supporters import TensorRunningAccum, Accumulator from pytorch_lightning.utilities import rank_zero_warn, NATIVE_AMP_AVALAIBLE from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.parsing import AttributeDict @@ -448,6 +448,10 @@ def run_training_epoch(self): epoch_output = [] should_check_val = False + # structured result accumulators for callbacks + early_stopping_accumulator = Accumulator() + checkpoint_accumulator = Accumulator() + # run epoch for batch_idx, (batch, is_last_batch) in self.profiler.profile_iterable( enumerate(_with_is_last(train_dataloader)), "get_train_batch" @@ -468,6 +472,12 @@ def run_training_epoch(self): # otherwise we will build up unnecessary memory step_out = batch_output.training_step_output_for_epoch_end should_auto_reduce_train_result = isinstance(step_out, Result) and step_out.should_reduce_on_epoch_end + if 'early_stop_on' in step_out: + early_stopping_accumulator.accumulate(step_out['early_stop_on']) + + if 'checkpoint_on' in step_out: + checkpoint_accumulator.accumulate(step_out['checkpoint_on']) + if self.is_overridden('training_epoch_end', model=self.get_model()) or should_auto_reduce_train_result: epoch_output.append(batch_output.training_step_output_for_epoch_end) @@ -511,7 +521,7 @@ def run_training_epoch(self): self.sync_horovod() # process epoch outputs - self.run_training_epoch_end(epoch_output) + self.run_training_epoch_end(epoch_output, checkpoint_accumulator, early_stopping_accumulator) # checkpoint callback self.check_checkpoint_callback(should_check_val) @@ -548,7 +558,7 @@ def run_on_epoch_end_hook(self, model): if self.is_function_implemented('on_train_epoch_end'): model.on_train_epoch_end() - def run_training_epoch_end(self, epoch_output): + def run_training_epoch_end(self, epoch_output, checkpoint_accumulator, early_stopping_accumulator): model = self.get_model() is_result_obj = len(epoch_output) > 0 and isinstance(epoch_output[0], Result) @@ -556,6 +566,15 @@ def run_training_epoch_end(self, epoch_output): epoch_callback_metrics = {} epoch_progress_bar_metrics = {} + # ----------------------- + # Calculate epoch callback values if given + # ----------------------- + if checkpoint_accumulator.num_values > 0: + epoch_callback_metrics['checkpoint_on'] = checkpoint_accumulator.mean() + + if early_stopping_accumulator.num_values > 0: + epoch_callback_metrics['early_stop_on'] = early_stopping_accumulator.mean() + # -------------------------- # EPOCH END STEP IF DEFINED # -------------------------- @@ -565,26 +584,13 @@ def run_training_epoch_end(self, epoch_output): # remove the protected keys so the user doesn't have to deal with them if is_result_obj: epoch_output = epoch_output[0].__class__.gather(epoch_output) - minimize = epoch_output.minimize - early_stop_on = epoch_output.early_stop_on - checkpoint_on = epoch_output.checkpoint_on - del epoch_output['minimize'] - del epoch_output['early_stop_on'] - del epoch_output['checkpoint_on'] # run training_epoch_end epoch_output = model.training_epoch_end(epoch_output) - # with a result we put back the main metrics and compute means - if isinstance(epoch_output, Result): - epoch_output.minimize = minimize.mean() - epoch_output.early_stop_on = early_stop_on.mean() - 
epoch_output.checkpoint_on = checkpoint_on.mean() - if isinstance(epoch_output, Result): epoch_log_metrics = epoch_output.epoch_log_metrics epoch_progress_bar_metrics = epoch_output.epoch_pbar_metrics - epoch_callback_metrics = epoch_output.callback_metrics else: _processed_outputs = self.process_output(epoch_output) epoch_progress_bar_metrics = _processed_outputs[1] @@ -597,11 +603,8 @@ def run_training_epoch_end(self, epoch_output): elif is_result_obj: epoch_output = epoch_output[0].__class__.reduce_on_epoch_end(epoch_output) epoch_output.minimize = epoch_output.minimize.mean() - epoch_output.early_stop_on = epoch_output.early_stop_on.mean() - epoch_output.checkpoint_on = epoch_output.checkpoint_on.mean() epoch_log_metrics = epoch_output.epoch_log_metrics epoch_progress_bar_metrics = epoch_output.epoch_pbar_metrics - epoch_callback_metrics = epoch_output.callback_metrics # -------------------------- # track results @@ -663,6 +666,8 @@ def run_training_batch(self, batch, batch_idx): # track metrics to log batch_log_metrics = [] + using_results_obj = False + if batch is None: return AttributeDict(signal=0, grad_norm_dic=grad_norm_dic) @@ -706,7 +711,7 @@ def run_training_batch(self, batch, batch_idx): optimizer, self.hiddens ) - is_result_obj = isinstance(opt_closure_result.training_step_output, Result) + using_results_obj = isinstance(opt_closure_result.training_step_output, Result) # ------------------------------ # POST forward bookkeeping @@ -714,14 +719,14 @@ def run_training_batch(self, batch, batch_idx): batch_callback_metrics.append(opt_closure_result.training_step_output.callback_metrics) # add metrics to loggers - if is_result_obj: + if using_results_obj: metrics_to_log = opt_closure_result.training_step_output.batch_log_metrics else: metrics_to_log = opt_closure_result.training_step_output.log_metrics batch_log_metrics.append(metrics_to_log) # add metrics to progress bar - if is_result_obj: + if using_results_obj: metrics_for_pbar = opt_closure_result.training_step_output.batch_pbar_metrics else: metrics_for_pbar = opt_closure_result.training_step_output.pbar_on_batch_end @@ -764,7 +769,8 @@ def run_training_batch(self, batch, batch_idx): batch_log_metrics = {k: v for d in batch_log_metrics for k, v in d.items()} # track all metrics for callbacks - self.callback_metrics.update({k: v for d in batch_callback_metrics for k, v in d.items()}) + if not using_results_obj: + self.callback_metrics.update({k: v for d in batch_callback_metrics for k, v in d.items()}) result = AttributeDict( signal=0, diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index b6fb56f5911c2..86a7bafc54288 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -46,8 +46,8 @@ def test_training_step_result_log_step_only(tmpdir): assert not model.training_epoch_end_called # make sure correct metrics are logged (one per batch step as requested) - assert len(trainer.debug_logged_metrics) == batches - for batch_idx, logged_metrics in enumerate(trainer.debug_logged_metrics): + assert len(trainer.dev_debugger.logged_metrics) == batches + for batch_idx, logged_metrics in enumerate(trainer.dev_debugger.logged_metrics): assert logged_metrics[f'step_log_and_pbar_acc1_b{batch_idx}'] == 11.0 assert logged_metrics[f'step_log_acc2_b{batch_idx}'] == 12.0 assert f'step_pbar_acc3_b{batch_idx}' not in logged_metrics @@ -118,8 +118,8 @@ def test_training_step_result_log_epoch_only(tmpdir): assert not 
model.training_epoch_end_called # make sure correct metrics are logged (one per batch step as requested) - assert len(trainer.debug_logged_metrics) == epochs - epoch_metrics = trainer.debug_logged_metrics + assert len(trainer.dev_debugger.logged_metrics) == epochs + epoch_metrics = trainer.dev_debugger.logged_metrics assert len(epoch_metrics) == epochs for batch_idx, logged_metrics in enumerate(epoch_metrics): assert logged_metrics[f'epoch_log_and_pbar_acc1_e{batch_idx}'] == 14.0 @@ -191,8 +191,8 @@ def test_training_step_result_log_step_and_epoch(tmpdir): assert not model.training_epoch_end_called # make sure correct metrics are logged (one per batch step as requested) - assert len(trainer.debug_logged_metrics) == (epochs * batches) + epochs - epoch_metrics = trainer.debug_logged_metrics + assert len(trainer.dev_debugger.logged_metrics) == (epochs * batches) + epochs + epoch_metrics = trainer.dev_debugger.logged_metrics epoch_idx = -1 for i_start in range(0, len(epoch_metrics), batches + 1): epoch_idx += 1 @@ -232,7 +232,7 @@ def test_training_step_result_log_step_and_epoch(tmpdir): # VERIFY PBAR METRICS # ------------------------------- # make sure pbar metrics are correct ang log metrics did not leak - all_pbar_metrics = trainer.debug_pbar_added_metrics + all_pbar_metrics = trainer.dev_debugger.pbar_added_metrics assert len(all_pbar_metrics) == (epochs * batches) + epochs epoch_idx = -1 @@ -316,7 +316,7 @@ def test_training_step_epoch_end_result(tmpdir): assert model.training_epoch_end_called # make sure correct metrics were logged - logged_metrics = trainer.debug_logged_metrics + logged_metrics = trainer.dev_debugger.logged_metrics assert len(logged_metrics) == (epochs * batches) + epochs last_logged = logged_metrics[-1] @@ -327,7 +327,7 @@ def test_training_step_epoch_end_result(tmpdir): assert 'epoch_end_pbar_acc' not in last_logged # make sure pbar metrics are correct - logged_pbar = trainer.debug_pbar_added_metrics + logged_pbar = trainer.dev_debugger.pbar_added_metrics assert len(logged_pbar) == (epochs * batches) + epochs assert trainer.progress_bar_metrics['step_epoch_log_and_pbar_acc1'] == 210.0 @@ -425,7 +425,9 @@ def test_use_callbacks_with_train_loop_only(tmpdir): trainer.fit(model) # TODO: finish test to make sure early stopping happened when expected + early_stop_vals = trainer.debug_early_stopping_values all_losses = trainer.debug_saved_losses + assert len(all_losses) == 12 test_use_callbacks_with_train_loop_only('') \ No newline at end of file From ccd08edf81ef60d03111396e8d0c1c13bb2bd4f7 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 07:55:05 -0400 Subject: [PATCH 037/168] added hooks --- pytorch_lightning/callbacks/early_stopping.py | 13 ++----------- pytorch_lightning/trainer/logging.py | 4 ++-- pytorch_lightning/trainer/trainer.py | 3 ++- pytorch_lightning/trainer/training_loop.py | 4 +--- tests/trainer/test_trainer_steps_result_return.py | 6 +++--- 5 files changed, 10 insertions(+), 20 deletions(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 41a6865e61788..abebaf416fcd9 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -163,17 +163,8 @@ def _run_early_stopping_check(self, trainer, pl_module): current = logs.get(self.monitor) - # track values for dev debugging - if 'PL_DEV_DEBUG' in os.environ: - debug_dict = { - 'epoch': trainer.current_epoch, - 'global_step': trainer.global_step, - 'rank': trainer.global_rank, - 
'current': current, - 'best': self.best_score, - 'patience': self.wait_count - } - trainer.debug_early_stopping_values.append(debug_dict) + # when in dev debugging + trainer.dev_debugger.track_early_stopping_history() if not isinstance(current, torch.Tensor): current = torch.tensor(current, device=pl_module.device) diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index e7c9d7636d9ae..96dd0d028be8d 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -74,7 +74,7 @@ def log_metrics(self, metrics, grad_norm_dic, step=None): self.logger.agg_and_log_metrics(scalar_metrics, step=step) self.logger.save() - self.dev_debugger.track_logged_metrics(scalar_metrics) + self.dev_debugger.track_logged_metrics_history(scalar_metrics) def add_progress_bar_metrics(self, metrics): for k, v in metrics.items(): @@ -83,7 +83,7 @@ def add_progress_bar_metrics(self, metrics): self.progress_bar_metrics[k] = v - self.dev_debugger.track_pbar_metrics(self, metrics) + self.dev_debugger.track_pbar_metrics_history(self, metrics) def metrics_to_scalars(self, metrics): new_metrics = {} diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 4b2ccd09a6b60..76c7899508144 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -617,7 +617,8 @@ def __init__( self.on_colab_kaggle = os.getenv('COLAB_GPU') or os.getenv('KAGGLE_URL_BASE') - self.dev_debugger = InternalDebugger() + # tracks internal state for debugging + self.dev_debugger = InternalDebugger(self) # Callback system self.on_init_end() diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 237286d7245af..a0444b62ff02d 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -951,9 +951,7 @@ def optimizer_closure(self, split_batch, batch_idx, opt_idx, optimizer, hiddens) model_ref.on_after_backward() # when in dev debugging track the losses - if 'PL_DEV_DEBUG' in os.environ: - loss_dict = {'batch_idx': batch_idx, 'epoch': self.current_epoch, 'loss': untouched_loss.detach()} - self.debug_saved_losses.append(loss_dict) + self.dev_debugger.track_train_loss_history(batch_idx, untouched_loss.detach()) result = AttributeDict( loss=untouched_loss, diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 86a7bafc54288..3901d22f49e61 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -385,7 +385,7 @@ def test_no_auto_callbacks_with_train_loop_only(tmpdir): ) trainer.fit(model) - all_losses = trainer.debug_saved_losses + all_losses = trainer.dev_debugger.saved_losses assert len(all_losses) == batches * epochs assert trainer.checkpoint_callback.monitor == 'checkpoint_on' @@ -425,8 +425,8 @@ def test_use_callbacks_with_train_loop_only(tmpdir): trainer.fit(model) # TODO: finish test to make sure early stopping happened when expected - early_stop_vals = trainer.debug_early_stopping_values - all_losses = trainer.debug_saved_losses + early_stop_vals = trainer.dev_debugger.early_stopping_history + all_losses = trainer.dev_debugger.saved_losses assert len(all_losses) == 12 From 804e9c83cec39b931cfdd5d76a657d23a55f2836 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 08:01:41 -0400 Subject: [PATCH 038/168] added hooks --- pytorch_lightning/callbacks/early_stopping.py | 2 +- 
pytorch_lightning/trainer/logging.py | 2 +- tests/trainer/test_trainer_steps_result_return.py | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index abebaf416fcd9..7035159e990ff 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -164,7 +164,7 @@ def _run_early_stopping_check(self, trainer, pl_module): current = logs.get(self.monitor) # when in dev debugging - trainer.dev_debugger.track_early_stopping_history() + trainer.dev_debugger.track_early_stopping_history(current) if not isinstance(current, torch.Tensor): current = torch.tensor(current, device=pl_module.device) diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 96dd0d028be8d..3baed4ef9d81d 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -83,7 +83,7 @@ def add_progress_bar_metrics(self, metrics): self.progress_bar_metrics[k] = v - self.dev_debugger.track_pbar_metrics_history(self, metrics) + self.dev_debugger.track_pbar_metrics_history(metrics) def metrics_to_scalars(self, metrics): new_metrics = {} diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 3901d22f49e61..2469a6f450b55 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -54,7 +54,6 @@ def test_training_step_result_log_step_only(tmpdir): assert len(logged_metrics) == 3 # make sure we are using the correct metrics for callbacks - assert trainer.callback_metrics['early_stop_on'] == 171 assert trainer.callback_metrics['checkpoint_on'] == 171 # make sure pbar metrics are correct ang log metrics did not leak @@ -128,7 +127,6 @@ def test_training_step_result_log_epoch_only(tmpdir): assert len(logged_metrics) == 3 # make sure we are using the correct metrics for callbacks - assert trainer.callback_metrics['early_stop_on'] == 171 assert trainer.callback_metrics['checkpoint_on'] == 171 # make sure pbar metrics are correct ang log metrics did not leak @@ -225,7 +223,6 @@ def test_training_step_result_log_step_and_epoch(tmpdir): assert len(logged_metrics) == 3 # make sure we are using the correct metrics for callbacks - assert trainer.callback_metrics['early_stop_on'] == 171 assert trainer.callback_metrics['checkpoint_on'] == 171 # ------------------------------- @@ -338,7 +335,6 @@ def test_training_step_epoch_end_result(tmpdir): assert 'log_acc2' not in trainer.progress_bar_metrics # make sure callback metrics didn't change - assert trainer.callback_metrics['early_stop_on'] == 171 assert trainer.callback_metrics['checkpoint_on'] == 171 # ----------------------------------------- @@ -428,6 +424,11 @@ def test_use_callbacks_with_train_loop_only(tmpdir): early_stop_vals = trainer.dev_debugger.early_stopping_history all_losses = trainer.dev_debugger.saved_losses - assert len(all_losses) == 12 + # assert len(all_losses) == 12 +test_training_step_result_log_step_only('') +test_training_step_result_log_epoch_only('') +test_training_step_result_log_step_and_epoch('') +test_training_step_epoch_end_result('') +test_no_auto_callbacks_with_train_loop_only('') test_use_callbacks_with_train_loop_only('') \ No newline at end of file From 2736c7062fc7c3192f952ff149570f8738c201a9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 08:24:16 -0400 Subject: [PATCH 039/168] 
finished tests for structured results on train epoch --- .../callbacks/model_checkpoint.py | 16 +++--- pytorch_lightning/utilities/debugging.py | 53 +++++++++++++++++++ tests/base/deterministic_model.py | 4 +- .../test_trainer_steps_result_return.py | 32 +++++++---- 4 files changed, 88 insertions(+), 17 deletions(-) create mode 100644 pytorch_lightning/utilities/debugging.py diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index d3e8544f85217..eb81e879c7d56 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -159,7 +159,11 @@ def _del_model(self, filepath): if os.path.isfile(filepath): os.remove(filepath) - def _save_model(self, filepath): + def _save_model(self, filepath, trainer, pl_module): + + # in debugging, track when we save checkpoints + trainer.dev_debugger.track_checkpointing_history(filepath) + # make paths os.makedirs(os.path.dirname(filepath), exist_ok=True) @@ -286,7 +290,7 @@ def on_validation_end(self, trainer, pl_module): if self.save_last: filepath = os.path.join(self.dirpath, self.prefix + 'last.ckpt') - self._save_model(filepath) + self._save_model(filepath, trainer, pl_module) filepath = self.format_checkpoint_name(epoch, metrics) version_cnt = 0 @@ -311,7 +315,7 @@ def on_validation_end(self, trainer, pl_module): f'Can save best model only with {self.monitor} available, skipping.', RuntimeWarning ) elif self.check_monitor_top_k(current): - self._do_check_save(filepath, current, epoch) + self._do_check_save(filepath, current, epoch, trainer, pl_module) elif self.verbose > 0: log.info(f'\nEpoch {epoch:05d}: {self.monitor} was not in top {self.save_top_k}') @@ -320,9 +324,9 @@ def on_validation_end(self, trainer, pl_module): log.info(f'\nEpoch {epoch:05d}: saving model to {filepath}') assert trainer.global_rank == 0, 'tried to make a checkpoint from non global_rank=0' - self._save_model(filepath) + self._save_model(filepath, trainer, pl_module) - def _do_check_save(self, filepath, current, epoch): + def _do_check_save(self, filepath, current, epoch, trainer, pl_module): # remove kth del_list = [] @@ -348,7 +352,7 @@ def _do_check_save(self, filepath, current, epoch): f'\nEpoch {epoch:05d}: {self.monitor} reached' f' {current:0.5f} (best {self.best_model_score:0.5f}), saving model to' f' {filepath} as top {self.save_top_k}') - self._save_model(filepath) + self._save_model(filepath, trainer, pl_module) for cur_path in del_list: if cur_path != filepath: diff --git a/pytorch_lightning/utilities/debugging.py b/pytorch_lightning/utilities/debugging.py new file mode 100644 index 0000000000000..d8a7722fd8884 --- /dev/null +++ b/pytorch_lightning/utilities/debugging.py @@ -0,0 +1,53 @@ +import os + + +class InternalDebugger(object): + + def __init__(self, trainer): + + self.enabled = 'PL_DEV_DEBUG' in os.environ + self.trainer = trainer + self.logged_metrics = [] + self.pbar_added_metrics = [] + self.saved_losses = [] + self.early_stopping_history = [] + self.checkpoint_callback_history = [] + + def track_logged_metrics_history(self, scalar_metrics): + if self.enabled: + self.logged_metrics.append(scalar_metrics) + + def track_train_loss_history(self, batch_idx, loss): + if self.enabled: + loss_dict = {'batch_idx': batch_idx, 'epoch': self.trainer.current_epoch, 'loss': loss.detach()} + self.saved_losses.append(loss_dict) + + def track_pbar_metrics_history(self, metrics): + if self.enabled: + metrics['debug_epoch'] = self.trainer.current_epoch + 
self.pbar_added_metrics.append(metrics) + + def track_early_stopping_history(self, current): + if self.enabled: + es = self.trainer.early_stop_callback + debug_dict = { + 'epoch': self.trainer.current_epoch, + 'global_step': self.trainer.global_step, + 'rank': self.trainer.global_rank, + 'current': current, + 'best': es.best_score, + 'patience': es.wait_count + } + self.early_stopping_history.append(debug_dict) + + def track_checkpointing_history(self, filepath): + if self.enabled: + cb = self.trainer.checkpoint_callback + debug_dict = { + 'epoch': self.trainer.current_epoch, + 'global_step': self.trainer.global_step, + 'monitor': cb.monitor, + 'rank': self.trainer.global_rank, + 'filepath': filepath + } + self.checkpoint_callback_history.append(debug_dict) \ No newline at end of file diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 46c0c31039793..4da5da9c977f7 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -120,8 +120,8 @@ def training_step_result_log_epoch_and_step_for_callbacks(self, batch, batch_idx acc = self.step(batch, batch_idx) self.assert_backward = False - losses = [20, 19, 18, 10, 15, 14, 9, 11, 11, 20, 22, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30] - idx = batch_idx + (self.current_epoch * 3) + losses = [20, 19, 18, 10, 15, 14, 9, 11, 11, 20] + idx = self.current_epoch loss = acc + losses[idx] result = TrainResult(minimize=loss, early_stop_on=loss, checkpoint_on=loss) return result diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 2469a6f450b55..c8459f47e2c25 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -420,15 +420,29 @@ def test_use_callbacks_with_train_loop_only(tmpdir): ) trainer.fit(model) - # TODO: finish test to make sure early stopping happened when expected + num_expected_epochs = 10 + + # ---------------------------------- + # VERIFY EARLY STOPPING BEHAVIOR + # ---------------------------------- + # with train loop only it happens on every epoch early_stop_vals = trainer.dev_debugger.early_stopping_history + assert len(early_stop_vals) == num_expected_epochs + min_val = min([x['best'] for x in early_stop_vals]) + assert min_val == 171 + 9 all_losses = trainer.dev_debugger.saved_losses - # assert len(all_losses) == 12 - -test_training_step_result_log_step_only('') -test_training_step_result_log_epoch_only('') -test_training_step_result_log_step_and_epoch('') -test_training_step_epoch_end_result('') -test_no_auto_callbacks_with_train_loop_only('') -test_use_callbacks_with_train_loop_only('') \ No newline at end of file + from collections import Counter + batch_idxs = Counter([x['batch_idx'] for x in all_losses]) + for i, val in batch_idxs.items(): + assert val == num_expected_epochs + assert i in [0, 1, 2] + + # ---------------------------------- + # VERIFY CHECKPOINTING BEHAVIOR + # ---------------------------------- + ckpt_vals = trainer.dev_debugger.checkpoint_callback_history + assert len(ckpt_vals) == 5, '5 ckpts should have been saved' + for ckpt_val, expected_epoch in zip(ckpt_vals, [0, 1, 2, 3, 6]): + assert ckpt_val['epoch'] == expected_epoch + assert ckpt_val['monitor'] == 'checkpoint_on' From e09bcfc5f735a8b4e792162a52ca494119488605 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 08:40:58 -0400 Subject: [PATCH 040/168] finished tests for structured results on train epoch --- pytorch_lightning/core/step_result.py | 
12 +-- tests/base/deterministic_model.py | 10 +++ .../test_trainer_steps_result_return.py | 81 ++++++++++++++++++- 3 files changed, 95 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 117e1ea45a325..abbe5e42676a4 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict +from typing import Optional, Dict, Union from torch import Tensor import torch from copy import copy @@ -10,7 +10,7 @@ def __init__( self, minimize: Optional[Tensor] = None, early_stop_on: Tensor = None, - checkpoint_on: Tensor = None, + checkpoint_on: Union[Tensor, bool] = None, hiddens: Optional[Tensor] = None ): @@ -18,7 +18,7 @@ def __init__( if early_stop_on is not None: self.early_stop_on = early_stop_on - if checkpoint_on is not None: + if checkpoint_on is not None and checkpoint_on: self.checkpoint_on = checkpoint_on if hiddens is not None: self.hiddens = hiddens @@ -57,7 +57,7 @@ def __setattr__(self, key, val): # ensure reserve keys are tensors and detached if key in {'hiddens', 'checkpoint_on', 'early_stop_on'}: self._assert_tensor_metric(key, val) - if val is not None: + if val is not None and isinstance(val, torch.Tensor): val = val.detach() # ensure anything else that is a tensor is detached @@ -67,7 +67,7 @@ def __setattr__(self, key, val): self[key] = val def _assert_tensor_metric(self, name, x): - if x is not None: + if x is not None and not isinstance(x, bool): assert isinstance(x, Tensor), f'{name} must be a torch.Tensor' def _assert_grad_tensor_metric(self, name, x, additional_err: str = None): @@ -269,7 +269,7 @@ def __init__( self, minimize: Optional[Tensor] = None, early_stop_on: Tensor = None, - checkpoint_on: Tensor = None, + checkpoint_on: Union[Tensor, bool] = None, hiddens: Optional[Tensor] = None ): diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 4da5da9c977f7..4e46dd1dd39d5 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -113,6 +113,16 @@ def training_step_no_default_callbacks_for_train_loop(self, batch, batch_idx): assert 'checkpoint_on' in result return result + def training_step_no_callbacks_result_obj(self, batch, batch_idx): + """ + Early stop and checkpoint only on these values + """ + acc = self.step(batch, batch_idx) + result = TrainResult(minimize=acc, checkpoint_on=False) + assert 'early_step_on' not in result + assert 'checkpoint_on' not in result + return result + def training_step_result_log_epoch_and_step_for_callbacks(self, batch, batch_idx): """ Early stop and checkpoint only on these values diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index c8459f47e2c25..1b5fabcc980d3 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -8,8 +8,6 @@ from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult -# TODOs: -# make checkpoint and early stopping use the correct metrics # test with train_step_end # add logging + row interval tests @@ -400,6 +398,37 @@ def test_no_auto_callbacks_with_train_loop_only(tmpdir): assert trainer.early_stop_callback.monitor == 'val_loss' +def test_no_callbacks_with_train_loop_only(tmpdir): + """ + Make sure early stop + checkpoint work with only a train loop + """ + os.environ['PL_DEV_DEBUG'] = '1' + + model = DeterministicModel() + model.training_step = 
model.training_step_no_callbacks_result_obj + model.training_epoch_end = None + model.val_dataloader = None + + batches = 3 + epochs = 3 + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=epochs, + row_log_interval=1, + limit_train_batches=batches, + weights_summary=None, + ) + trainer.fit(model) + + all_losses = trainer.dev_debugger.saved_losses + assert len(all_losses) == batches * epochs + + assert trainer.early_stop_callback is None + + assert len(trainer.dev_debugger.checkpoint_callback_history) == 0 + assert len(trainer.dev_debugger.early_stopping_history) == 0 + + def test_use_callbacks_with_train_loop_only(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' @@ -446,3 +475,51 @@ def test_use_callbacks_with_train_loop_only(tmpdir): for ckpt_val, expected_epoch in zip(ckpt_vals, [0, 1, 2, 3, 6]): assert ckpt_val['epoch'] == expected_epoch assert ckpt_val['monitor'] == 'checkpoint_on' + + +def test_full_train_loop_with_results_obj(tmpdir): + os.environ['PL_DEV_DEBUG'] = '1' + + model = DeterministicModel() + model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks + model.training_epoch_end = None + model.val_dataloader = None + + batches = 3 + epochs = 300 + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=epochs, + early_stop_callback=True, + row_log_interval=1, + limit_train_batches=batches, + weights_summary=None, + ) + trainer.fit(model) + + num_expected_epochs = 10 + + # ---------------------------------- + # VERIFY EARLY STOPPING BEHAVIOR + # ---------------------------------- + # with train loop only it happens on every epoch + early_stop_vals = trainer.dev_debugger.early_stopping_history + assert len(early_stop_vals) == num_expected_epochs + min_val = min([x['best'] for x in early_stop_vals]) + assert min_val == 171 + 9 + all_losses = trainer.dev_debugger.saved_losses + + from collections import Counter + batch_idxs = Counter([x['batch_idx'] for x in all_losses]) + for i, val in batch_idxs.items(): + assert val == num_expected_epochs + assert i in [0, 1, 2] + + # ---------------------------------- + # VERIFY CHECKPOINTING BEHAVIOR + # ---------------------------------- + ckpt_vals = trainer.dev_debugger.checkpoint_callback_history + assert len(ckpt_vals) == 5, '5 ckpts should have been saved' + for ckpt_val, expected_epoch in zip(ckpt_vals, [0, 1, 2, 3, 6]): + assert ckpt_val['epoch'] == expected_epoch + assert ckpt_val['monitor'] == 'checkpoint_on' From 55eb02c28f35f19f70e7def5e948eaf27a2cea01 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 08:56:49 -0400 Subject: [PATCH 041/168] finished tests for structured results on train epoch --- pytorch_lightning/core/step_result.py | 20 +++++++++----- pytorch_lightning/trainer/training_loop.py | 1 + tests/base/deterministic_model.py | 26 +++++++++++++++++++ .../test_trainer_steps_result_return.py | 12 ++++++--- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index abbe5e42676a4..f3ca6b970cd22 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -87,10 +87,16 @@ def log( logger=True, on_step=False, on_epoch=True, - reduce_fx=torch.mean + reduce_fx=torch.mean, + enable_graph=False, ): + # no metrics should be logged with graphs + if not enable_graph and isinstance(value, torch.Tensor): + value = value.detach() + if 'meta' not in self: self.__setitem__('meta', {}) + self.__set_meta(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx) # 
set the value @@ -99,8 +105,6 @@ def log( def __set_meta(self, name, value, prog_bar, logger, on_step, on_epoch, reduce_fx): # set the meta for the item meta_value = value - if isinstance(meta_value, torch.Tensor): - meta_value = meta_value.detach() meta = dict( prog_bar=prog_bar, logger=logger, @@ -283,9 +287,10 @@ def log( logger=True, on_step=True, on_epoch=False, - reduce_fx=torch.mean + reduce_fx=torch.mean, + enable_graph=False, ): - super().log(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx) + super().log(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx, enable_graph) class EvalResult(Result): @@ -307,9 +312,10 @@ def log( logger=True, on_step=False, on_epoch=True, - reduce_fx=torch.mean + reduce_fx=torch.mean, + enable_graph=False, ): - super().log(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx) + super().log(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx, enable_graph) if __name__ == '__main__': diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index a0444b62ff02d..bca2ec8dbb943 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -1075,6 +1075,7 @@ def training_forward(self, batch, batch_idx, opt_idx, hiddens): if self.is_overridden('training_step_end'): model_ref = self.get_model() with self.profiler.profile('training_step_end'): + # TODO: modify when using result obj output = model_ref.training_step_end(output) # allow any mode to define training_end diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 4e46dd1dd39d5..f7164af3fce4e 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -103,6 +103,32 @@ def training_epoch_end_scalar(self, outputs): # -------------------------- # Result returns # -------------------------- + def training_step_full_loop_result_obj(self, batch, batch_idx): + """ + Full loop flow train step + """ + acc = self.step(batch, batch_idx) + result = TrainResult(minimize=acc) + result.log('train_step_acc1', acc + 1) + self.training_step_called = True + return result + + def training_step_end_full_loop_result_obj_dp(self, result): + """ + Full loop flow train step + """ + result.log('train_step_end_acc1', 1) + self.training_step_end_called = True + return result + + def training_epoch_end_full_loop_result_obj(self, result): + """ + Full loop flow train step + """ + result.log('train_epoch_end_acc1', 1) + self.training_epoch_end_called = True + return result + def training_step_no_default_callbacks_for_train_loop(self, batch, batch_idx): """ Early stop and checkpoint only on these values diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 1b5fabcc980d3..9921c8907d731 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -477,18 +477,20 @@ def test_use_callbacks_with_train_loop_only(tmpdir): assert ckpt_val['monitor'] == 'checkpoint_on' -def test_full_train_loop_with_results_obj(tmpdir): +def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' model = DeterministicModel() - model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks - model.training_epoch_end = None + model.training_step = model.training_step_full_loop_result_obj + model.training_step_end = model.training_step_end_full_loop_result_obj_dp + model.training_epoch_end = 
model.training_epoch_end_full_loop_result_obj model.val_dataloader = None batches = 3 - epochs = 300 + epochs = 3 trainer = Trainer( default_root_dir=tmpdir, + distributed_backend='dp', max_epochs=epochs, early_stop_callback=True, row_log_interval=1, @@ -523,3 +525,5 @@ def test_full_train_loop_with_results_obj(tmpdir): for ckpt_val, expected_epoch in zip(ckpt_vals, [0, 1, 2, 3, 6]): assert ckpt_val['epoch'] == expected_epoch assert ckpt_val['monitor'] == 'checkpoint_on' + +test_full_train_loop_with_results_obj_dp('') \ No newline at end of file From b13d62b33c909a9458770a07a73a87e3b2c47b81 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 08:58:42 -0400 Subject: [PATCH 042/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 1 + tests/trainer/test_trainer_steps_result_return.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index f7164af3fce4e..2b9de7329dfbe 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -117,6 +117,7 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ Full loop flow train step """ + import pdb; pdb.set_trace() result.log('train_step_end_acc1', 1) self.training_step_end_called = True return result diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 9921c8907d731..e7859dca7f937 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -497,6 +497,8 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): limit_train_batches=batches, weights_summary=None, ) + + import pdb; pdb.set_trace() trainer.fit(model) num_expected_epochs = 10 From 70061387777b77e6041d9a20c92b2ed7390fd4cf Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:05:17 -0400 Subject: [PATCH 043/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 1 + tests/trainer/test_trainer_steps_result_return.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 2b9de7329dfbe..ea0bb2c1d2790 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -42,6 +42,7 @@ def step(self, batch, batch_idx): assert torch.all(test_hat[:, 0] == 15.0) assert torch.all(test_hat[:, 1] == 42.0) out = y_hat.sum() + import pdb; pdb.set_trace() assert out == (42.0 * 3) + (15.0 * 3) return out diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index e7859dca7f937..27e10f7d59210 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -491,6 +491,7 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): trainer = Trainer( default_root_dir=tmpdir, distributed_backend='dp', + gpus=2, max_epochs=epochs, early_stop_callback=True, row_log_interval=1, From 715a634a08ef3d2aeab40892bda8f3ab41265f3d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:07:15 -0400 Subject: [PATCH 044/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index ea0bb2c1d2790..76f4eeb6f7142 100644 --- 
a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -6,6 +6,23 @@ from pytorch_lightning.core.lightning import LightningModule +import sys +import pdb + + +class ForkedPdb(pdb.Pdb): + """A Pdb subclass that may be used + from a forked multiprocessing child + """ + + def interaction(self, *args, **kwargs): + _stdin = sys.stdin + try: + sys.stdin = open('/dev/stdin') + pdb.Pdb.interaction(self, *args, **kwargs) + finally: + sys.stdin = _stdin + class DeterministicModel(LightningModule): @@ -42,7 +59,7 @@ def step(self, batch, batch_idx): assert torch.all(test_hat[:, 0] == 15.0) assert torch.all(test_hat[:, 1] == 42.0) out = y_hat.sum() - import pdb; pdb.set_trace() + ForkedPdb().set_trace() assert out == (42.0 * 3) + (15.0 * 3) return out @@ -118,7 +135,7 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ Full loop flow train step """ - import pdb; pdb.set_trace() + ForkedPdb().set_trace() result.log('train_step_end_acc1', 1) self.training_step_end_called = True return result From c26a92ec2f7e87bf8374081188f436ba3d424b41 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:08:22 -0400 Subject: [PATCH 045/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 76f4eeb6f7142..36cf1aef72ef0 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -60,6 +60,7 @@ def step(self, batch, batch_idx): assert torch.all(test_hat[:, 1] == 42.0) out = y_hat.sum() ForkedPdb().set_trace() + print(out) assert out == (42.0 * 3) + (15.0 * 3) return out From e59b04cbc2f0415e0ac0c3738d1e55348bbacebc Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:10:50 -0400 Subject: [PATCH 046/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 36cf1aef72ef0..281414874e739 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -55,11 +55,11 @@ def step(self, batch, batch_idx): x = batch y_hat = self(x) + ForkedPdb().set_trace() test_hat = y_hat.cpu().detach() assert torch.all(test_hat[:, 0] == 15.0) assert torch.all(test_hat[:, 1] == 42.0) out = y_hat.sum() - ForkedPdb().set_trace() print(out) assert out == (42.0 * 3) + (15.0 * 3) From a90a719d84109e46a710c201b6a78231ba10e47c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:12:01 -0400 Subject: [PATCH 047/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 281414874e739..232d58871a55f 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -55,7 +55,6 @@ def step(self, batch, batch_idx): x = batch y_hat = self(x) - ForkedPdb().set_trace() test_hat = y_hat.cpu().detach() assert torch.all(test_hat[:, 0] == 15.0) assert torch.all(test_hat[:, 1] == 42.0) From 43b372454e2678556c4e7016837d8c1ba6ab83ec Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:13:27 -0400 Subject: [PATCH 048/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 232d58871a55f..b433750342679 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -53,6 +53,7 @@ def forward(self, x): def step(self, batch, batch_idx): x = batch + bs = x.size(0) y_hat = self(x) test_hat = y_hat.cpu().detach() @@ -60,7 +61,7 @@ def step(self, batch, batch_idx): assert torch.all(test_hat[:, 1] == 42.0) out = y_hat.sum() print(out) - assert out == (42.0 * 3) + (15.0 * 3) + assert out == (42.0 * bs) + (15.0 * bs) return out From a45b8088de1fb85cc8a6144648f9f69b52f8e7ea Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:15:13 -0400 Subject: [PATCH 049/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 + tests/base/deterministic_model.py | 18 ------------------ 2 files changed, 1 insertion(+), 18 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index f2a23b188e068..bb147d385ce54 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -63,6 +63,7 @@ def forward(self, *inputs, **kwargs): replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) outputs = self.parallel_apply(replicas, inputs, kwargs) + import pdb; pdb.set_trace() return self.gather(outputs, self.output_device) def parallel_apply(self, replicas, inputs, kwargs): diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index b433750342679..e8caaf9c92be2 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -6,23 +6,6 @@ from pytorch_lightning.core.lightning import LightningModule -import sys -import pdb - - -class ForkedPdb(pdb.Pdb): - """A Pdb subclass that may be used - from a forked multiprocessing child - """ - - def interaction(self, *args, **kwargs): - _stdin = sys.stdin - try: - sys.stdin = open('/dev/stdin') - pdb.Pdb.interaction(self, *args, **kwargs) - finally: - sys.stdin = _stdin - class DeterministicModel(LightningModule): @@ -60,7 +43,6 @@ def step(self, batch, batch_idx): assert torch.all(test_hat[:, 0] == 15.0) assert torch.all(test_hat[:, 1] == 42.0) out = y_hat.sum() - print(out) assert out == (42.0 * bs) + (15.0 * bs) return out From c50c74e631bd61aaa1cd8812095346b6230f26d4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:17:38 -0400 Subject: [PATCH 050/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index bb147d385ce54..d1a8a36590eac 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -63,6 +63,7 @@ def forward(self, *inputs, **kwargs): replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) outputs = self.parallel_apply(replicas, inputs, kwargs) + outputs = [{'a': x['minimize']} for x in outputs] import pdb; pdb.set_trace() return self.gather(outputs, self.output_device) From 758b5d86d76707da32d94547b6eb6f918e310c10 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:22:58 -0400 Subject: [PATCH 051/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 34 ++++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git 
a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index d1a8a36590eac..0176c67265904 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -6,6 +6,7 @@ from torch.cuda._utils import _get_device_index from torch.nn import DataParallel from torch.nn.parallel import DistributedDataParallel +from pytorch_lightning.core.step_result import Result def _find_tensors(obj): # pragma: no-cover @@ -63,9 +64,36 @@ def forward(self, *inputs, **kwargs): replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) outputs = self.parallel_apply(replicas, inputs, kwargs) - outputs = [{'a': x['minimize']} for x in outputs] - import pdb; pdb.set_trace() - return self.gather(outputs, self.output_device) + + if isinstance(outputs[0], Result): + import pdb; pdb.set_trace() + outputs = self.__gather_structured_result(outputs) + else: + outputs = self.gather(outputs, self.output_device) + return outputs + + def __gather_structured_result(self, outputs): + prototype_output = outputs[0] + original_class = prototype_output.__class__ + outputs = [dict(x) for x in outputs] + + # functions cannot be reduced... delete from each output and track so we can add back + reduce_fxs = {k: prototype_output[k] for k in prototype_output.keys() if 'reduce_fx' in k} + for i, output in enumerate(outputs): + for k in reduce_fxs.keys(): + del output[k] + + outputs = self.gather(outputs, self.output_device) + + # pass minimize to constructor for TrainResult + if 'minimize' in outputs: + result = original_class(outputs['minimize']) + else: + result = original_class() + + result.update(outputs) + result.update(reduce_fxs) + return result def parallel_apply(self, replicas, inputs, kwargs): return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) From 98123180d3b81d3f186df79b5cf5da40ae6ec042 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:24:11 -0400 Subject: [PATCH 052/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 0176c67265904..40276b4920586 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -66,7 +66,6 @@ def forward(self, *inputs, **kwargs): outputs = self.parallel_apply(replicas, inputs, kwargs) if isinstance(outputs[0], Result): - import pdb; pdb.set_trace() outputs = self.__gather_structured_result(outputs) else: outputs = self.gather(outputs, self.output_device) @@ -83,6 +82,7 @@ def __gather_structured_result(self, outputs): for k in reduce_fxs.keys(): del output[k] + import pdb; pdb.set_trace() outputs = self.gather(outputs, self.output_device) # pass minimize to constructor for TrainResult From c1af2221eea16beabb4745a59ac0548aa4ebab74 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:25:43 -0400 Subject: [PATCH 053/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 40276b4920586..a4a59b29cda16 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -77,10 +77,9 @@ def 
__gather_structured_result(self, outputs): outputs = [dict(x) for x in outputs] # functions cannot be reduced... delete from each output and track so we can add back - reduce_fxs = {k: prototype_output[k] for k in prototype_output.keys() if 'reduce_fx' in k} + meta = outputs[0].meta for i, output in enumerate(outputs): - for k in reduce_fxs.keys(): - del output[k] + del output['meta'] import pdb; pdb.set_trace() outputs = self.gather(outputs, self.output_device) From 07c4f42d4c3d9c68a9c71cf4981231696f8a45b5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:26:23 -0400 Subject: [PATCH 054/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index a4a59b29cda16..492d227dbf956 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -77,7 +77,7 @@ def __gather_structured_result(self, outputs): outputs = [dict(x) for x in outputs] # functions cannot be reduced... delete from each output and track so we can add back - meta = outputs[0].meta + meta = outputs[0]['meta'] for i, output in enumerate(outputs): del output['meta'] From 28f2c40f515fb309cbf51974bdc3b4d531f219ee Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:28:05 -0400 Subject: [PATCH 055/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 492d227dbf956..282c66fd09880 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -76,14 +76,14 @@ def __gather_structured_result(self, outputs): original_class = prototype_output.__class__ outputs = [dict(x) for x in outputs] - # functions cannot be reduced... 
delete from each output and track so we can add back + # remove all the meta info meta = outputs[0]['meta'] for i, output in enumerate(outputs): del output['meta'] - import pdb; pdb.set_trace() outputs = self.gather(outputs, self.output_device) + import pdb; pdb.set_trace() # pass minimize to constructor for TrainResult if 'minimize' in outputs: result = original_class(outputs['minimize']) @@ -91,7 +91,7 @@ def __gather_structured_result(self, outputs): result = original_class() result.update(outputs) - result.update(reduce_fxs) + result['meta'] = meta return result def parallel_apply(self, replicas, inputs, kwargs): From 15c8f55d6a32d9de91a579ac55c4ce6bceed0b3b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:29:56 -0400 Subject: [PATCH 056/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 282c66fd09880..e4e29517b41d1 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -83,7 +83,6 @@ def __gather_structured_result(self, outputs): outputs = self.gather(outputs, self.output_device) - import pdb; pdb.set_trace() # pass minimize to constructor for TrainResult if 'minimize' in outputs: result = original_class(outputs['minimize']) From f8209b22139d4588efd2773167ab58525224cbfc Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:30:44 -0400 Subject: [PATCH 057/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index e8caaf9c92be2..4070019131c08 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -3,6 +3,7 @@ from torch import nn from torch.utils.data import Dataset, DataLoader from pytorch_lightning import TrainResult +import pdb from pytorch_lightning.core.lightning import LightningModule @@ -118,7 +119,7 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ Full loop flow train step """ - ForkedPdb().set_trace() + pdb.set_trace() result.log('train_step_end_acc1', 1) self.training_step_end_called = True return result From 882437e00218abf714b8ee4df8d67a6321d3a2c7 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:32:31 -0400 Subject: [PATCH 058/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 4070019131c08..a3e4e71a9b53e 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -119,9 +119,11 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ Full loop flow train step """ - pdb.set_trace() + result.minimize = result.minimize.mean() + result.checkpoint_on = result.checkpoint_on.mean() result.log('train_step_end_acc1', 1) self.training_step_end_called = True + import pdb; pdb.set_trace() return result def training_epoch_end_full_loop_result_obj(self, result): From 874f4a27e25d5f4e35e98065ab8a5051fa624aee Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:33:25 -0400 Subject: [PATCH 059/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 1 + 1 
file changed, 1 insertion(+) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index a3e4e71a9b53e..1473434a58d6c 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -121,6 +121,7 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ result.minimize = result.minimize.mean() result.checkpoint_on = result.checkpoint_on.mean() + result.train_step_acc1 = result.train_step_acc1.mean() result.log('train_step_end_acc1', 1) self.training_step_end_called = True import pdb; pdb.set_trace() From 968b17e5e55bc456d85fa181cddbbcba4b98ba89 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:34:46 -0400 Subject: [PATCH 060/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 1473434a58d6c..e6e2fb46da366 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -119,12 +119,13 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ Full loop flow train step """ + self.assert_backward = False + result.minimize = result.minimize.mean() result.checkpoint_on = result.checkpoint_on.mean() result.train_step_acc1 = result.train_step_acc1.mean() result.log('train_step_end_acc1', 1) self.training_step_end_called = True - import pdb; pdb.set_trace() return result def training_epoch_end_full_loop_result_obj(self, result): From 176b884362906e23136abfa2b778e20360e9e144 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:35:11 -0400 Subject: [PATCH 061/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 27e10f7d59210..2f4e321a1bf54 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -499,8 +499,8 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): weights_summary=None, ) - import pdb; pdb.set_trace() trainer.fit(model) + import pdb; pdb.set_trace() num_expected_epochs = 10 From 7b4be6aa37f904ca62fa523559259e3eff74b938 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:36:06 -0400 Subject: [PATCH 062/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index e4e29517b41d1..3bd89a21122a4 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -81,6 +81,7 @@ def __gather_structured_result(self, outputs): for i, output in enumerate(outputs): del output['meta'] + import pdb; pdb.set_trace() outputs = self.gather(outputs, self.output_device) # pass minimize to constructor for TrainResult From 92c0323904d972d7ac0728d0500ec8188bd9845d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:37:55 -0400 Subject: [PATCH 063/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 - tests/base/deterministic_model.py | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py 
b/pytorch_lightning/overrides/data_parallel.py index 3bd89a21122a4..e4e29517b41d1 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -81,7 +81,6 @@ def __gather_structured_result(self, outputs): for i, output in enumerate(outputs): del output['meta'] - import pdb; pdb.set_trace() outputs = self.gather(outputs, self.output_device) # pass minimize to constructor for TrainResult diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index e6e2fb46da366..236694935567b 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -365,6 +365,8 @@ def backward(self, trainer, loss, optimizer, optimizer_idx): assert loss > 171 * 1000 else: assert loss == 171.0 + + import pdb; pdb.set_trace() loss.backward() From 8bb3b194b9cbaa4047c0902ee30d1c0684abc8c6 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:42:13 -0400 Subject: [PATCH 064/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 236694935567b..dac6316cf3ce2 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -37,6 +37,7 @@ def forward(self, x): def step(self, batch, batch_idx): x = batch + print(x.device) bs = x.size(0) y_hat = self(x) From 1c69301965b9e11c976d23c26a748b1c2e560242 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:43:00 -0400 Subject: [PATCH 065/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index dac6316cf3ce2..236694935567b 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -37,7 +37,6 @@ def forward(self, x): def step(self, batch, batch_idx): x = batch - print(x.device) bs = x.size(0) y_hat = self(x) From 4a6f193097c31cda15d96455146319430b420f44 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:43:40 -0400 Subject: [PATCH 066/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 236694935567b..3fca21d5cab16 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -122,6 +122,7 @@ def training_step_end_full_loop_result_obj_dp(self, result): self.assert_backward = False result.minimize = result.minimize.mean() + import pdb; pdb.set_trace() result.checkpoint_on = result.checkpoint_on.mean() result.train_step_acc1 = result.train_step_acc1.mean() result.log('train_step_end_acc1', 1) From 44d9a0a43e22bf5abca541a758a89c738d1d6708 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:44:34 -0400 Subject: [PATCH 067/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 3fca21d5cab16..e6b6084f1f095 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -121,8 +121,8 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ self.assert_backward = False - result.minimize = result.minimize.mean() import pdb; 
pdb.set_trace() + result.minimize = result.minimize.mean() result.checkpoint_on = result.checkpoint_on.mean() result.train_step_acc1 = result.train_step_acc1.mean() result.log('train_step_end_acc1', 1) From 26c8d3c1dcfce4f494490c76a19d817d37a9f9f5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:47:48 -0400 Subject: [PATCH 068/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index e6b6084f1f095..b9ce1d1d3f41d 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -39,6 +39,8 @@ def step(self, batch, batch_idx): x = batch bs = x.size(0) y_hat = self(x) + print(self.device) + print(self.l1.weight.device) test_hat = y_hat.cpu().detach() assert torch.all(test_hat[:, 0] == 15.0) @@ -121,7 +123,6 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ self.assert_backward = False - import pdb; pdb.set_trace() result.minimize = result.minimize.mean() result.checkpoint_on = result.checkpoint_on.mean() result.train_step_acc1 = result.train_step_acc1.mean() From f71c7971832d6278b20859a83e5a86e05a193f2f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:49:07 -0400 Subject: [PATCH 069/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index e4e29517b41d1..3ee49470f6918 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -62,6 +62,7 @@ def forward(self, *inputs, **kwargs): return self.module.validation_step(*inputs[0], **kwargs[0]) + import pdb; pdb.set_trace() replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) outputs = self.parallel_apply(replicas, inputs, kwargs) From 6bce9d012b44433cf54e2ed29ad08f8b6c67538e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:52:20 -0400 Subject: [PATCH 070/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 3ee49470f6918..432e8ef511d99 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -62,8 +62,9 @@ def forward(self, *inputs, **kwargs): return self.module.validation_step(*inputs[0], **kwargs[0]) - import pdb; pdb.set_trace() replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) + for replica in replicas: + replica.device = self.module.device outputs = self.parallel_apply(replicas, inputs, kwargs) if isinstance(outputs[0], Result): From e3226f32812a56c3821105900349ab785b6e6b59 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:53:54 -0400 Subject: [PATCH 071/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 432e8ef511d99..813ae2df98d20 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -63,8 +63,10 @@ def 
forward(self, *inputs, **kwargs): return self.module.validation_step(*inputs[0], **kwargs[0]) replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) - for replica in replicas: - replica.device = self.module.device + for replica, device_idx in zip(replicas, self.device_ids[:len(inputs)]): + replica.device = torch.device(device_idx) + # replica.to(replica.device) + outputs = self.parallel_apply(replicas, inputs, kwargs) if isinstance(outputs[0], Result): From 18630d248264bd394b5a83d37e88ba180bd1189f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:54:47 -0400 Subject: [PATCH 072/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 813ae2df98d20..d3281c1328003 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -64,8 +64,7 @@ def forward(self, *inputs, **kwargs): replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) for replica, device_idx in zip(replicas, self.device_ids[:len(inputs)]): - replica.device = torch.device(device_idx) - # replica.to(replica.device) + replica.to(torch.device(device_idx)) outputs = self.parallel_apply(replicas, inputs, kwargs) From 4a9659c17928497781577eafd7544c7a7db65ece Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:55:35 -0400 Subject: [PATCH 073/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index d3281c1328003..4aa4f50f18d5b 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -64,6 +64,7 @@ def forward(self, *inputs, **kwargs): replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) for replica, device_idx in zip(replicas, self.device_ids[:len(inputs)]): + print(device_idx) replica.to(torch.device(device_idx)) outputs = self.parallel_apply(replicas, inputs, kwargs) From 90939a8b9d86e0cacfc686330d1155bc1e1b4cee Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:57:05 -0400 Subject: [PATCH 074/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 4aa4f50f18d5b..bc267c3b545a5 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -63,9 +63,10 @@ def forward(self, *inputs, **kwargs): return self.module.validation_step(*inputs[0], **kwargs[0]) replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) - for replica, device_idx in zip(replicas, self.device_ids[:len(inputs)]): - print(device_idx) - replica.to(torch.device(device_idx)) + for replica_idx, device_idx in zip(len(replicas), self.device_ids[:len(inputs)]): + replica = replicas[replica_idx] + replica = replica.to(torch.device(device_idx)) + replicas[replica_idx] = replica outputs = self.parallel_apply(replicas, inputs, kwargs) From 33cd21b577684af85c5392940ddc6712e774bf07 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:57:47 -0400 Subject: [PATCH 
075/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index bc267c3b545a5..599a4e77ae385 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -63,7 +63,7 @@ def forward(self, *inputs, **kwargs): return self.module.validation_step(*inputs[0], **kwargs[0]) replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) - for replica_idx, device_idx in zip(len(replicas), self.device_ids[:len(inputs)]): + for replica_idx, device_idx in zip(range(len(replicas)), self.device_ids[:len(inputs)]): replica = replicas[replica_idx] replica = replica.to(torch.device(device_idx)) replicas[replica_idx] = replica From 8d32a7ae6ad341fe64c67d89163404acd2ed1b5a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:58:55 -0400 Subject: [PATCH 076/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 599a4e77ae385..6765c08c9cd9b 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -63,11 +63,6 @@ def forward(self, *inputs, **kwargs): return self.module.validation_step(*inputs[0], **kwargs[0]) replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) - for replica_idx, device_idx in zip(range(len(replicas)), self.device_ids[:len(inputs)]): - replica = replicas[replica_idx] - replica = replica.to(torch.device(device_idx)) - replicas[replica_idx] = replica - outputs = self.parallel_apply(replicas, inputs, kwargs) if isinstance(outputs[0], Result): @@ -188,6 +183,7 @@ def _worker(i, module, input, kwargs, device=None): if device is None: device = get_a_var(input).get_device() try: + print(device) with torch.cuda.device(device): # this also avoids accidental slicing of `input` if it is a Tensor if not isinstance(input, (list, tuple)): From f4a0a6f76cfcf2b37571146d48a6cf8845e1c00a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 09:59:46 -0400 Subject: [PATCH 077/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 6765c08c9cd9b..64907db71862c 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -191,6 +191,7 @@ def _worker(i, module, input, kwargs, device=None): # --------------- # CHANGE + print(module.device) if module.training: output = module.training_step(*input, **kwargs) From 78d335d47877df9276f5fd922203c41bdc787f57 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:01:00 -0400 Subject: [PATCH 078/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 34 ++++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 64907db71862c..86cb4817f00af 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -183,27 
+183,27 @@ def _worker(i, module, input, kwargs, device=None): if device is None: device = get_a_var(input).get_device() try: - print(device) - with torch.cuda.device(device): - # this also avoids accidental slicing of `input` if it is a Tensor - if not isinstance(input, (list, tuple)): - input = (input,) + module.to(device) - # --------------- - # CHANGE - print(module.device) - if module.training: - output = module.training_step(*input, **kwargs) + # this also avoids accidental slicing of `input` if it is a Tensor + if not isinstance(input, (list, tuple)): + input = (input,) - elif module.testing: - output = module.test_step(*input, **kwargs) + # --------------- + # CHANGE + print(module.device) + if module.training: + output = module.training_step(*input, **kwargs) - else: - output = module.validation_step(*input, **kwargs) + elif module.testing: + output = module.test_step(*input, **kwargs) + + else: + output = module.validation_step(*input, **kwargs) - if module.use_dp or module.use_ddp2: - auto_squeeze_dim_zeros(output) - # --------------- + if module.use_dp or module.use_ddp2: + auto_squeeze_dim_zeros(output) + # --------------- with lock: results[i] = output From 0faf91280b2d5cce99de86e7b975a45bf9d5a106 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:02:20 -0400 Subject: [PATCH 079/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 86cb4817f00af..7aa5657debb8e 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -183,7 +183,9 @@ def _worker(i, module, input, kwargs, device=None): if device is None: device = get_a_var(input).get_device() try: - module.to(device) + print('old device', module.device) + module = module.to(device) + print('new device', module.device) # this also avoids accidental slicing of `input` if it is a Tensor if not isinstance(input, (list, tuple)): @@ -191,7 +193,6 @@ def _worker(i, module, input, kwargs, device=None): # --------------- # CHANGE - print(module.device) if module.training: output = module.training_step(*input, **kwargs) From e0ce316e972a8fd520df64e25035c9a4fed17440 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:05:18 -0400 Subject: [PATCH 080/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 7aa5657debb8e..21e9ee9741106 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -186,6 +186,7 @@ def _worker(i, module, input, kwargs, device=None): print('old device', module.device) module = module.to(device) print('new device', module.device) + print(input) # this also avoids accidental slicing of `input` if it is a Tensor if not isinstance(input, (list, tuple)): From 947dc70df0e82fea4138974a1dae73016a1c8ff2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:13:11 -0400 Subject: [PATCH 081/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 35 ++++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py 
b/pytorch_lightning/overrides/data_parallel.py index 21e9ee9741106..00a572a173d17 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -183,29 +183,28 @@ def _worker(i, module, input, kwargs, device=None): if device is None: device = get_a_var(input).get_device() try: - print('old device', module.device) - module = module.to(device) - print('new device', module.device) - print(input) + with torch.cuda.device(device): + # this also avoids accidental slicing of `input` if it is a Tensor + if not isinstance(input, (list, tuple)): + input = (input,) - # this also avoids accidental slicing of `input` if it is a Tensor - if not isinstance(input, (list, tuple)): - input = (input,) + print(input, module.device) - # --------------- - # CHANGE - if module.training: - output = module.training_step(*input, **kwargs) + # --------------- + # CHANGE + print(module.device) + if module.training: + output = module.training_step(*input, **kwargs) - elif module.testing: - output = module.test_step(*input, **kwargs) + elif module.testing: + output = module.test_step(*input, **kwargs) - else: - output = module.validation_step(*input, **kwargs) + else: + output = module.validation_step(*input, **kwargs) - if module.use_dp or module.use_ddp2: - auto_squeeze_dim_zeros(output) - # --------------- + if module.use_dp or module.use_ddp2: + auto_squeeze_dim_zeros(output) + # --------------- with lock: results[i] = output From 1bd96c1989a9c114d1331288fd9e119af592d4fb Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:14:18 -0400 Subject: [PATCH 082/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 00a572a173d17..be6603edd2cde 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -188,7 +188,7 @@ def _worker(i, module, input, kwargs, device=None): if not isinstance(input, (list, tuple)): input = (input,) - print(input, module.device) + print(input, module.device, module.l1.device) # --------------- # CHANGE From 886a094d3c30e721bd7444a8571b5c3c62e61479 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:15:06 -0400 Subject: [PATCH 083/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index be6603edd2cde..c50a89e989936 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -188,7 +188,7 @@ def _worker(i, module, input, kwargs, device=None): if not isinstance(input, (list, tuple)): input = (input,) - print(input, module.device, module.l1.device) + print(input, module.device, module.l1.weight.device) # --------------- # CHANGE From e891e4bec28294ed7049c26a53993c66b610371f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:16:38 -0400 Subject: [PATCH 084/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index c50a89e989936..5634e08658226 100644 --- 
a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -187,7 +187,7 @@ def _worker(i, module, input, kwargs, device=None): # this also avoids accidental slicing of `input` if it is a Tensor if not isinstance(input, (list, tuple)): input = (input,) - + module._device = device print(input, module.device, module.l1.weight.device) # --------------- From 987073920460ee4fcb3bb436865ccc5fd3609951 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:18:38 -0400 Subject: [PATCH 085/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 - tests/base/deterministic_model.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 5634e08658226..f45daad389688 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -192,7 +192,6 @@ def _worker(i, module, input, kwargs, device=None): # --------------- # CHANGE - print(module.device) if module.training: output = module.training_step(*input, **kwargs) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index b9ce1d1d3f41d..420b327ed5fbf 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -38,7 +38,7 @@ def forward(self, x): def step(self, batch, batch_idx): x = batch bs = x.size(0) - y_hat = self(x) + y_hat = self.l1(x) print(self.device) print(self.l1.weight.device) From 50ddc5afb482e3f4bbe02011be336795a891bd52 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:19:29 -0400 Subject: [PATCH 086/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index f45daad389688..5d58a3b62d973 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -188,7 +188,6 @@ def _worker(i, module, input, kwargs, device=None): if not isinstance(input, (list, tuple)): input = (input,) module._device = device - print(input, module.device, module.l1.weight.device) # --------------- # CHANGE From bb9dce788b32d478c595258c2a0547ed9c31ace0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:23:45 -0400 Subject: [PATCH 087/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 420b327ed5fbf..b882379add749 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -39,8 +39,7 @@ def step(self, batch, batch_idx): x = batch bs = x.size(0) y_hat = self.l1(x) - print(self.device) - print(self.l1.weight.device) + print(x.device, self.device, self.l1.weight.device) test_hat = y_hat.cpu().detach() assert torch.all(test_hat[:, 0] == 15.0) From df2b590ba2338481f1a1b312e0a3409b846e1a38 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:35:20 -0400 Subject: [PATCH 088/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 
5d58a3b62d973..52d25a515e072 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -188,6 +188,7 @@ def _worker(i, module, input, kwargs, device=None): if not isinstance(input, (list, tuple)): input = (input,) module._device = device + module = module.to(device) # --------------- # CHANGE From 68ab1302c4455fa984e4342010c7c61c5224c200 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:36:14 -0400 Subject: [PATCH 089/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 52d25a515e072..5aabd64cbaf95 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -189,6 +189,7 @@ def _worker(i, module, input, kwargs, device=None): input = (input,) module._device = device module = module.to(device) + print(module.device, module._device, 'a') # --------------- # CHANGE From d846ff3fb92bec383e3efff349ff88a23076358b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:36:59 -0400 Subject: [PATCH 090/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 5aabd64cbaf95..b8b373f83ea1d 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -187,9 +187,9 @@ def _worker(i, module, input, kwargs, device=None): # this also avoids accidental slicing of `input` if it is a Tensor if not isinstance(input, (list, tuple)): input = (input,) + print(module.device, module._device, 'a') module._device = device module = module.to(device) - print(module.device, module._device, 'a') # --------------- # CHANGE From e97722a1cd14aa632d9e8e77c2ad2078da5b3467 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:38:35 -0400 Subject: [PATCH 091/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index b8b373f83ea1d..229d9133b1efd 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -187,8 +187,9 @@ def _worker(i, module, input, kwargs, device=None): # this also avoids accidental slicing of `input` if it is a Tensor if not isinstance(input, (list, tuple)): input = (input,) - print(module.device, module._device, 'a') - module._device = device + + if hasattr(module, '_device'): + module._device = device module = module.to(device) # --------------- From 36319cd17e43766ae8f85e1667e04f5d13fa186a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 10:39:08 -0400 Subject: [PATCH 092/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 229d9133b1efd..e71b487d13488 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -194,6 +194,8 @@ def _worker(i, module, 
input, kwargs, device=None): # --------------- # CHANGE + print(module._device, module.device) + import pdb; pdb.set_trace() if module.training: output = module.training_step(*input, **kwargs) From 06ecec5c94f18bf1695b5181a386dc1547856d9a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:14:42 -0400 Subject: [PATCH 093/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 2f4e321a1bf54..4c34ed4d09437 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -6,6 +6,7 @@ from pytorch_lightning import Trainer from tests.base.deterministic_model import DeterministicModel from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult +from tests.base import EvalModelTemplate # test with train_step_end @@ -480,7 +481,7 @@ def test_use_callbacks_with_train_loop_only(tmpdir): def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' - model = DeterministicModel() + model = EvalModelTemplate() model.training_step = model.training_step_full_loop_result_obj model.training_step_end = model.training_step_end_full_loop_result_obj_dp model.training_epoch_end = model.training_epoch_end_full_loop_result_obj From 3a6c132c6b7227ed9223a2f19fe2e0588357e5f1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:15:47 -0400 Subject: [PATCH 094/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index e71b487d13488..007643266da4b 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -195,7 +195,6 @@ def _worker(i, module, input, kwargs, device=None): # --------------- # CHANGE print(module._device, module.device) - import pdb; pdb.set_trace() if module.training: output = module.training_step(*input, **kwargs) From 98e11e323e4e8c0aa4a6e77bfe18349c6536e447 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:18:51 -0400 Subject: [PATCH 095/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 4c34ed4d09437..d1dde797e6bd2 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -482,9 +482,10 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' model = EvalModelTemplate() - model.training_step = model.training_step_full_loop_result_obj - model.training_step_end = model.training_step_end_full_loop_result_obj_dp - model.training_epoch_end = model.training_epoch_end_full_loop_result_obj + model2 = DeterministicModel() + model.training_step = model2.training_step_full_loop_result_obj + model.training_step_end = model2.training_step_end_full_loop_result_obj_dp + model.training_epoch_end = model2.training_epoch_end_full_loop_result_obj model.val_dataloader = None batches = 3 From 7782abe2960300e62701390ec3caba46f232f73d Mon Sep 17 00:00:00 2001 
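The data_parallel.py hunks above (patches 051-055 and the follow-ups through 095) converge on one idea: when the per-GPU outputs of a DP forward are Result objects, the non-tensor 'meta' entry is stripped before calling gather, the remaining tensor values are gathered onto the output device, the original Result subclass is rebuilt (passing 'minimize' to the constructor, as the in-code comment notes, for TrainResult), and the meta dict is re-attached afterwards. A minimal stand-alone sketch of that gathering idea, using plain dicts and torch.stack in place of nn.parallel.gather; the helper name and the toy data are illustrative and not part of Lightning:

    import torch

    def gather_structured(outputs):
        # one dict per GPU replica: tensor values plus a shared, non-tensor 'meta' entry
        meta = outputs[0]['meta']
        stripped = [{k: v for k, v in out.items() if k != 'meta'} for out in outputs]
        # stand-in for nn.parallel.gather: stack each tensor key across replicas
        gathered = {k: torch.stack([out[k] for out in stripped]) for k in stripped[0]}
        gathered['meta'] = meta  # re-attach the reduction metadata at the end
        return gathered

    # toy usage: two "replicas", each reporting a scalar loss
    replica_outputs = [
        {'minimize': torch.tensor(0.7), 'meta': {'minimize': {'reduce_fx': torch.mean}}},
        {'minimize': torch.tensor(0.9), 'meta': {'minimize': {'reduce_fx': torch.mean}}},
    ]
    print(gather_structured(replica_outputs)['minimize'])  # tensor([0.7000, 0.9000])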
From: William Falcon Date: Sun, 19 Jul 2020 11:20:40 -0400 Subject: [PATCH 096/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index d1dde797e6bd2..e5d992c988a20 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -482,11 +482,14 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' model = EvalModelTemplate() + model.validation_step = None + model.test_step = None model2 = DeterministicModel() model.training_step = model2.training_step_full_loop_result_obj model.training_step_end = model2.training_step_end_full_loop_result_obj_dp model.training_epoch_end = model2.training_epoch_end_full_loop_result_obj model.val_dataloader = None + model.test_dataloader = None batches = 3 epochs = 3 From 2d4eccfce97abb9a9f9ab14ef03cdf6b6118b085 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:23:07 -0400 Subject: [PATCH 097/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index b882379add749..5e013dbfb0425 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -110,9 +110,12 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): """ Full loop flow train step """ - acc = self.step(batch, batch_idx) - result = TrainResult(minimize=acc) - result.log('train_step_acc1', acc + 1) + x, y = batch + x = x.view(x.size(0), -1) + y_hat = self(x) + loss_val = self.loss(y, y_hat) + result = TrainResult(minimize=loss_val) + result.log('train_step_acc1', loss_val + 1) self.training_step_called = True return result From 3bbd01fc40dad2130765aa973577e0b111a6b999 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:26:59 -0400 Subject: [PATCH 098/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 37 ------------------- tests/base/model_template.py | 3 ++ tests/base/model_train_steps.py | 33 +++++++++++++++++ .../test_trainer_steps_result_return.py | 7 ++-- 4 files changed, 39 insertions(+), 41 deletions(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 5e013dbfb0425..a7c5b210d55cd 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -103,43 +103,6 @@ def training_epoch_end_scalar(self, outputs): prototype_loss = outputs[0] return prototype_loss - # -------------------------- - # Result returns - # -------------------------- - def training_step_full_loop_result_obj(self, batch, batch_idx): - """ - Full loop flow train step - """ - x, y = batch - x = x.view(x.size(0), -1) - y_hat = self(x) - loss_val = self.loss(y, y_hat) - result = TrainResult(minimize=loss_val) - result.log('train_step_acc1', loss_val + 1) - self.training_step_called = True - return result - - def training_step_end_full_loop_result_obj_dp(self, result): - """ - Full loop flow train step - """ - self.assert_backward = False - - result.minimize = result.minimize.mean() - result.checkpoint_on = result.checkpoint_on.mean() - result.train_step_acc1 = result.train_step_acc1.mean() - result.log('train_step_end_acc1', 1) - 
self.training_step_end_called = True - return result - - def training_epoch_end_full_loop_result_obj(self, result): - """ - Full loop flow train step - """ - result.log('train_epoch_end_acc1', 1) - self.training_epoch_end_called = True - return result - def training_step_no_default_callbacks_for_train_loop(self, batch, batch_idx): """ Early stop and checkpoint only on these values diff --git a/tests/base/model_template.py b/tests/base/model_template.py index 48851cdb08219..a89769e6f487b 100644 --- a/tests/base/model_template.py +++ b/tests/base/model_template.py @@ -63,6 +63,9 @@ def __init__( self.hidden_dim = hidden_dim self.b1 = b1 self.b2 = b2 + self.training_step_called = False + self.training_step_end_called = False + self.training_epoch_end_called = False # if you specify an example input, the summary will show input/output for each layer # TODO: to be fixed in #1773 diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index fcd020d852126..e86b6e4f9f0af 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -1,6 +1,7 @@ import math from abc import ABC from collections import OrderedDict +from pytorch_lightning import TrainResult import torch @@ -38,3 +39,35 @@ def training_step__inf_loss(self, batch, batch_idx, optimizer_idx=None): else: output /= 0 return output + + def training_step_full_loop_result_obj(self, batch, batch_idx): + """ + Full loop flow train step + """ + x, y = batch + x = x.view(x.size(0), -1) + y_hat = self(x) + loss_val = self.loss(y, y_hat) + result = TrainResult(minimize=loss_val) + result.log('train_step_acc1', loss_val + 1) + self.training_step_called = True + return result + + def training_step_end_full_loop_result_obj_dp(self, result): + """ + Full loop flow train step + """ + result.minimize = result.minimize.mean() + result.checkpoint_on = result.checkpoint_on.mean() + result.train_step_acc1 = result.train_step_acc1.mean() + result.log('train_step_end_acc1', 1) + self.training_step_end_called = True + return result + + def training_epoch_end_full_loop_result_obj(self, result): + """ + Full loop flow train step + """ + result.log('train_epoch_end_acc1', 1) + self.training_epoch_end_called = True + return result diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index e5d992c988a20..13a14492a0a4f 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -484,10 +484,9 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): model = EvalModelTemplate() model.validation_step = None model.test_step = None - model2 = DeterministicModel() - model.training_step = model2.training_step_full_loop_result_obj - model.training_step_end = model2.training_step_end_full_loop_result_obj_dp - model.training_epoch_end = model2.training_epoch_end_full_loop_result_obj + model.training_step = model.training_step_full_loop_result_obj + model.training_step_end = model.training_step_end_full_loop_result_obj_dp + model.training_epoch_end = model.training_epoch_end_full_loop_result_obj model.val_dataloader = None model.test_dataloader = None From 77c28b080fa8c060ea38f822d3b214a4d96dd05f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:29:31 -0400 Subject: [PATCH 099/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 1 - tests/base/model_train_steps.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 007643266da4b..229d9133b1efd 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -194,7 +194,6 @@ def _worker(i, module, input, kwargs, device=None): # --------------- # CHANGE - print(module._device, module.device) if module.training: output = module.training_step(*input, **kwargs) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index e86b6e4f9f0af..62fe11f4b2750 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -46,6 +46,7 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): """ x, y = batch x = x.view(x.size(0), -1) + print(self.device, self.c_d1.weight.device, x.device) y_hat = self(x) loss_val = self.loss(y, y_hat) result = TrainResult(minimize=loss_val) From 0f180731479ef683aebaabfb51402750d893ffed Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:32:29 -0400 Subject: [PATCH 100/168] finished tests for structured results on train epoch --- pytorch_lightning/trainer/distrib_parts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index 20edc0d60541a..83bb1b8875902 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -232,7 +232,7 @@ def dp_train(self, model): if self.is_function_implemented('setup', model): model.setup('fit') - model.cuda(self.root_gpu) + # model.cuda(self.root_gpu) # CHOOSE OPTIMIZER # allow for lr schedulers as well From a0dd29bb0b8b1772d1b8fdca3d17be9191453b58 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:33:07 -0400 Subject: [PATCH 101/168] finished tests for structured results on train epoch --- pytorch_lightning/trainer/distrib_parts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index 83bb1b8875902..94c9f83328bfc 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -232,7 +232,7 @@ def dp_train(self, model): if self.is_function_implemented('setup', model): model.setup('fit') - # model.cuda(self.root_gpu) + model.cuda(self.root_gpu) # CHOOSE OPTIMIZER # allow for lr schedulers as well @@ -263,7 +263,7 @@ def dp_train(self, model): device_ids = list(range(device_ids)) # set dp device - torch.cuda.set_device(self.root_gpu) + # torch.cuda.set_device(self.root_gpu) model = LightningDataParallel(model, device_ids=device_ids) From 36c10b59fd114d142dbbe217d97eb2f2fb8b72bf Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:33:52 -0400 Subject: [PATCH 102/168] finished tests for structured results on train epoch --- pytorch_lightning/trainer/distrib_parts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index 94c9f83328bfc..47a63207babc0 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -264,6 +264,7 @@ def dp_train(self, model): # set dp device # torch.cuda.set_device(self.root_gpu) + print(device_ids) model = LightningDataParallel(model, device_ids=device_ids) From e113e2c1e34defbaa2ee8089817750cc00bd8101 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:36:42 -0400 
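The repeated print(x.device, self.device, self.l1.weight.device) probes and the replica.to(device) experiments in the patches above suggest the DP quirk being chased: replicate places each replica's parameters on its own GPU, but a cached Python attribute such as the module-level _device is not updated, so self.device can still name the root GPU while the replica's weights live elsewhere. Inside a replicated step it is safer to derive the device from a parameter or from the incoming batch than from cached state. A small illustrative sketch, with a made-up module rather than the test model:

    import torch
    from torch import nn

    class TinyModule(nn.Module):
        def __init__(self):
            super().__init__()
            self.l1 = nn.Linear(4, 2)

        def step(self, x):
            # trust the parameters (or the batch), not a cached device attribute
            param_device = next(self.parameters()).device
            return self.l1(x.to(param_device)).sum()

    m = TinyModule()
    print(m.step(torch.randn(3, 4)))  # runs on CPU here; on a DP replica it follows the weights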
Subject: [PATCH 103/168] finished tests for structured results on train epoch --- pytorch_lightning/trainer/distrib_parts.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index 47a63207babc0..20edc0d60541a 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -263,8 +263,7 @@ def dp_train(self, model): device_ids = list(range(device_ids)) # set dp device - # torch.cuda.set_device(self.root_gpu) - print(device_ids) + torch.cuda.set_device(self.root_gpu) model = LightningDataParallel(model, device_ids=device_ids) From f7d2841a2dbdf44e16be0a72de57a5601cde7f42 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:37:16 -0400 Subject: [PATCH 104/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 62fe11f4b2750..72b1bc1d8fdf0 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -48,7 +48,7 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): x = x.view(x.size(0), -1) print(self.device, self.c_d1.weight.device, x.device) y_hat = self(x) - loss_val = self.loss(y, y_hat) + loss_val = self.loss(y.type_as(y_hat), y_hat) result = TrainResult(minimize=loss_val) result.log('train_step_acc1', loss_val + 1) self.training_step_called = True From 94ea112f5a2b4e2fa5ac07679868c0b992d587a8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:38:17 -0400 Subject: [PATCH 105/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 72b1bc1d8fdf0..3652a5f937aa5 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -48,7 +48,7 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): x = x.view(x.size(0), -1) print(self.device, self.c_d1.weight.device, x.device) y_hat = self(x) - loss_val = self.loss(y.type_as(y_hat), y_hat) + loss_val = self.loss(y.type_as(y_hat), y_hat.long()) result = TrainResult(minimize=loss_val) result.log('train_step_acc1', loss_val + 1) self.training_step_called = True From f5b4259310f21f2e850bedfbf161f203f879d12e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:39:52 -0400 Subject: [PATCH 106/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 3652a5f937aa5..ea86ac108cb09 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -48,7 +48,7 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): x = x.view(x.size(0), -1) print(self.device, self.c_d1.weight.device, x.device) y_hat = self(x) - loss_val = self.loss(y.type_as(y_hat), y_hat.long()) + loss_val = self.loss(y.type_as(y_hat).float(), y_hat.float()) result = TrainResult(minimize=loss_val) result.log('train_step_acc1', loss_val + 1) self.training_step_called = True From b0f6590ce5389658a13ee8bee4f3de472900bf6c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:40:40 -0400 Subject: [PATCH 107/168] finished tests for structured results on train 
epoch --- tests/base/model_train_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index ea86ac108cb09..e1de8f39e8fa3 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -48,7 +48,7 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): x = x.view(x.size(0), -1) print(self.device, self.c_d1.weight.device, x.device) y_hat = self(x) - loss_val = self.loss(y.type_as(y_hat).float(), y_hat.float()) + loss_val = self.loss(y.type_as(y_hat).float(), y_hat.long()) result = TrainResult(minimize=loss_val) result.log('train_step_acc1', loss_val + 1) self.training_step_called = True From 5e1882be0eb770128c5a40347724488309f771d4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:43:38 -0400 Subject: [PATCH 108/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index e1de8f39e8fa3..f913b9ddb3777 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -48,7 +48,7 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): x = x.view(x.size(0), -1) print(self.device, self.c_d1.weight.device, x.device) y_hat = self(x) - loss_val = self.loss(y.type_as(y_hat).float(), y_hat.long()) + loss_val = y_hat.sum() result = TrainResult(minimize=loss_val) result.log('train_step_acc1', loss_val + 1) self.training_step_called = True From 96f968976f7770f10ea2423703bbc11ee1d6ebbb Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sun, 19 Jul 2020 11:44:28 -0400 Subject: [PATCH 109/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index f913b9ddb3777..d59e2e045ee02 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -46,7 +46,6 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): """ x, y = batch x = x.view(x.size(0), -1) - print(self.device, self.c_d1.weight.device, x.device) y_hat = self(x) loss_val = y_hat.sum() result = TrainResult(minimize=loss_val) From 5cd90fe8853a30118fa841c755e02c4d6a3808c9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 07:53:54 -0400 Subject: [PATCH 110/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 229d9133b1efd..86ebf00f6c24e 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -188,9 +188,9 @@ def _worker(i, module, input, kwargs, device=None): if not isinstance(input, (list, tuple)): input = (input,) - if hasattr(module, '_device'): - module._device = device - module = module.to(device) + # if hasattr(module, '_device'): + # module._device = device + # module = module.to(device) # --------------- # CHANGE From 4ebd847877f8a6712065b81dcc080be28cbe7c0c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 07:54:48 -0400 Subject: [PATCH 111/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 86ebf00f6c24e..e4e29517b41d1 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -188,10 +188,6 @@ def _worker(i, module, input, kwargs, device=None): if not isinstance(input, (list, tuple)): input = (input,) - # if hasattr(module, '_device'): - # module._device = device - # module = module.to(device) - # --------------- # CHANGE if module.training: From 4c3b03aef0140c5515d94ffaaec371cdba764d8c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 07:56:03 -0400 Subject: [PATCH 112/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index d59e2e045ee02..f038e868c62fb 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -49,7 +49,7 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): y_hat = self(x) loss_val = y_hat.sum() result = TrainResult(minimize=loss_val) - result.log('train_step_acc1', loss_val + 1) + result.log('train_step_test', loss_val + 1) self.training_step_called = True return result @@ -57,6 +57,7 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ Full loop flow train step """ + import pdb; pdb.set_trace() result.minimize = result.minimize.mean() result.checkpoint_on = result.checkpoint_on.mean() result.train_step_acc1 = result.train_step_acc1.mean() From 7886bcb9cc0fa714ad77401206874cbf04833e98 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 07:59:19 -0400 Subject: [PATCH 113/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 11 +++++------ tests/trainer/test_trainer_steps_result_return.py | 6 ++++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index f038e868c62fb..378fbfd450160 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -40,9 +40,9 @@ def training_step__inf_loss(self, batch, batch_idx, optimizer_idx=None): output /= 0 return output - def training_step_full_loop_result_obj(self, batch, batch_idx): + def training_step_full_loop_result_obj_dp(self, batch, batch_idx): """ - Full loop flow train step + Full loop flow train step (result obj + dp) """ x, y = batch x = x.view(x.size(0), -1) @@ -55,9 +55,8 @@ def training_step_full_loop_result_obj(self, batch, batch_idx): def training_step_end_full_loop_result_obj_dp(self, result): """ - Full loop flow train step + Full loop flow train step (result obj + dp) """ - import pdb; pdb.set_trace() result.minimize = result.minimize.mean() result.checkpoint_on = result.checkpoint_on.mean() result.train_step_acc1 = result.train_step_acc1.mean() @@ -65,9 +64,9 @@ def training_step_end_full_loop_result_obj_dp(self, result): self.training_step_end_called = True return result - def training_epoch_end_full_loop_result_obj(self, result): + def training_epoch_end_full_loop_result_obj_dp(self, result): """ - Full loop flow train step + Full loop flow train step (result obj + dp) """ result.log('train_epoch_end_acc1', 1) self.training_epoch_end_called = True diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 13a14492a0a4f..daeea1d6c3cff 100644 --- 
a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -7,6 +7,7 @@ from tests.base.deterministic_model import DeterministicModel from pytorch_lightning.core.step_result import Result, TrainResult, EvalResult from tests.base import EvalModelTemplate +import pytest # test with train_step_end @@ -478,15 +479,16 @@ def test_use_callbacks_with_train_loop_only(tmpdir): assert ckpt_val['monitor'] == 'checkpoint_on' +@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' model = EvalModelTemplate() model.validation_step = None model.test_step = None - model.training_step = model.training_step_full_loop_result_obj + model.training_step = model.training_step_full_loop_result_obj_dp model.training_step_end = model.training_step_end_full_loop_result_obj_dp - model.training_epoch_end = model.training_epoch_end_full_loop_result_obj + model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp model.val_dataloader = None model.test_dataloader = None From 3c2f53cae77b847e39954b3552533e25762537ae Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:01:11 -0400 Subject: [PATCH 114/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 378fbfd450160..1c06c57708464 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -49,7 +49,7 @@ def training_step_full_loop_result_obj_dp(self, batch, batch_idx): y_hat = self(x) loss_val = y_hat.sum() result = TrainResult(minimize=loss_val) - result.log('train_step_test', loss_val + 1) + result.log('train_step_metric', loss_val + 1) self.training_step_called = True return result @@ -59,8 +59,8 @@ def training_step_end_full_loop_result_obj_dp(self, result): """ result.minimize = result.minimize.mean() result.checkpoint_on = result.checkpoint_on.mean() - result.train_step_acc1 = result.train_step_acc1.mean() - result.log('train_step_end_acc1', 1) + result.train_step_metric = result.train_step_metric.mean() + result.log('train_step_end_metric', 1) self.training_step_end_called = True return result @@ -68,6 +68,6 @@ def training_epoch_end_full_loop_result_obj_dp(self, result): """ Full loop flow train step (result obj + dp) """ - result.log('train_epoch_end_acc1', 1) + result.log('train_epoch_end_metric', 1) self.training_epoch_end_called = True return result From 0f3807f91f0efb3935710f235bb0994b0fc33fd7 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:22:47 -0400 Subject: [PATCH 115/168] finished tests for structured results on train epoch --- pytorch_lightning/utilities/debugging.py | 1 + .../test_trainer_steps_result_return.py | 28 +------------------ 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/pytorch_lightning/utilities/debugging.py b/pytorch_lightning/utilities/debugging.py index d8a7722fd8884..47f98ac4685e8 100644 --- a/pytorch_lightning/utilities/debugging.py +++ b/pytorch_lightning/utilities/debugging.py @@ -15,6 +15,7 @@ def __init__(self, trainer): def track_logged_metrics_history(self, scalar_metrics): if self.enabled: + scalar_metrics['global_step'] = self.trainer.global_step self.logged_metrics.append(scalar_metrics) def track_train_loss_history(self, batch_idx, loss): diff --git 
a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index daeea1d6c3cff..0f25290279076 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -500,7 +500,7 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): gpus=2, max_epochs=epochs, early_stop_callback=True, - row_log_interval=1, + row_log_interval=2, limit_train_batches=batches, weights_summary=None, ) @@ -508,31 +508,5 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): trainer.fit(model) import pdb; pdb.set_trace() - num_expected_epochs = 10 - - # ---------------------------------- - # VERIFY EARLY STOPPING BEHAVIOR - # ---------------------------------- - # with train loop only it happens on every epoch - early_stop_vals = trainer.dev_debugger.early_stopping_history - assert len(early_stop_vals) == num_expected_epochs - min_val = min([x['best'] for x in early_stop_vals]) - assert min_val == 171 + 9 - all_losses = trainer.dev_debugger.saved_losses - - from collections import Counter - batch_idxs = Counter([x['batch_idx'] for x in all_losses]) - for i, val in batch_idxs.items(): - assert val == num_expected_epochs - assert i in [0, 1, 2] - - # ---------------------------------- - # VERIFY CHECKPOINTING BEHAVIOR - # ---------------------------------- - ckpt_vals = trainer.dev_debugger.checkpoint_callback_history - assert len(ckpt_vals) == 5, '5 ckpts should have been saved' - for ckpt_val, expected_epoch in zip(ckpt_vals, [0, 1, 2, 3, 6]): - assert ckpt_val['epoch'] == expected_epoch - assert ckpt_val['monitor'] == 'checkpoint_on' test_full_train_loop_with_results_obj_dp('') \ No newline at end of file From 14db086093d35a1e4e0b78320de7d2d9c8c560c9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:25:59 -0400 Subject: [PATCH 116/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 0f25290279076..911c240b72d5d 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -508,5 +508,10 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): trainer.fit(model) import pdb; pdb.set_trace() + i = 0 + for metric in trainer.dev_debugger.logged_metrics: + assert metric['global_step'] == i + i += trainer.row_log_interval + test_full_train_loop_with_results_obj_dp('') \ No newline at end of file From 7452cd5684f146cad085cfa039a5477892c5acdd Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:27:46 -0400 Subject: [PATCH 117/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 1c06c57708464..3ed06a0427444 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -68,6 +68,6 @@ def training_epoch_end_full_loop_result_obj_dp(self, result): """ Full loop flow train step (result obj + dp) """ - result.log('train_epoch_end_metric', 1) + result.log('train_epoch_end_metric', 1, on_epoch=True) self.training_epoch_end_called = True return result From 21ffdf2dc2e567270101d78ec31be61d25c5eb35 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:28:57 -0400 Subject: [PATCH 118/168] finished 
tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 911c240b72d5d..4a071eff80ebb 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -506,8 +506,13 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): ) trainer.fit(model) - import pdb; pdb.set_trace() + # make sure the loop was good + assert model.training_step_called + assert model.training_step_end_called + assert model.training_epoch_end_called + + # make sure we have the correct metrics logged i = 0 for metric in trainer.dev_debugger.logged_metrics: assert metric['global_step'] == i From ee31889bc52bfb5b004cdd6edd7ce13ad0b533dd Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:31:11 -0400 Subject: [PATCH 119/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 4a071eff80ebb..f974eb8ed57ff 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -512,11 +512,15 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): assert model.training_step_end_called assert model.training_epoch_end_called - # make sure we have the correct metrics logged - i = 0 + # make sure we saw all the correct keys + seen_keys = set() for metric in trainer.dev_debugger.logged_metrics: - assert metric['global_step'] == i - i += trainer.row_log_interval + seen_keys.update(metric.keys()) + + assert 'train_step_metric' in seen_keys + assert 'train_step_end_metric' in seen_keys + assert 'train_epoch_end_metric' in seen_keys + test_full_train_loop_with_results_obj_dp('') \ No newline at end of file From 59428214efd7e4b162b55a4d9bb6fc10dc816998 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:31:24 -0400 Subject: [PATCH 120/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index f974eb8ed57ff..762a9c33ea536 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -520,7 +520,3 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): assert 'train_step_metric' in seen_keys assert 'train_step_end_metric' in seen_keys assert 'train_epoch_end_metric' in seen_keys - - - -test_full_train_loop_with_results_obj_dp('') \ No newline at end of file From 2692014fc43bb8d53af02d6492f09f85b832a653 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:44:06 -0400 Subject: [PATCH 121/168] finished tests for structured results on train epoch --- tests/base/deterministic_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index a7c5b210d55cd..88651d8ace301 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -333,7 +333,6 @@ def backward(self, trainer, loss, optimizer, optimizer_idx): else: assert loss == 171.0 - import pdb; 
pdb.set_trace() loss.backward() From 042bcb6cb3d69da1c28fe6faab080baf5610ffb8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:46:00 -0400 Subject: [PATCH 122/168] finished tests for structured results on train epoch --- pytorch_lightning/callbacks/early_stopping.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 7035159e990ff..308408642d159 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -144,12 +144,14 @@ def on_validation_end(self, trainer, pl_module): def on_train_epoch_end(self, trainer, pl_module): # early stopping can also work in the train loop when there is no val loop and when using structured results should_check_early_stop = False - if 'early_stop_on' in trainer.callback_metrics and trainer.callback_metrics['early_stop_on'] is not None: + train_es_key = 'early_stop_on' + if train_es_key in trainer.callback_metrics and trainer.callback_metrics[train_es_key] is not None: self.monitor = 'early_stop_on' should_check_early_stop = True - if 'val_early_stop_on' in trainer.callback_metrics and trainer.callback_metrics['val_early_stop_on'] is not None: - self.monitor = 'val_early_stop_on' + val_es_key = 'val_early_stop_on' + if val_es_key in trainer.callback_metrics and trainer.callback_metrics[val_es_key] is not None: + self.monitor = val_es_key should_check_early_stop = True if should_check_early_stop: From 8a449e654c70a1c58581c7a389388bcdfe33cc40 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 08:47:18 -0400 Subject: [PATCH 123/168] finished tests for structured results on train epoch --- pytorch_lightning/core/step_result.py | 2 +- pytorch_lightning/utilities/debugging.py | 4 ++-- tests/base/deterministic_model.py | 27 ++++++++++++++++-------- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index f3ca6b970cd22..b3a76ebfcc412 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -324,4 +324,4 @@ def log( result.hiddens = torch.tensor(1) result.log('some', 123) print(result) - result.minimize = torch.tensor(1) \ No newline at end of file + result.minimize = torch.tensor(1) diff --git a/pytorch_lightning/utilities/debugging.py b/pytorch_lightning/utilities/debugging.py index 47f98ac4685e8..490356938fb6d 100644 --- a/pytorch_lightning/utilities/debugging.py +++ b/pytorch_lightning/utilities/debugging.py @@ -4,7 +4,7 @@ class InternalDebugger(object): def __init__(self, trainer): - + self.enabled = 'PL_DEV_DEBUG' in os.environ self.trainer = trainer self.logged_metrics = [] @@ -51,4 +51,4 @@ def track_checkpointing_history(self, filepath): 'rank': self.trainer.global_rank, 'filepath': filepath } - self.checkpoint_callback_history.append(debug_dict) \ No newline at end of file + self.checkpoint_callback_history.append(debug_dict) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 88651d8ace301..dc4123b978921 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -152,9 +152,12 @@ def training_step_result_log_epoch_only(self, batch, batch_idx): acc = self.step(batch, batch_idx) result = TrainResult(minimize=acc) - result.log(f'epoch_log_and_pbar_acc1_e{self.current_epoch}', torch.tensor(14).type_as(acc), on_epoch=True, prog_bar=True, on_step=False) - 
result.log(f'epoch_log_acc2_e{self.current_epoch}', torch.tensor(15).type_as(acc), on_epoch=True, on_step=False) - result.log(f'epoch_pbar_acc3_e{self.current_epoch}', torch.tensor(16).type_as(acc), on_epoch=True, logger=False, prog_bar=True, on_step=False) + result.log(f'epoch_log_and_pbar_acc1_e{self.current_epoch}', torch.tensor(14).type_as(acc), + on_epoch=True, prog_bar=True, on_step=False) + result.log(f'epoch_log_acc2_e{self.current_epoch}', torch.tensor(15).type_as(acc), + on_epoch=True, on_step=False) + result.log(f'epoch_pbar_acc3_e{self.current_epoch}', torch.tensor(16).type_as(acc), + on_epoch=True, logger=False, prog_bar=True, on_step=False) self.training_step_called = True return result @@ -166,9 +169,12 @@ def training_step_result_log_epoch_and_step(self, batch, batch_idx): val_1 = (5 + batch_idx) * (self.current_epoch + 1) val_2 = (6 + batch_idx) * (self.current_epoch + 1) val_3 = (7 + batch_idx) * (self.current_epoch + 1) - result.log(f'step_epoch_log_and_pbar_acc1', torch.tensor(val_1).type_as(acc), on_epoch=True, prog_bar=True) - result.log(f'step_epoch_log_acc2', torch.tensor(val_2).type_as(acc), on_epoch=True) - result.log(f'step_epoch_pbar_acc3', torch.tensor(val_3).type_as(acc), on_epoch=True, logger=False, prog_bar=True) + result.log(f'step_epoch_log_and_pbar_acc1', torch.tensor(val_1).type_as(acc), + on_epoch=True, prog_bar=True) + result.log(f'step_epoch_log_acc2', torch.tensor(val_2).type_as(acc), + on_epoch=True) + result.log(f'step_epoch_pbar_acc3', torch.tensor(val_3).type_as(acc), + on_epoch=True, logger=False, prog_bar=True) self.training_step_called = True return result @@ -188,9 +194,12 @@ def training_epoch_end_return_for_log_epoch_and_step(self, result): result.step_epoch_log_and_pbar_acc1 = result.step_epoch_log_and_pbar_acc1.prod() result.step_epoch_log_acc2 = result.step_epoch_log_acc2.prod() result.step_epoch_pbar_acc3 = result.step_epoch_pbar_acc3.prod() - result.log('epoch_end_log_acc', torch.tensor(1212).type_as(result.step_epoch_log_acc2), logger=True, on_epoch=True) - result.log('epoch_end_pbar_acc', torch.tensor(1213).type_as(result.step_epoch_log_acc2), logger=False, prog_bar=True, on_epoch=True) - result.log('epoch_end_log_pbar_acc', torch.tensor(1214).type_as(result.step_epoch_log_acc2), logger=True, prog_bar=True, on_epoch=True) + result.log('epoch_end_log_acc', torch.tensor(1212).type_as(result.step_epoch_log_acc2), + logger=True, on_epoch=True) + result.log('epoch_end_pbar_acc', torch.tensor(1213).type_as(result.step_epoch_log_acc2), + logger=False, prog_bar=True, on_epoch=True) + result.log('epoch_end_log_pbar_acc', torch.tensor(1214).type_as(result.step_epoch_log_acc2), + logger=True, prog_bar=True, on_epoch=True) return result # -------------------------- From e7d158569ce31ecea61ace607803b0cf195b5e92 Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 20 Jul 2020 15:18:17 +0200 Subject: [PATCH 124/168] cache --- .github/workflows/ci-testing.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index dea7396125c7e..7c3095fc281a1 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -82,9 +82,9 @@ jobs: uses: actions/cache@v1 with: path: ${{ steps.pip-cache.outputs.dir }} - key: ${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}-pip-${{ hashFiles('requirements/base.txt') }}-${{ hashFiles('requirements/extra.txt') }} + key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ matrix.requires }}-pip-${{ 
hashFiles('requirements/base.txt') }}-${{ hashFiles('requirements/extra.txt') }} restore-keys: | - ${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}-pip- + ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ matrix.requires }}-pip- - name: Install dependencies run: | From 1d34947d0a179ed901dabdc6e2e6d88fa314e5d8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 09:22:35 -0400 Subject: [PATCH 125/168] finished tests for structured results on train epoch --- tests/callbacks/test_model_checkpoint.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/callbacks/test_model_checkpoint.py b/tests/callbacks/test_model_checkpoint.py index 1091a4cf3a8dd..da3c908a1e11b 100644 --- a/tests/callbacks/test_model_checkpoint.py +++ b/tests/callbacks/test_model_checkpoint.py @@ -78,11 +78,11 @@ def __init__(self, expected_count, *args, **kwargs): self.count = 0 self.expected_count = expected_count - def _save_model(self, filepath): + def _save_model(self, filepath, trainer, pl_module): # make sure we don't save twice assert not os.path.isfile(filepath) self.count += 1 - super()._save_model(filepath) + super()._save_model(filepath, trainer, pl_module) def on_train_end(self, trainer, pl_module): super().on_train_end(trainer, pl_module) @@ -107,3 +107,6 @@ def test_model_checkpoint_no_extraneous_invocations(tmpdir): ) result = trainer.fit(model) assert 1 == result + +if __name__ == '__main__': + test_model_checkpoint_no_extraneous_invocations('') \ No newline at end of file From bfde914558ccfdce85c57dc1cd711b9bb8f2d7de Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 09:22:50 -0400 Subject: [PATCH 126/168] finished tests for structured results on train epoch --- tests/callbacks/test_model_checkpoint.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/callbacks/test_model_checkpoint.py b/tests/callbacks/test_model_checkpoint.py index da3c908a1e11b..7257dc3874a2a 100644 --- a/tests/callbacks/test_model_checkpoint.py +++ b/tests/callbacks/test_model_checkpoint.py @@ -107,6 +107,3 @@ def test_model_checkpoint_no_extraneous_invocations(tmpdir): ) result = trainer.fit(model) assert 1 == result - -if __name__ == '__main__': - test_model_checkpoint_no_extraneous_invocations('') \ No newline at end of file From 2e7b68dd572533b2368076aec0f7308b07cffde9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 09:45:25 -0400 Subject: [PATCH 127/168] finished tests for structured results on train epoch --- pytorch_lightning/trainer/training_loop.py | 5 ++- tests/models/test_grad_norm.py | 44 ++++------------------ 2 files changed, 10 insertions(+), 39 deletions(-) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index bca2ec8dbb943..5ee9803f390cd 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -635,8 +635,9 @@ def save_train_loop_metrics_to_loggers(self, batch_idx, batch_output): if should_log_metrics or self.fast_dev_run: # logs user requested information to logger metrics = batch_output.batch_log_metrics - if len(metrics) > 0: - self.log_metrics(metrics, batch_output.grad_norm_dic) + grad_norm_dic = batch_output.grad_norm_dic + if len(metrics) > 0 or len(grad_norm_dic) > 0: + self.log_metrics(metrics, grad_norm_dic) def save_loggers_in_training_loop(self, batch_idx): # when loggers should save to disk diff --git a/tests/models/test_grad_norm.py b/tests/models/test_grad_norm.py index 
ff627c5088987..d7978965a3cfe 100644 --- a/tests/models/test_grad_norm.py +++ b/tests/models/test_grad_norm.py @@ -1,43 +1,12 @@ import numpy as np import pytest +import os from pytorch_lightning import Trainer -from pytorch_lightning.loggers import LightningLoggerBase -from pytorch_lightning.utilities import rank_zero_only from tests.base import EvalModelTemplate from tests.base.develop_utils import reset_seed -class OnlyMetricsListLogger(LightningLoggerBase): - def __init__(self): - super().__init__() - self.metrics = [] - - @rank_zero_only - def log_metrics(self, metrics, step): - self.metrics.append(metrics) - - @property - def experiment(self): - return 'test' - - @rank_zero_only - def log_hyperparams(self, params): - pass - - @rank_zero_only - def finalize(self, status): - pass - - @property - def name(self): - return 'name' - - @property - def version(self): - return '1' - - class ModelWithManualGradTracker(EvalModelTemplate): def __init__(self, norm_type, *args, **kwargs): super().__init__(*args, **kwargs) @@ -75,28 +44,29 @@ def on_after_backward(self): @pytest.mark.parametrize("norm_type", [1., 1.25, 1.5, 2, 3, 5, 10, 'inf']) def test_grad_tracking(tmpdir, norm_type, rtol=5e-3): - # rtol=5e-3 respects the 3 decmials rounding in `.grad_norms` and above + os.environ['PL_DEV_DEBUG'] = '1' + + # rtol=5e-3 respects the 3 decimals rounding in `.grad_norms` and above reset_seed() # use a custom grad tracking module and a list logger model = ModelWithManualGradTracker(norm_type) - logger = OnlyMetricsListLogger() trainer = Trainer( default_root_dir=tmpdir, max_epochs=3, - logger=logger, track_grad_norm=norm_type, row_log_interval=1, # request grad_norms every batch ) result = trainer.fit(model) assert result == 1, "Training failed" - assert len(logger.metrics) == len(model.stored_grad_norms) + logged_metrics = trainer.dev_debugger.logged_metrics + assert len(logged_metrics) == len(model.stored_grad_norms) # compare the logged metrics against tracked norms on `.backward` - for mod, log in zip(model.stored_grad_norms, logger.metrics): + for mod, log in zip(model.stored_grad_norms, logged_metrics): common = mod.keys() & log.keys() log, mod = [log[k] for k in common], [mod[k] for k in common] From 71712d89313afef80946e1e2e156a7bf3cec20b5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 09:57:27 -0400 Subject: [PATCH 128/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 762a9c33ea536..a0ddeacdb9c99 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -51,7 +51,7 @@ def test_training_step_result_log_step_only(tmpdir): assert logged_metrics[f'step_log_and_pbar_acc1_b{batch_idx}'] == 11.0 assert logged_metrics[f'step_log_acc2_b{batch_idx}'] == 12.0 assert f'step_pbar_acc3_b{batch_idx}' not in logged_metrics - assert len(logged_metrics) == 3 + assert len(logged_metrics) == 4 # make sure we are using the correct metrics for callbacks assert trainer.callback_metrics['checkpoint_on'] == 171 @@ -124,7 +124,7 @@ def test_training_step_result_log_epoch_only(tmpdir): assert logged_metrics[f'epoch_log_and_pbar_acc1_e{batch_idx}'] == 14.0 assert logged_metrics[f'epoch_log_acc2_e{batch_idx}'] == 15.0 assert f'epoch_pbar_acc3_e{batch_idx}' not in logged_metrics - assert 
len(logged_metrics) == 3 + assert len(logged_metrics) == 4 # make sure we are using the correct metrics for callbacks assert trainer.callback_metrics['checkpoint_on'] == 171 @@ -211,7 +211,7 @@ def test_training_step_result_log_step_and_epoch(tmpdir): assert logged_metrics['step_epoch_log_and_pbar_acc1'] == expected_val_1 assert logged_metrics['step_epoch_log_acc2'] == expected_val_2 assert 'step_epoch_pbar_acc3' not in logged_metrics - assert len(logged_metrics) == 3 + assert len(logged_metrics) == 4 # make sure the metrics for the epoch end are actual means (the default reduce fx) or all the batches epoch_end_metrics = epoch_outputs[-1] @@ -220,7 +220,7 @@ def test_training_step_result_log_step_and_epoch(tmpdir): assert epoch_end_metrics['step_epoch_log_and_pbar_acc1'] == eval_1 assert epoch_end_metrics['step_epoch_log_acc2'] == eval_2 assert 'step_epoch_pbar_acc3' not in epoch_end_metrics - assert len(logged_metrics) == 3 + assert len(logged_metrics) == 4 # make sure we are using the correct metrics for callbacks assert trainer.callback_metrics['checkpoint_on'] == 171 From d93845e14bbfef607acc399dbe7428737310c9e1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 10:14:23 -0400 Subject: [PATCH 129/168] finished tests for structured results on train epoch --- pytorch_lightning/trainer/training_loop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 5ee9803f390cd..0caf9f22b5108 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -472,10 +472,10 @@ def run_training_epoch(self): # otherwise we will build up unnecessary memory step_out = batch_output.training_step_output_for_epoch_end should_auto_reduce_train_result = isinstance(step_out, Result) and step_out.should_reduce_on_epoch_end - if 'early_stop_on' in step_out: + if isinstance(step_out, dict) and 'early_stop_on' in step_out: early_stopping_accumulator.accumulate(step_out['early_stop_on']) - if 'checkpoint_on' in step_out: + if isinstance(step_out, dict) and 'checkpoint_on' in step_out: checkpoint_accumulator.accumulate(step_out['checkpoint_on']) if self.is_overridden('training_epoch_end', model=self.get_model()) or should_auto_reduce_train_result: From 1ec899227f886b8dd9e1442d96e41cdd7f1d2961 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 10:14:56 -0400 Subject: [PATCH 130/168] Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec --- pytorch_lightning/callbacks/early_stopping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 308408642d159..39d3615186303 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -150,7 +150,7 @@ def on_train_epoch_end(self, trainer, pl_module): should_check_early_stop = True val_es_key = 'val_early_stop_on' - if val_es_key in trainer.callback_metrics and trainer.callback_metrics[val_es_key] is not None: + if trainer.callback_metrics.get(val_es_key, None) is not None: self.monitor = val_es_key should_check_early_stop = True From e272a59d452c11ca5af8b3ef8d81499bf1cc2011 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 10:15:21 -0400 Subject: [PATCH 131/168] Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec --- 
pytorch_lightning/callbacks/early_stopping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 39d3615186303..4f7bb572e6bf5 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -146,7 +146,7 @@ def on_train_epoch_end(self, trainer, pl_module): should_check_early_stop = False train_es_key = 'early_stop_on' if train_es_key in trainer.callback_metrics and trainer.callback_metrics[train_es_key] is not None: - self.monitor = 'early_stop_on' + self.monitor = train_es_key should_check_early_stop = True val_es_key = 'val_early_stop_on' From 6c8f2e5a7916b5196fed6cbbdeb0e9e2f3dd5394 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 10:16:07 -0400 Subject: [PATCH 132/168] Update pytorch_lightning/callbacks/early_stopping.py Co-authored-by: Jirka Borovec --- pytorch_lightning/callbacks/early_stopping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index 4f7bb572e6bf5..4e22cba977198 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -145,7 +145,7 @@ def on_train_epoch_end(self, trainer, pl_module): # early stopping can also work in the train loop when there is no val loop and when using structured results should_check_early_stop = False train_es_key = 'early_stop_on' - if train_es_key in trainer.callback_metrics and trainer.callback_metrics[train_es_key] is not None: + if trainer.callback_metrics.get(train_es_key, None) is not None: self.monitor = train_es_key should_check_early_stop = True From 4ce032fba98e9840153de6fb7bcca4d31f2dc78a Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 20 Jul 2020 16:32:54 +0200 Subject: [PATCH 133/168] Update pytorch_lightning/callbacks/model_checkpoint.py --- pytorch_lightning/callbacks/model_checkpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/callbacks/model_checkpoint.py b/pytorch_lightning/callbacks/model_checkpoint.py index eb81e879c7d56..370a30b75dc4d 100644 --- a/pytorch_lightning/callbacks/model_checkpoint.py +++ b/pytorch_lightning/callbacks/model_checkpoint.py @@ -276,7 +276,7 @@ def on_validation_end(self, trainer, pl_module): epoch = trainer.current_epoch # support structured results - if 'checkpoint_on' in metrics and metrics['checkpoint_on'] is not None: + if metrics.get('checkpoint_on') is not None: self.monitor = 'checkpoint_on' if self.save_top_k == 0: From b7ea0ccaac3d8a13441fe170471cd36e77d03154 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 20 Jul 2020 16:33:49 +0200 Subject: [PATCH 134/168] Update pytorch_lightning/core/step_result.py --- pytorch_lightning/core/step_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index b3a76ebfcc412..f5b22a5585574 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -9,7 +9,7 @@ class Result(Dict): def __init__( self, minimize: Optional[Tensor] = None, - early_stop_on: Tensor = None, + early_stop_on: Optional[Tensor] = None, checkpoint_on: Union[Tensor, bool] = None, hiddens: Optional[Tensor] = None ): From 3e7af00506cda4c10376a917676d57e1a40f0ebb Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 10:33:53 -0400 Subject: 
[PATCH 135/168] finished tests for structured results on train epoch --- pytorch_lightning/overrides/data_parallel.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index e4e29517b41d1..229d9133b1efd 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -188,6 +188,10 @@ def _worker(i, module, input, kwargs, device=None): if not isinstance(input, (list, tuple)): input = (input,) + if hasattr(module, '_device'): + module._device = device + module = module.to(device) + # --------------- # CHANGE if module.training: From b4ad5c2d5c2a23f4d9ab381b4592f336eb74eda0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 10:49:19 -0400 Subject: [PATCH 136/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 3ed06a0427444..5a30c7f2874b2 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -46,7 +46,7 @@ def training_step_full_loop_result_obj_dp(self, batch, batch_idx): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self(x) + y_hat = self.forward(x) loss_val = y_hat.sum() result = TrainResult(minimize=loss_val) result.log('train_step_metric', loss_val + 1) From a2c2401c0118b74afce0c1793415032fc27c5229 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 20 Jul 2020 17:27:55 +0200 Subject: [PATCH 137/168] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> --- pytorch_lightning/core/hooks.py | 2 +- pytorch_lightning/core/step_result.py | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index 60e93aa275d93..f2762e39cced0 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -129,7 +129,7 @@ def on_train_epoch_end(self) -> None: def on_val_epoch_start(self) -> None: """ - Called in the training loop at the very beginning of the epoch. + Called in the validation loop at the very beginning of the epoch. 
""" # do something when the epoch starts diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index f5b22a5585574..6cc044ef1c220 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -10,7 +10,7 @@ def __init__( self, minimize: Optional[Tensor] = None, early_stop_on: Optional[Tensor] = None, - checkpoint_on: Union[Tensor, bool] = None, + checkpoint_on: Union[Tensor, bool, None] = None, hiddens: Optional[Tensor] = None ): @@ -36,7 +36,7 @@ def __init__( } } - def __getattr__(self, key): + def __getattr__(self, key: str) -> Any: try: if key == 'callback_metrics': return self.get_callback_metrics() @@ -53,7 +53,7 @@ def __getattr__(self, key): except KeyError: return None - def __setattr__(self, key, val): + def __setattr__(self, key: str, val: Union[Tensor, Any]): # ensure reserve keys are tensors and detached if key in {'hiddens', 'checkpoint_on', 'early_stop_on'}: self._assert_tensor_metric(key, val) @@ -119,7 +119,7 @@ def __set_meta(self, name, value, prog_bar, logger, on_step, on_epoch, reduce_fx internal = self['meta']['_internal'] internal['_reduce_on_epoch'] = max(internal['_reduce_on_epoch'], on_epoch) - def get_callback_metrics(self): + def get_callback_metrics(self) -> dict: result = { 'early_stop_on': self.early_stop_on, 'checkpoint_on': self.checkpoint_on @@ -127,7 +127,7 @@ def get_callback_metrics(self): return result - def get_batch_log_metrics(self): + def get_batch_log_metrics(self) -> dict: """ Gets the metrics to log at the end of the batch step """ @@ -141,7 +141,7 @@ def get_batch_log_metrics(self): result[k] = self[k] return result - def get_epoch_log_metrics(self): + def get_epoch_log_metrics(self) -> dict: """ Gets the metrics to log at the end of the batch step """ @@ -236,7 +236,7 @@ def reduce_on_epoch_end(cls, outputs): return result @property - def should_reduce_on_epoch_end(self): + def should_reduce_on_epoch_end(self) -> bool: return self['meta']['_internal']['_reduce_on_epoch'] @@ -257,7 +257,7 @@ def recursive_gather(outputs, result=None): return result -def recursive_stack(result): +def recursive_stack(result: MutableMapping): for k, v in result.items(): if isinstance(v, dict): recursive_stack(v) @@ -297,8 +297,8 @@ class EvalResult(Result): def __init__( self, - early_stop_on: Tensor = None, - checkpoint_on: Tensor = None, + early_stop_on: Optional[Tensor] = None, + checkpoint_on: Optional[Tensor] = None, hiddens: Optional[Tensor] = None ): From 7102cef1af2a1bad669aeed6ba02404e0c08e186 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 20 Jul 2020 17:55:39 +0200 Subject: [PATCH 138/168] Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> --- pytorch_lightning/core/step_result.py | 55 ++++++++++++++++----------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 6cc044ef1c220..ac520e9c214e6 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -66,9 +66,9 @@ def __setattr__(self, key: str, val: Union[Tensor, Any]): self[key] = val - def _assert_tensor_metric(self, name, x): - if x is not None and not isinstance(x, bool): - assert isinstance(x, Tensor), f'{name} must be a torch.Tensor' + def _assert_tensor_metric(self, name: str, potential_metric: Union[bool, Tensor, None, Any]): + if potential_metric is not None and not isinstance(potential_metric, bool): + assert 
isinstance(potential_metric, Tensor), f'{name} must be a torch.Tensor' def _assert_grad_tensor_metric(self, name, x, additional_err: str = None): if x is not None: @@ -83,12 +83,12 @@ def log( self, name, value, - prog_bar=False, - logger=True, - on_step=False, - on_epoch=True, - reduce_fx=torch.mean, - enable_graph=False, + prog_bar: bool = False, + logger: bool = True, + on_step: bool = False, + on_epoch: bool = True, + reduce_fx: Callable = torch.mean, + enable_graph: bool = False, ): # no metrics should be logged with graphs if not enable_graph and isinstance(value, torch.Tensor): @@ -102,7 +102,16 @@ def log( # set the value self.__setitem__(name, value) - def __set_meta(self, name, value, prog_bar, logger, on_step, on_epoch, reduce_fx): + def __set_meta( + self, + name: str, + value, + prog_bar: bool, + logger: bool, + on_step: bool, + on_epoch: bool, + reduce_fx: Callable, + ): # set the meta for the item meta_value = value meta = dict( @@ -240,7 +249,7 @@ def should_reduce_on_epoch_end(self) -> bool: return self['meta']['_internal']['_reduce_on_epoch'] -def recursive_gather(outputs, result=None): +def recursive_gather(outputs: Sequence[dict], result: Optional[MutableMapping] = None) -> Optional[MutableMapping]: for out in outputs: if 'meta' in out: del out['meta'] @@ -283,12 +292,12 @@ def log( self, name, value, - prog_bar=False, - logger=True, - on_step=True, - on_epoch=False, - reduce_fx=torch.mean, - enable_graph=False, + prog_bar: bool = False, + logger: bool = True, + on_step: bool = True, + on_epoch: bool = False, + reduce_fx: Callable = torch.mean, + enable_graph: bool = False, ): super().log(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx, enable_graph) @@ -308,12 +317,12 @@ def log( self, name, value, - prog_bar=False, - logger=True, - on_step=False, - on_epoch=True, - reduce_fx=torch.mean, - enable_graph=False, + prog_bar: bool = False, + logger: bool = True, + on_step: bool = False, + on_epoch: bool = True, + reduce_fx: Callable = torch.mean, + enable_graph: bool = False, ): super().log(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx, enable_graph) From 12ef3b0f99e7aedb9fb802ba50022f063170d8df Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 20 Jul 2020 18:14:57 +0200 Subject: [PATCH 139/168] Apply suggestions from code review Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> --- pytorch_lightning/core/step_result.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index ac520e9c214e6..89586f6bb52b2 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -70,7 +70,7 @@ def _assert_tensor_metric(self, name: str, potential_metric: Union[bool, Tensor, if potential_metric is not None and not isinstance(potential_metric, bool): assert isinstance(potential_metric, Tensor), f'{name} must be a torch.Tensor' - def _assert_grad_tensor_metric(self, name, x, additional_err: str = None): + def _assert_grad_tensor_metric(self, name: str, x: Union[torch.Tensor, Any], additional_err: str = ''): if x is not None: assert isinstance(x, Tensor), f'{name} must be a torch.Tensor' m = f'{name} must have a computational graph.' 
@@ -125,8 +125,7 @@ def __set_meta( self['meta'][name] = meta # track whether any input requires reduction on epoch end - internal = self['meta']['_internal'] - internal['_reduce_on_epoch'] = max(internal['_reduce_on_epoch'], on_epoch) + self['meta']['_internal']['_reduce_on_epoch'] = max(internal['_reduce_on_epoch'], on_epoch) def get_callback_metrics(self) -> dict: result = { From 2116a61aa6ed0d4dbbe91b9549cb1e7caf63f58b Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 20 Jul 2020 18:27:30 +0200 Subject: [PATCH 140/168] simple --- pytorch_lightning/core/step_result.py | 47 +++++++++------------------ 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 89586f6bb52b2..e58f9d0c9b137 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -1,9 +1,11 @@ -from typing import Optional, Dict, Union +from typing import Optional, Dict, Union, Sequence, Callable, MutableMapping, Any from torch import Tensor import torch from copy import copy + + class Result(Dict): def __init__( @@ -125,7 +127,8 @@ def __set_meta( self['meta'][name] = meta # track whether any input requires reduction on epoch end - self['meta']['_internal']['_reduce_on_epoch'] = max(internal['_reduce_on_epoch'], on_epoch) + _internal = self['meta']['_internal'] + _internal['_reduce_on_epoch'] = max(_internal['_reduce_on_epoch'], on_epoch) def get_callback_metrics(self) -> dict: result = { @@ -135,7 +138,7 @@ def get_callback_metrics(self) -> dict: return result - def get_batch_log_metrics(self) -> dict: + def _get_metrics(self, opt_names: Sequence[str]) -> dict: """ Gets the metrics to log at the end of the batch step """ @@ -145,51 +148,33 @@ def get_batch_log_metrics(self) -> dict: for k, options in meta.items(): if k == '_internal': continue - if options['logger'] and options['on_step']: + if all(options[n] for n in opt_names): result[k] = self[k] return result - def get_epoch_log_metrics(self) -> dict: + def get_batch_log_metrics(self) -> dict: """ Gets the metrics to log at the end of the batch step """ - result = {} + return self._get_metrics(self, opt_names=['logger', 'on_step']) - meta = self['meta'] - for k, options in meta.items(): - if k == '_internal': - continue - if options['logger'] and options['on_epoch']: - result[k] = self[k] - return result + def get_epoch_log_metrics(self) -> dict: + """ + Gets the metrics to log at the end of the batch step + """ + return self._get_metrics(self, opt_names=['logger', 'on_epoch']) def get_epoch_pbar_metrics(self): """ Gets the metrics to log at the end of the batch step """ - result = {} - - meta = self['meta'] - for k, options in meta.items(): - if k == '_internal': - continue - if options['prog_bar'] and options['on_epoch']: - result[k] = self[k] - return result + return self._get_metrics(self, opt_names=['prog_bar', 'on_epoch']) def get_batch_pbar_metrics(self): """ Gets the metrics to log at the end of the batch step """ - result = {} - - meta = self['meta'] - for k, options in meta.items(): - if k == '_internal': - continue - if options['prog_bar'] and options['on_step']: - result[k] = self[k] - return result + return self._get_metrics(self, opt_names=['prog_bar', 'on_epoch']) def detach(self): for k, v in self.items(): From fd5445d1765ce09bf81f064729f645a115f69ebe Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 12:34:16 -0400 Subject: [PATCH 141/168] finished tests for structured results on train epoch --- 
tests/trainer/test_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 0c4212b66f390..e45040f8ceaaf 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -589,7 +589,7 @@ def test_test_checkpoint_path(tmpdir, ckpt_path, save_top_k): with pytest.raises(FileNotFoundError): trainer.test(ckpt_path='random.ckpt') else: - ckpt_path = str(list((Path(tmpdir) / 'lightning_logs/version_0/checkpoints').iterdir())[0].absolute()) + ckpt_path = str(list((Path(tmpdir) / f'lightning_logs/version_{trainer.logger.version}/checkpoints').iterdir())[0].absolute()) trainer.test(ckpt_path=ckpt_path) assert trainer.tested_ckpt_path == ckpt_path From 6a63fe0a21e2e706590f3799a26e7f522fd7ff63 Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 20 Jul 2020 19:10:08 +0200 Subject: [PATCH 142/168] simple --- pytorch_lightning/core/step_result.py | 38 +++++++++++++-------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index e58f9d0c9b137..139cefb7db7d5 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -4,8 +4,6 @@ from copy import copy - - class Result(Dict): def __init__( @@ -13,7 +11,7 @@ def __init__( minimize: Optional[Tensor] = None, early_stop_on: Optional[Tensor] = None, checkpoint_on: Union[Tensor, bool, None] = None, - hiddens: Optional[Tensor] = None + hiddens: Optional[Tensor] = None, ): super().__init__() @@ -83,8 +81,8 @@ def _assert_grad_tensor_metric(self, name: str, x: Union[torch.Tensor, Any], add def log( self, - name, - value, + name: str, + value: Any, prog_bar: bool = False, logger: bool = True, on_step: bool = False, @@ -107,7 +105,7 @@ def log( def __set_meta( self, name: str, - value, + value: Any, prog_bar: bool, logger: bool, on_step: bool, @@ -138,7 +136,7 @@ def get_callback_metrics(self) -> dict: return result - def _get_metrics(self, opt_names: Sequence[str]) -> dict: + def __get_meta_metrics(self, opt_names: Sequence[str]) -> dict: """ Gets the metrics to log at the end of the batch step """ @@ -156,25 +154,25 @@ def get_batch_log_metrics(self) -> dict: """ Gets the metrics to log at the end of the batch step """ - return self._get_metrics(self, opt_names=['logger', 'on_step']) + return self.__get_meta_metrics(self, opt_names=['logger', 'on_step']) def get_epoch_log_metrics(self) -> dict: """ Gets the metrics to log at the end of the batch step """ - return self._get_metrics(self, opt_names=['logger', 'on_epoch']) + return self.__get_meta_metrics(self, opt_names=['logger', 'on_epoch']) def get_epoch_pbar_metrics(self): """ Gets the metrics to log at the end of the batch step """ - return self._get_metrics(self, opt_names=['prog_bar', 'on_epoch']) + return self.__get_meta_metrics(self, opt_names=['prog_bar', 'on_epoch']) def get_batch_pbar_metrics(self): """ Gets the metrics to log at the end of the batch step """ - return self._get_metrics(self, opt_names=['prog_bar', 'on_epoch']) + return self.__get_meta_metrics(self, opt_names=['prog_bar', 'on_epoch']) def detach(self): for k, v in self.items(): @@ -267,7 +265,7 @@ def __init__( minimize: Optional[Tensor] = None, early_stop_on: Tensor = None, checkpoint_on: Union[Tensor, bool] = None, - hiddens: Optional[Tensor] = None + hiddens: Optional[Tensor] = None, ): super().__init__(minimize, early_stop_on, checkpoint_on, hiddens) @@ -292,7 +290,7 @@ def __init__( self, early_stop_on: 
Optional[Tensor] = None, checkpoint_on: Optional[Tensor] = None, - hiddens: Optional[Tensor] = None + hiddens: Optional[Tensor] = None, ): super().__init__(None, early_stop_on, checkpoint_on, hiddens) @@ -311,10 +309,10 @@ def log( super().log(name, value, prog_bar, logger, on_step, on_epoch, reduce_fx, enable_graph) -if __name__ == '__main__': - import torch - result = TrainResult() - result.hiddens = torch.tensor(1) - result.log('some', 123) - print(result) - result.minimize = torch.tensor(1) +# if __name__ == '__main__': +# import torch +# result = TrainResult() +# result.hiddens = torch.tensor(1) +# result.log('some', 123) +# print(result) +# result.minimize = torch.tensor(1) From d650daf92c7e6839c9e1430152f5f63352f8c948 Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 20 Jul 2020 19:23:33 +0200 Subject: [PATCH 143/168] simple --- pytorch_lightning/utilities/parsing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/utilities/parsing.py b/pytorch_lightning/utilities/parsing.py index 7acaea4fd26c0..920e14bbefeb4 100644 --- a/pytorch_lightning/utilities/parsing.py +++ b/pytorch_lightning/utilities/parsing.py @@ -1,5 +1,6 @@ import inspect from argparse import Namespace +from typing import Dict def str_to_bool(val): @@ -93,7 +94,7 @@ def collect_init_args(frame, path_args: list, inside: bool = False) -> list: return path_args -class AttributeDict(dict): +class AttributeDict(Dict): """Extended dictionary accesisable with dot notation. >>> ad = AttributeDict({'key1': 1, 'key2': 'abc'}) From 6abb73a43843431869d6a3fac068675596334c17 Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 20 Jul 2020 19:34:26 +0200 Subject: [PATCH 144/168] revert --- pytorch_lightning/core/step_result.py | 40 +++++++++++++++++++-------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 139cefb7db7d5..73aeb7bb39fdd 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -136,7 +136,7 @@ def get_callback_metrics(self) -> dict: return result - def __get_meta_metrics(self, opt_names: Sequence[str]) -> dict: + def get_batch_log_metrics(self) -> dict: """ Gets the metrics to log at the end of the batch step """ @@ -146,33 +146,51 @@ def __get_meta_metrics(self, opt_names: Sequence[str]) -> dict: for k, options in meta.items(): if k == '_internal': continue - if all(options[n] for n in opt_names): + if options['logger'] and options['on_step']: result[k] = self[k] return result - def get_batch_log_metrics(self) -> dict: - """ - Gets the metrics to log at the end of the batch step - """ - return self.__get_meta_metrics(self, opt_names=['logger', 'on_step']) - def get_epoch_log_metrics(self) -> dict: """ Gets the metrics to log at the end of the batch step """ - return self.__get_meta_metrics(self, opt_names=['logger', 'on_epoch']) + result = {} + + meta = self['meta'] + for k, options in meta.items(): + if k == '_internal': + continue + if options['logger'] and options['on_epoch']: + result[k] = self[k] + return result def get_epoch_pbar_metrics(self): """ Gets the metrics to log at the end of the batch step """ - return self.__get_meta_metrics(self, opt_names=['prog_bar', 'on_epoch']) + result = {} + + meta = self['meta'] + for k, options in meta.items(): + if k == '_internal': + continue + if options['prog_bar'] and options['on_epoch']: + result[k] = self[k] + return result def get_batch_pbar_metrics(self): """ Gets the metrics to log at the end of the 
batch step """ - return self.__get_meta_metrics(self, opt_names=['prog_bar', 'on_epoch']) + result = {} + + meta = self['meta'] + for k, options in meta.items(): + if k == '_internal': + continue + if options['prog_bar'] and options['on_step']: + result[k] = self[k] + return result def detach(self): for k, v in self.items(): From 6333f2121991b79677abd6b4b478e729924d220f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 13:36:05 -0400 Subject: [PATCH 145/168] finished tests for structured results on train epoch --- pytorch_lightning/callbacks/base.py | 4 ++-- pytorch_lightning/trainer/callback_hook.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/callbacks/base.py b/pytorch_lightning/callbacks/base.py index 37ef84c796ec2..7c1d05547790c 100644 --- a/pytorch_lightning/callbacks/base.py +++ b/pytorch_lightning/callbacks/base.py @@ -54,11 +54,11 @@ def on_train_epoch_end(self, trainer, pl_module): """Called when the train epoch ends.""" pass - def on_val_epoch_start(self, trainer, pl_module): + def on_validation_epoch_start(self, trainer, pl_module): """Called when the val epoch begins.""" pass - def on_val_epoch_end(self, trainer, pl_module): + def on_validation_epoch_end(self, trainer, pl_module): """Called when the val epoch ends.""" pass diff --git a/pytorch_lightning/trainer/callback_hook.py b/pytorch_lightning/trainer/callback_hook.py index 6266cccc25f1e..616d500fc27b9 100644 --- a/pytorch_lightning/trainer/callback_hook.py +++ b/pytorch_lightning/trainer/callback_hook.py @@ -61,15 +61,15 @@ def on_train_epoch_end(self): for callback in self.callbacks: callback.on_train_epoch_end(self, self.get_model()) - def on_val_epoch_start(self): + def on_validation_epoch_start(self): """Called when the epoch begins.""" for callback in self.callbacks: - callback.on_val_epoch_start(self, self.get_model()) + callback.on_validation_epoch_start(self, self.get_model()) - def on_val_epoch_end(self): + def on_validation_epoch_end(self): """Called when the epoch begins.""" for callback in self.callbacks: - callback.on_val_epoch_end(self, self.get_model()) + callback.on_validation_epoch_end(self, self.get_model()) def on_test_epoch_start(self): """Called when the epoch begins.""" From f8591b4f0dd4140f68b65b4cea73c83772e48fe2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 13:36:58 -0400 Subject: [PATCH 146/168] finished tests for structured results on train epoch --- pytorch_lightning/core/hooks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index f2762e39cced0..d63698f20dadf 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -127,13 +127,13 @@ def on_train_epoch_end(self) -> None: """ # do something when the epoch ends - def on_val_epoch_start(self) -> None: + def on_validation_epoch_start(self) -> None: """ Called in the validation loop at the very beginning of the epoch. """ # do something when the epoch starts - def on_val_epoch_end(self) -> None: + def on_validation_epoch_end(self) -> None: """ Called in the training loop at the very end of the epoch. 
""" From ff088ca2f6191e264a7d34a4fd18b10f6459807c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 13:43:32 -0400 Subject: [PATCH 147/168] Update tests/base/deterministic_model.py Co-authored-by: Jirka Borovec --- tests/base/deterministic_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index dc4123b978921..2b892dc78e02d 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -3,7 +3,6 @@ from torch import nn from torch.utils.data import Dataset, DataLoader from pytorch_lightning import TrainResult -import pdb from pytorch_lightning.core.lightning import LightningModule From 595fd4b8f3540a03c12b6c68cabe7a59ca577e2c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 14:02:39 -0400 Subject: [PATCH 148/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index a0ddeacdb9c99..434accf05aad9 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -497,7 +497,7 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): trainer = Trainer( default_root_dir=tmpdir, distributed_backend='dp', - gpus=2, + gpus=[0, 1], max_epochs=epochs, early_stop_callback=True, row_log_interval=2, From 74cd04973f6c3fd9eb7bc74f41ffe9f988fd01d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Mon, 20 Jul 2020 23:01:30 +0200 Subject: [PATCH 149/168] docstring typos --- pytorch_lightning/core/hooks.py | 6 +++--- pytorch_lightning/trainer/callback_hook.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index d63698f20dadf..aa4e274298034 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -135,19 +135,19 @@ def on_validation_epoch_start(self) -> None: def on_validation_epoch_end(self) -> None: """ - Called in the training loop at the very end of the epoch. + Called in the validation loop at the very end of the epoch. """ # do something when the epoch ends def on_test_epoch_start(self) -> None: """ - Called in the training loop at the very beginning of the epoch. + Called in the test loop at the very beginning of the epoch. """ # do something when the epoch starts def on_test_epoch_end(self) -> None: """ - Called in the training loop at the very end of the epoch. + Called in the test loop at the very end of the epoch. 
""" # do something when the epoch ends diff --git a/pytorch_lightning/trainer/callback_hook.py b/pytorch_lightning/trainer/callback_hook.py index 616d500fc27b9..89b5e712c9190 100644 --- a/pytorch_lightning/trainer/callback_hook.py +++ b/pytorch_lightning/trainer/callback_hook.py @@ -57,7 +57,7 @@ def on_train_epoch_start(self): callback.on_train_epoch_start(self, self.get_model()) def on_train_epoch_end(self): - """Called when the epoch begins.""" + """Called when the epoch ends.""" for callback in self.callbacks: callback.on_train_epoch_end(self, self.get_model()) @@ -67,7 +67,7 @@ def on_validation_epoch_start(self): callback.on_validation_epoch_start(self, self.get_model()) def on_validation_epoch_end(self): - """Called when the epoch begins.""" + """Called when the epoch ends.""" for callback in self.callbacks: callback.on_validation_epoch_end(self, self.get_model()) @@ -77,7 +77,7 @@ def on_test_epoch_start(self): callback.on_test_epoch_start(self, self.get_model()) def on_test_epoch_end(self): - """Called when the epoch begins.""" + """Called when the epoch ends.""" for callback in self.callbacks: callback.on_test_epoch_end(self, self.get_model()) From fe91a2b947ab171bd211e629edd7e05a9efe8066 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 17:30:56 -0400 Subject: [PATCH 150/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 434accf05aad9..0978dc78d3f29 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -487,7 +487,7 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): model.validation_step = None model.test_step = None model.training_step = model.training_step_full_loop_result_obj_dp - model.training_step_end = model.training_step_end_full_loop_result_obj_dp + # model.training_step_end = model.training_step_end_full_loop_result_obj_dp model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp model.val_dataloader = None model.test_dataloader = None From 7dfda42c4eb1beab48c88315183b5fb050b49b08 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 17:32:07 -0400 Subject: [PATCH 151/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 +- tests/trainer/test_trainer_steps_result_return.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 5a30c7f2874b2..3ed06a0427444 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -46,7 +46,7 @@ def training_step_full_loop_result_obj_dp(self, batch, batch_idx): """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self.forward(x) + y_hat = self(x) loss_val = y_hat.sum() result = TrainResult(minimize=loss_val) result.log('train_step_metric', loss_val + 1) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 0978dc78d3f29..434accf05aad9 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -487,7 +487,7 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): model.validation_step = None model.test_step = None model.training_step = model.training_step_full_loop_result_obj_dp - # 
model.training_step_end = model.training_step_end_full_loop_result_obj_dp + model.training_step_end = model.training_step_end_full_loop_result_obj_dp model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp model.val_dataloader = None model.test_dataloader = None From 4f48912e25fa50f729367612ffbc7a2f48d41032 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 17:33:20 -0400 Subject: [PATCH 152/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 3ed06a0427444..df575a80a9419 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -45,6 +45,7 @@ def training_step_full_loop_result_obj_dp(self, batch, batch_idx): Full loop flow train step (result obj + dp) """ x, y = batch + print(x.device, self.device) x = x.view(x.size(0), -1) y_hat = self(x) loss_val = y_hat.sum() From de5cbb91162c44bcd4ecba35d1c6e0b974e1dede Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 17:34:24 -0400 Subject: [PATCH 153/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index df575a80a9419..71e5b9603c25f 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -45,7 +45,9 @@ def training_step_full_loop_result_obj_dp(self, batch, batch_idx): Full loop flow train step (result obj + dp) """ x, y = batch + print('-' * 100) print(x.device, self.device) + print('-' * 100) x = x.view(x.size(0), -1) y_hat = self(x) loss_val = y_hat.sum() From 6ccf0cc08e22f685a13e1333fabbab01e3e03833 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:13:46 -0400 Subject: [PATCH 154/168] finished tests for structured results on train epoch --- .../test_trainer_steps_result_return.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 434accf05aad9..bbe677786614d 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -483,17 +483,9 @@ def test_use_callbacks_with_train_loop_only(tmpdir): def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' - model = EvalModelTemplate() - model.validation_step = None - model.test_step = None - model.training_step = model.training_step_full_loop_result_obj_dp - model.training_step_end = model.training_step_end_full_loop_result_obj_dp - model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp - model.val_dataloader = None - model.test_dataloader = None - batches = 3 epochs = 3 + trainer = Trainer( default_root_dir=tmpdir, distributed_backend='dp', @@ -504,6 +496,14 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): limit_train_batches=batches, weights_summary=None, ) + model = EvalModelTemplate() + model.validation_step = None + model.test_step = None + model.training_step = model.training_step_full_loop_result_obj_dp + model.training_step_end = model.training_step_end_full_loop_result_obj_dp + model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp + model.val_dataloader = None + model.test_dataloader = None trainer.fit(model) From e671a792744eb036127ad3f652c0b536697e5d4e Mon Sep 17 
00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:17:18 -0400 Subject: [PATCH 155/168] finished tests for structured results on train epoch --- .../test_trainer_steps_result_return.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index bbe677786614d..9acca9ce820b2 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -479,6 +479,31 @@ def test_use_callbacks_with_train_loop_only(tmpdir): assert ckpt_val['monitor'] == 'checkpoint_on' +def test_xxx(tmpdir): + import tests.base.develop_pipelines as tpipes + from pytorch_lightning.core import memory + import tests.base.develop_utils as tutils + + tutils.set_random_master_port() + + trainer_options = dict( + default_root_dir=tmpdir, + max_epochs=1, + limit_train_batches=10, + limit_val_batches=10, + gpus=[0, 1], + distributed_backend='dp', + progress_bar_refresh_rate=0 + ) + + model = EvalModelTemplate() + + tpipes.run_model_test(trainer_options, model) + + # test memory helper functions + memory.get_memory_profile('min_max') + + @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' From f3ee6c23e9d289e0d4509c06ee20290ad36ad7ce Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:18:20 -0400 Subject: [PATCH 156/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 9acca9ce820b2..72f7cc01ebd34 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -508,7 +508,7 @@ def test_xxx(tmpdir): def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' - batches = 3 + batches = 10 epochs = 3 trainer = Trainer( From be99f0a3657954a8065b66b987d474e6fe92840b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:19:24 -0400 Subject: [PATCH 157/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 72f7cc01ebd34..df473e657860a 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -484,7 +484,7 @@ def test_xxx(tmpdir): from pytorch_lightning.core import memory import tests.base.develop_utils as tutils - tutils.set_random_master_port() + # tutils.set_random_master_port() trainer_options = dict( default_root_dir=tmpdir, @@ -497,11 +497,13 @@ def test_xxx(tmpdir): ) model = EvalModelTemplate() + trainer = Trainer(**trainer_options) + trainer.fit(model) - tpipes.run_model_test(trainer_options, model) + # tpipes.run_model_test(trainer_options, model) # test memory helper functions - memory.get_memory_profile('min_max') + # memory.get_memory_profile('min_max') @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") From d767547bef775a212f9b6b488a16643f9ff68106 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:20:04 -0400 
Subject: [PATCH 158/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index df473e657860a..b27caba8bd2b6 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -515,13 +515,12 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): trainer = Trainer( default_root_dir=tmpdir, - distributed_backend='dp', + max_epochs=1, + limit_train_batches=10, + limit_val_batches=10, gpus=[0, 1], - max_epochs=epochs, - early_stop_callback=True, - row_log_interval=2, - limit_train_batches=batches, - weights_summary=None, + distributed_backend='dp', + progress_bar_refresh_rate=0 ) model = EvalModelTemplate() model.validation_step = None From de16f8abcdf2e8c3cd5ec059b1f671374647fc1c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:21:07 -0400 Subject: [PATCH 159/168] finished tests for structured results on train epoch --- .../test_trainer_steps_result_return.py | 41 ++++--------------- 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index b27caba8bd2b6..637171262d600 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -478,34 +478,6 @@ def test_use_callbacks_with_train_loop_only(tmpdir): assert ckpt_val['epoch'] == expected_epoch assert ckpt_val['monitor'] == 'checkpoint_on' - -def test_xxx(tmpdir): - import tests.base.develop_pipelines as tpipes - from pytorch_lightning.core import memory - import tests.base.develop_utils as tutils - - # tutils.set_random_master_port() - - trainer_options = dict( - default_root_dir=tmpdir, - max_epochs=1, - limit_train_batches=10, - limit_val_batches=10, - gpus=[0, 1], - distributed_backend='dp', - progress_bar_refresh_rate=0 - ) - - model = EvalModelTemplate() - trainer = Trainer(**trainer_options) - trainer.fit(model) - - # tpipes.run_model_test(trainer_options, model) - - # test memory helper functions - # memory.get_memory_profile('min_max') - - @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' @@ -515,12 +487,13 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): trainer = Trainer( default_root_dir=tmpdir, - max_epochs=1, - limit_train_batches=10, - limit_val_batches=10, - gpus=[0, 1], distributed_backend='dp', - progress_bar_refresh_rate=0 + gpus=[0, 1], + max_epochs=epochs, + early_stop_callback=True, + row_log_interval=2, + limit_train_batches=batches, + weights_summary=None, ) model = EvalModelTemplate() model.validation_step = None @@ -531,6 +504,8 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): model.val_dataloader = None model.test_dataloader = None + model = EvalModelTemplate() + trainer.fit(model) # make sure the loop was good From 072cb09a9d0b5b24af14abb710df7e8b82668cc5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:22:18 -0400 Subject: [PATCH 160/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py 
b/tests/trainer/test_trainer_steps_result_return.py index 637171262d600..2389f33013652 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -496,15 +496,15 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): weights_summary=None, ) model = EvalModelTemplate() - model.validation_step = None - model.test_step = None + # model.validation_step = None + # model.test_step = None model.training_step = model.training_step_full_loop_result_obj_dp model.training_step_end = model.training_step_end_full_loop_result_obj_dp model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp - model.val_dataloader = None - model.test_dataloader = None + # model.val_dataloader = None + # model.test_dataloader = None - model = EvalModelTemplate() + # model = EvalModelTemplate() trainer.fit(model) From ea2676157c9efd4b88127f85a1c8fcc1b25c88b9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:23:18 -0400 Subject: [PATCH 161/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 2389f33013652..fd989a23a30f6 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -499,8 +499,8 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): # model.validation_step = None # model.test_step = None model.training_step = model.training_step_full_loop_result_obj_dp - model.training_step_end = model.training_step_end_full_loop_result_obj_dp - model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp + # model.training_step_end = model.training_step_end_full_loop_result_obj_dp + # model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp # model.val_dataloader = None # model.test_dataloader = None From f74e3b0bae9255de586b47249c25590c04b5d2c0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:24:23 -0400 Subject: [PATCH 162/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 +- tests/trainer/test_trainer_steps_result_return.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 71e5b9603c25f..1f921bfabe241 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -40,7 +40,7 @@ def training_step__inf_loss(self, batch, batch_idx, optimizer_idx=None): output /= 0 return output - def training_step_full_loop_result_obj_dp(self, batch, batch_idx): + def training_step_full_loop_result_obj_dp(self, batch, batch_idx, optimizer_idx=None): """ Full loop flow train step (result obj + dp) """ diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index fd989a23a30f6..637171262d600 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -496,15 +496,15 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): weights_summary=None, ) model = EvalModelTemplate() - # model.validation_step = None - # model.test_step = None + model.validation_step = None + model.test_step = None model.training_step = model.training_step_full_loop_result_obj_dp - # model.training_step_end = 
model.training_step_end_full_loop_result_obj_dp - # model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp - # model.val_dataloader = None - # model.test_dataloader = None + model.training_step_end = model.training_step_end_full_loop_result_obj_dp + model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp + model.val_dataloader = None + model.test_dataloader = None - # model = EvalModelTemplate() + model = EvalModelTemplate() trainer.fit(model) From cab63d457e63fed80e79f1d4c400248e36fe0c9e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:25:39 -0400 Subject: [PATCH 163/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 3 --- .../test_trainer_steps_result_return.py | 25 ++++++++----------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 1f921bfabe241..828bdb3c5fa89 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -45,9 +45,6 @@ def training_step_full_loop_result_obj_dp(self, batch, batch_idx, optimizer_idx= Full loop flow train step (result obj + dp) """ x, y = batch - print('-' * 100) - print(x.device, self.device) - print('-' * 100) x = x.view(x.size(0), -1) y_hat = self(x) loss_val = y_hat.sum() diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 637171262d600..a2b03e035b25c 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -485,6 +485,15 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): batches = 10 epochs = 3 + model = EvalModelTemplate() + model.validation_step = None + model.test_step = None + model.training_step = model.training_step_full_loop_result_obj_dp + model.training_step_end = model.training_step_end_full_loop_result_obj_dp + model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp + model.val_dataloader = None + model.test_dataloader = None + trainer = Trainer( default_root_dir=tmpdir, distributed_backend='dp', @@ -495,25 +504,11 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): limit_train_batches=batches, weights_summary=None, ) - model = EvalModelTemplate() - model.validation_step = None - model.test_step = None - model.training_step = model.training_step_full_loop_result_obj_dp - model.training_step_end = model.training_step_end_full_loop_result_obj_dp - model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp - model.val_dataloader = None - model.test_dataloader = None - - model = EvalModelTemplate() trainer.fit(model) - # make sure the loop was good - assert model.training_step_called - assert model.training_step_end_called - assert model.training_epoch_end_called - # make sure we saw all the correct keys + import pdb; pdb.set_trace() seen_keys = set() for metric in trainer.dev_debugger.logged_metrics: seen_keys.update(metric.keys()) From db26566c30295a11dad2795a1458e409f1f4cebd Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:26:34 -0400 Subject: [PATCH 164/168] finished tests for structured results on train epoch --- tests/base/model_train_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/base/model_train_steps.py b/tests/base/model_train_steps.py index 828bdb3c5fa89..6022b864787ab 100644 --- a/tests/base/model_train_steps.py +++ b/tests/base/model_train_steps.py @@ -46,7 +46,7 @@ def 
training_step_full_loop_result_obj_dp(self, batch, batch_idx, optimizer_idx= """ x, y = batch x = x.view(x.size(0), -1) - y_hat = self(x) + y_hat = self(x.to(self.device)) loss_val = y_hat.sum() result = TrainResult(minimize=loss_val) result.log('train_step_metric', loss_val + 1) From a1010dda35a9bb77b152201e639f2e6fe9624595 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:27:20 -0400 Subject: [PATCH 165/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index a2b03e035b25c..794e8bfc4aa62 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -508,11 +508,11 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): trainer.fit(model) # make sure we saw all the correct keys - import pdb; pdb.set_trace() seen_keys = set() for metric in trainer.dev_debugger.logged_metrics: seen_keys.update(metric.keys()) + import pdb; pdb.set_trace() assert 'train_step_metric' in seen_keys assert 'train_step_end_metric' in seen_keys assert 'train_epoch_end_metric' in seen_keys From 30e17aa1995b0dce3a26c8da1e8b47f6eddcdf84 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:28:32 -0400 Subject: [PATCH 166/168] finished tests for structured results on train epoch --- tests/trainer/test_trainer_steps_result_return.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/trainer/test_trainer_steps_result_return.py b/tests/trainer/test_trainer_steps_result_return.py index 794e8bfc4aa62..16353bb8b20dc 100644 --- a/tests/trainer/test_trainer_steps_result_return.py +++ b/tests/trainer/test_trainer_steps_result_return.py @@ -277,8 +277,8 @@ def test_training_step_result_log_step_and_epoch(tmpdir): assert isinstance(train_step_out, TrainResult) assert 'minimize' in train_step_out - assert f'step_epoch_log_and_pbar_acc1' in train_step_out - assert f'step_epoch_log_acc2' in train_step_out + assert 'step_epoch_log_and_pbar_acc1' in train_step_out + assert 'step_epoch_log_acc2' in train_step_out # make sure the optimizer closure returns the correct things opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) @@ -351,8 +351,8 @@ def test_training_step_epoch_end_result(tmpdir): assert isinstance(train_step_out, TrainResult) assert 'minimize' in train_step_out - assert f'step_epoch_log_and_pbar_acc1' in train_step_out - assert f'step_epoch_log_acc2' in train_step_out + assert 'step_epoch_log_and_pbar_acc1' in train_step_out + assert 'step_epoch_log_acc2' in train_step_out # make sure the optimizer closure returns the correct things opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) @@ -478,6 +478,7 @@ def test_use_callbacks_with_train_loop_only(tmpdir): assert ckpt_val['epoch'] == expected_epoch assert ckpt_val['monitor'] == 'checkpoint_on' + @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_full_train_loop_with_results_obj_dp(tmpdir): os.environ['PL_DEV_DEBUG'] = '1' @@ -512,7 +513,6 @@ def test_full_train_loop_with_results_obj_dp(tmpdir): for metric in trainer.dev_debugger.logged_metrics: seen_keys.update(metric.keys()) - import pdb; pdb.set_trace() assert 'train_step_metric' in seen_keys assert 
'train_step_end_metric' in seen_keys assert 'train_epoch_end_metric' in seen_keys From a7f05440a8488a212139db4d10b42274fe014f28 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:42:27 -0400 Subject: [PATCH 167/168] Update pytorch_lightning/core/step_result.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- pytorch_lightning/core/step_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/step_result.py b/pytorch_lightning/core/step_result.py index 73aeb7bb39fdd..1dc88db15ccb5 100644 --- a/pytorch_lightning/core/step_result.py +++ b/pytorch_lightning/core/step_result.py @@ -23,7 +23,7 @@ def __init__( if hiddens is not None: self.hiddens = hiddens if minimize is not None: - err = 'Minimize can only be used in training_end, training_step_end, training_epoch_end' + err = 'Minimize can only be used in training_step, training_step_end, training_epoch_end' self._assert_grad_tensor_metric('minimize', minimize, err) self.minimize = minimize From 704d2019375b42588f105eea8ea32948cfeddc37 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 20 Jul 2020 18:42:36 -0400 Subject: [PATCH 168/168] Update pytorch_lightning/overrides/data_parallel.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wälchli --- pytorch_lightning/overrides/data_parallel.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch_lightning/overrides/data_parallel.py b/pytorch_lightning/overrides/data_parallel.py index 229d9133b1efd..c9c793cc89a2f 100644 --- a/pytorch_lightning/overrides/data_parallel.py +++ b/pytorch_lightning/overrides/data_parallel.py @@ -188,8 +188,6 @@ def _worker(i, module, input, kwargs, device=None): if not isinstance(input, (list, tuple)): input = (input,) - if hasattr(module, '_device'): - module._device = device module = module.to(device) # ---------------