From 4130ec41e8442495473c7e0b6b0e3fa6b10feb7d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 11 Jul 2020 23:10:47 -0400 Subject: [PATCH 01/25] add tests for single scalar return from training --- tests/base/deterministic_model.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index c387997da57d7..db9355a043e93 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -15,6 +15,10 @@ def __init__(self, weights=None): self.training_step_end_called = False self.training_epoch_end_called = False + self.validation_step_called = False + self.validation_step_end_called = False + self.validation_epoch_end_called = False + self.l1 = nn.Linear(2, 3, bias=False) if weights is None: weights = torch.tensor([ @@ -162,6 +166,17 @@ def training_epoch_end_dict(self, outputs): return {'log': logs, 'progress_bar': pbar} + def validation_step_no_return(self, batch, batch_idx): + acc = self.step(batch, batch_idx) + + def validation_step_scalar_return(self, batch, batch_idx): + acc = self.step(batch, batch_idx) + return acc + + def validation_step_arbitary_dict_return(self, batch, batch_idx): + acc = self.step(batch, batch_idx) + return {'some': acc, 'value': 'a'} + def validation_step_dict_return(self, batch, batch_idx): acc = self.step(batch, batch_idx) From 4b12043d099afb31a46d0238011f20ea4e6d475d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 11 Jul 2020 23:10:57 -0400 Subject: [PATCH 02/25] add tests for single scalar return from training --- tests/trainer/test_eval_loop_dict_return.py | 166 ++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 tests/trainer/test_eval_loop_dict_return.py diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py new file mode 100644 index 0000000000000..62244a5b9e464 --- /dev/null +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -0,0 +1,166 @@ +""" +Tests to ensure that the training loop works with a dict +""" +from pytorch_lightning import Trainer +from tests.base.deterministic_model import DeterministicModel + +# train step + val step (no return) +# train step + val step (scalar return) +# train loop + val step (arbitrary dict return) +# train loop + val step (structured return) +# train loop + val step + val step end +# train loop + val step + val step end + val epoch end +# train loop + val step + val epoch end + + +def test_validation_step_dict(tmpdir): + """ + test that the train + val loop can be used + """ + model = DeterministicModel() + model.training_step = model.training_step_dict_return + model.validation_step = model.validation_step_dict_return + + trainer = Trainer( + default_root_dir=tmpdir, + fast_dev_run=True, + weights_summary=None, + ) + trainer.fit(model) + + # make sure correct steps were called + assert model.training_step_called + assert not model.training_step_end_called + assert not model.training_epoch_end_called + + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.run_training_batch(batch, batch_idx) + assert out.signal == 0 + assert out.batch_log_metrics['log_acc1'] == 12.0 + assert out.batch_log_metrics['log_acc2'] == 7.0 + + train_step_out = out.training_step_output_for_epoch_end + pbar_metrics = train_step_out['progress_bar'] + assert 'log' in train_step_out + assert 'progress_bar' in train_step_out + assert train_step_out['train_step_test'] == 549 + assert pbar_metrics['pbar_acc1'] == 17.0 + assert pbar_metrics['pbar_acc2'] == 19.0 + + # make sure the optimizer closure returns the correct things + opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) + assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) + + +def training_step_with_step_end(tmpdir): + """ + Checks train_step + training_step_end + """ + model = DeterministicModel() + model.training_step = model.training_step_for_step_end_dict + model.training_step_end = model.training_step_end_dict + model.val_dataloader = None + + trainer = Trainer(fast_dev_run=True, weights_summary=None) + trainer.fit(model) + + # make sure correct steps were called + assert model.training_step_called + assert model.training_step_end_called + assert not model.training_epoch_end_called + + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.run_training_batch(batch, batch_idx) + assert out.signal == 0 + assert out.batch_log_metrics['log_acc1'] == 14.0 + assert out.batch_log_metrics['log_acc2'] == 9.0 + + train_step_end_out = out.training_step_output_for_epoch_end + pbar_metrics = train_step_end_out['progress_bar'] + assert 'train_step_end' in train_step_end_out + assert pbar_metrics['pbar_acc1'] == 19.0 + assert pbar_metrics['pbar_acc2'] == 21.0 + + +def test_full_training_loop_dict(tmpdir): + """ + Checks train_step + training_step_end + training_epoch_end + """ + model = DeterministicModel() + model.training_step = model.training_step_for_step_end_dict + model.training_step_end = model.training_step_end_dict + model.training_epoch_end = model.training_epoch_end_dict + model.val_dataloader = None + + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + weights_summary=None, + ) + trainer.fit(model) + + # make sure correct steps were called + assert model.training_step_called + assert model.training_step_end_called + assert model.training_epoch_end_called + + # assert epoch end metrics were added + assert trainer.callback_metrics['epoch_end_log_1'] == 178 + assert trainer.progress_bar_metrics['epoch_end_pbar_1'] == 234 + + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.run_training_batch(batch, batch_idx) + assert out.signal == 0 + assert out.batch_log_metrics['log_acc1'] == 14.0 + assert out.batch_log_metrics['log_acc2'] == 9.0 + + train_step_end_out = out.training_step_output_for_epoch_end + pbar_metrics = train_step_end_out['progress_bar'] + assert pbar_metrics['pbar_acc1'] == 19.0 + assert pbar_metrics['pbar_acc2'] == 21.0 + + +def test_train_step_epoch_end(tmpdir): + """ + Checks train_step + training_epoch_end (NO training_step_end) + """ + model = DeterministicModel() + model.training_step = model.training_step_dict_return + model.training_step_end = None + model.training_epoch_end = model.training_epoch_end_dict + model.val_dataloader = None + + trainer = Trainer(max_epochs=1, weights_summary=None) + trainer.fit(model) + + # make sure correct steps were called + assert model.training_step_called + assert not model.training_step_end_called + assert model.training_epoch_end_called + + # assert epoch end metrics were added + assert trainer.callback_metrics['epoch_end_log_1'] == 178 + assert trainer.progress_bar_metrics['epoch_end_pbar_1'] == 234 + + # make sure training outputs what is expected + for batch_idx, batch in enumerate(model.train_dataloader()): + break + + out = trainer.run_training_batch(batch, batch_idx) + assert out.signal == 0 + assert out.batch_log_metrics['log_acc1'] == 12.0 + assert out.batch_log_metrics['log_acc2'] == 7.0 + + train_step_end_out = out.training_step_output_for_epoch_end + pbar_metrics = train_step_end_out['progress_bar'] + assert pbar_metrics['pbar_acc1'] == 17.0 + assert pbar_metrics['pbar_acc2'] == 19.0 From 3466e7e1a5ed8ec78c284a5fad2a9ddd981feab2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 13 Jul 2020 11:51:06 -0400 Subject: [PATCH 03/25] add tests for single scalar return from training --- pl_examples/domain_templates/imagenet.py | 2 +- pytorch_lightning/trainer/evaluation_loop.py | 12 ++-- pytorch_lightning/trainer/logging.py | 7 ++ pytorch_lightning/trainer/trainer.py | 2 +- tests/base/deterministic_model.py | 13 ++++ tests/trainer/test_eval_loop_dict_return.py | 75 ++++++++++++-------- 6 files changed, 74 insertions(+), 37 deletions(-) diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py index 19a85b87949df..20fb1cae24732 100644 --- a/pl_examples/domain_templates/imagenet.py +++ b/pl_examples/domain_templates/imagenet.py @@ -245,7 +245,7 @@ def main(args: Namespace) -> None: ) if args.evaluate: - trainer.run_evaluation() + trainer.test() else: trainer.fit(model) diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index 16f68f1e13502..2bfb54c5d9372 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -315,7 +315,7 @@ def _evaluate( # with a single dataloader don't pass an array if len(dataloaders) == 1: - outputs = outputs[0] + eval_results = outputs[0] # give model a chance to do something with the outputs (and method defined) if isinstance(model, (LightningDistributedDataParallel, LightningDataParallel)): @@ -324,22 +324,22 @@ def _evaluate( if test_mode: if self.is_overridden('test_end', model=model): # TODO: remove in v1.0.0 - eval_results = model.test_end(outputs) + eval_results = model.test_end(eval_results) rank_zero_warn('Method `test_end` was deprecated in v0.7 and will be removed in v1.0.' ' Use `test_epoch_end` instead.', DeprecationWarning) elif self.is_overridden('test_epoch_end', model=model): - eval_results = model.test_epoch_end(outputs) + eval_results = model.test_epoch_end(eval_results) else: if self.is_overridden('validation_end', model=model): # TODO: remove in v1.0.0 - eval_results = model.validation_end(outputs) + eval_results = model.validation_end(eval_results) rank_zero_warn('Method `validation_end` was deprecated in v0.7 and will be removed in v1.0.' ' Use `validation_epoch_end` instead.', DeprecationWarning) elif self.is_overridden('validation_epoch_end', model=model): - eval_results = model.validation_epoch_end(outputs) + eval_results = model.validation_epoch_end(eval_results) # enable train mode again model.train() @@ -429,7 +429,7 @@ def run_evaluation(self, test_mode: bool = False): else: self.on_validation_end() - return callback_metrics + return callback_metrics, eval_results def evaluation_forward(self, model, batch, batch_idx, dataloader_idx, test_mode: bool = False): # make dataloader_idx arg in validation_step optional diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index 35f5d5d35b9ca..e5911a87923e0 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -109,6 +109,13 @@ def process_output(self, output, train=False): hiddens = None return output, progress_bar_metrics, log_metrics, callback_metrics, hiddens + # -------------------------- + # handle lists + # -------------------------- + if isinstance(output, list): + # TODO: what to do when given a list? + output = {} + # --------------- # EXTRACT CALLBACK KEYS # --------------- diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 770dc4b314688..762d7eca2b090 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1163,7 +1163,7 @@ def run_pretrain_routine(self, model: LightningModule): if self.testing: # only load test dataloader for testing # self.reset_test_dataloader(ref_model) - results = self.run_evaluation(test_mode=True) + results, _ = self.run_evaluation(test_mode=True) # remove all cuda tensors if results is not None and isinstance(results, dict) and len(results) > 0: diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index db9355a043e93..52aed0e6acdd8 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -167,23 +167,36 @@ def training_epoch_end_dict(self, outputs): return {'log': logs, 'progress_bar': pbar} def validation_step_no_return(self, batch, batch_idx): + self.validation_step_called = True acc = self.step(batch, batch_idx) def validation_step_scalar_return(self, batch, batch_idx): + self.validation_step_called = True acc = self.step(batch, batch_idx) return acc def validation_step_arbitary_dict_return(self, batch, batch_idx): + self.validation_step_called = True acc = self.step(batch, batch_idx) return {'some': acc, 'value': 'a'} def validation_step_dict_return(self, batch, batch_idx): + self.validation_step_called = True acc = self.step(batch, batch_idx) logs = {'log_acc1': torch.tensor(12).type_as(acc), 'log_acc2': torch.tensor(7).type_as(acc)} pbar = {'pbar_acc1': torch.tensor(17).type_as(acc), 'pbar_acc2': torch.tensor(19).type_as(acc)} return {'val_loss': acc, 'log': logs, 'progress_bar': pbar} + def validation_step_end(self, outputs): + self.validation_step_end_called = True + + def validation_epoch_end(self, outputs): + self.validation_epoch_end_called = True + + # ----------------------------- + # DATA + # ----------------------------- def train_dataloader(self): return DataLoader(DummyDataset(), batch_size=3, shuffle=False) diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 62244a5b9e464..2d99221d8735a 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -13,13 +13,15 @@ # train loop + val step + val epoch end -def test_validation_step_dict(tmpdir): +def test_validation_step_no_return(tmpdir): """ - test that the train + val loop can be used + Test that val step can return nothing """ model = DeterministicModel() model.training_step = model.training_step_dict_return - model.validation_step = model.validation_step_dict_return + model.validation_step = model.validation_step_no_return + model.validation_step_end = None + model.validation_epoch_end = None trainer = Trainer( default_root_dir=tmpdir, @@ -28,32 +30,50 @@ def test_validation_step_dict(tmpdir): ) trainer.fit(model) + # out are the results of the full loop + # eval_results are output of _evaluate + out, eval_results = trainer.run_evaluation(test_mode=False) + assert len(out) == 0 + assert len(eval_results) == 0 + # make sure correct steps were called - assert model.training_step_called - assert not model.training_step_end_called - assert not model.training_epoch_end_called + assert model.validation_step_called + assert not model.validation_step_end_called + assert not model.validation_epoch_end_called - # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break - out = trainer.run_training_batch(batch, batch_idx) - assert out.signal == 0 - assert out.batch_log_metrics['log_acc1'] == 12.0 - assert out.batch_log_metrics['log_acc2'] == 7.0 +def test_validation_step_scalar_return(tmpdir): + """ + Test that val step can return a scalar + """ + model = DeterministicModel() + model.training_step = model.training_step_dict_return + model.validation_step = model.validation_step_scalar_return + model.validation_step_end = None + model.validation_epoch_end = None - train_step_out = out.training_step_output_for_epoch_end - pbar_metrics = train_step_out['progress_bar'] - assert 'log' in train_step_out - assert 'progress_bar' in train_step_out - assert train_step_out['train_step_test'] == 549 - assert pbar_metrics['pbar_acc1'] == 17.0 - assert pbar_metrics['pbar_acc2'] == 19.0 + trainer = Trainer( + default_root_dir=tmpdir, + weights_summary=None, + limit_train_batches=2, + limit_val_batches=2 + ) + trainer.fit(model) + + # out are the results of the full loop + # eval_results are output of _evaluate + out, eval_results = trainer.run_evaluation(test_mode=False) + assert len(out) == 0 + assert len(eval_results) == 2 + assert eval_results[0] == 171 and eval_results[1] == 171 + + # make sure correct steps were called + assert model.validation_step_called + assert not model.validation_step_end_called + assert not model.validation_epoch_end_called - # make sure the optimizer closure returns the correct things - opt_closure_result = trainer.optimizer_closure(batch, batch_idx, 0, trainer.optimizers[0], trainer.hiddens) - assert opt_closure_result['loss'] == (42.0 * 3) + (15.0 * 3) +test_validation_step_scalar_return('') def training_step_with_step_end(tmpdir): """ @@ -73,8 +93,7 @@ def training_step_with_step_end(tmpdir): assert not model.training_epoch_end_called # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break + batch_idx, batch = 0, next(iter(model.train_dataloader())) out = trainer.run_training_batch(batch, batch_idx) assert out.signal == 0 @@ -115,8 +134,7 @@ def test_full_training_loop_dict(tmpdir): assert trainer.progress_bar_metrics['epoch_end_pbar_1'] == 234 # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break + batch_idx, batch = 0, next(iter(model.train_dataloader())) out = trainer.run_training_batch(batch, batch_idx) assert out.signal == 0 @@ -152,8 +170,7 @@ def test_train_step_epoch_end(tmpdir): assert trainer.progress_bar_metrics['epoch_end_pbar_1'] == 234 # make sure training outputs what is expected - for batch_idx, batch in enumerate(model.train_dataloader()): - break + batch_idx, batch = 0, next(iter(model.train_dataloader())) out = trainer.run_training_batch(batch, batch_idx) assert out.signal == 0 From 2d1b213e1601288c1dd227d976eda3b1bb41b8e1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 06:27:42 -0400 Subject: [PATCH 04/25] fixing val step only --- pytorch_lightning/trainer/evaluation_loop.py | 32 ++++++++++++-------- pytorch_lightning/trainer/logging.py | 7 ----- pytorch_lightning/trainer/trainer.py | 27 +++++++++++------ 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index 2bfb54c5d9372..c8da00ae22542 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -393,23 +393,29 @@ def run_evaluation(self, test_mode: bool = False): # enable no returns callback_metrics = {} if eval_results is not None and len(eval_results) > 0: - _, prog_bar_metrics, log_metrics, callback_metrics, _ = self.process_output(eval_results) - # add metrics to prog bar - self.add_progress_bar_metrics(prog_bar_metrics) + # in eval, the user may return something at every validation step without final reduction + if not isinstance(eval_results, list): + eval_results = [eval_results] - # log results of test - if test_mode and self.is_global_zero: - print('-' * 80) - print('TEST RESULTS') - pprint(callback_metrics) - print('-' * 80) + for result in eval_results: + _, prog_bar_metrics, log_metrics, callback_metrics, _ = self.process_output(result) - # log metrics - self.log_metrics(log_metrics, {}) + # add metrics to prog bar + self.add_progress_bar_metrics(prog_bar_metrics) - # track metrics for callbacks - self.callback_metrics.update(callback_metrics) + # log results of test + if test_mode and self.is_global_zero: + print('-' * 80) + print('TEST RESULTS') + pprint(callback_metrics) + print('-' * 80) + + # log metrics + self.log_metrics(log_metrics, {}) + + # track metrics for callbacks + self.callback_metrics.update(callback_metrics) # hook model.on_post_performance_check() diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py index e5911a87923e0..35f5d5d35b9ca 100644 --- a/pytorch_lightning/trainer/logging.py +++ b/pytorch_lightning/trainer/logging.py @@ -109,13 +109,6 @@ def process_output(self, output, train=False): hiddens = None return output, progress_bar_metrics, log_metrics, callback_metrics, hiddens - # -------------------------- - # handle lists - # -------------------------- - if isinstance(output, list): - # TODO: what to do when given a list? - output = {} - # --------------- # EXTRACT CALLBACK KEYS # --------------- diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 762d7eca2b090..917b73158c464 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1179,6 +1179,20 @@ def run_pretrain_routine(self, model: LightningModule): self.disable_validation = not (self.is_overridden('validation_step') and self.limit_val_batches > 0) \ and not self.fast_dev_run + # run a few val batches before training starts + self._run_sanity_check(ref_model, model) + + # clear cache before training + if self.on_gpu and self.root_gpu is not None: + # use context because of: + # https://discuss.pytorch.org/t/out-of-memory-when-i-use-torch-cuda-empty-cache/57898 + with torch.cuda.device(f'cuda:{self.root_gpu}'): + torch.cuda.empty_cache() + + # CORE TRAINING LOOP + self.train() + + def _run_sanity_check(self, ref_model, model): # run tiny validation (if validation defined) # to make sure program won't crash during val if not self.disable_validation and self.num_sanity_val_steps > 0: @@ -1197,21 +1211,14 @@ def run_pretrain_routine(self, model: LightningModule): # allow no returns from eval if eval_results is not None and len(eval_results) > 0: + # when we get a list back, used only the last item + if isinstance(eval_results, list): + eval_results = eval_results[-1] _, _, _, callback_metrics, _ = self.process_output(eval_results) self.callback_metrics = callback_metrics self.on_sanity_check_end() - # clear cache before training - if self.on_gpu and self.root_gpu is not None: - # use context because of: - # https://discuss.pytorch.org/t/out-of-memory-when-i-use-torch-cuda-empty-cache/57898 - with torch.cuda.device(f'cuda:{self.root_gpu}'): - torch.cuda.empty_cache() - - # CORE TRAINING LOOP - self.train() - def test( self, model: Optional[LightningModule] = None, From 07115f43805be5fe48705b743590bfcba058907e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 06:30:09 -0400 Subject: [PATCH 05/25] fixing val step only --- pytorch_lightning/trainer/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 917b73158c464..85c2dc1f7c0dd 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1125,7 +1125,6 @@ def run_pretrain_routine(self, model: LightningModule): if self.logger is not None: # save exp to get started self.logger.log_hyperparams(ref_model.hparams) - self.logger.save() if self.use_ddp or self.use_ddp2: From 034860cd6691ed7b147443150a0f7af3a38c67ef Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 06:33:42 -0400 Subject: [PATCH 06/25] fixing val step only --- tests/trainer/test_eval_loop_dict_return.py | 37 ++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 2d99221d8735a..347e29352d4d1 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -73,7 +73,42 @@ def test_validation_step_scalar_return(tmpdir): assert not model.validation_epoch_end_called -test_validation_step_scalar_return('') +def test_validation_step_arbitrary_dict_return(tmpdir): + """ + Test that val step can return a scalar + """ + model = DeterministicModel() + model.training_step = model.training_step_dict_return + model.validation_step = model.validation_step_arbitary_dict_return + model.validation_step_end = None + model.validation_epoch_end = None + + trainer = Trainer( + default_root_dir=tmpdir, + weights_summary=None, + limit_train_batches=2, + limit_val_batches=2 + ) + trainer.fit(model) + + # out are the results of the full loop + # eval_results are output of _evaluate + callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) + assert len(callback_metrics) == 2 + assert len(eval_results) == 2 + assert eval_results[0]['some'] == 171 + assert eval_results[1]['some'] == 171 + + assert eval_results[0]['value'] == 'a' + assert eval_results[1]['value'] == 'a' + + # make sure correct steps were called + assert model.validation_step_called + assert not model.validation_step_end_called + assert not model.validation_epoch_end_called + +test_validation_step_arbitrary_dict_return('') + def training_step_with_step_end(tmpdir): """ From 2023f4dcf471ea1ada63fb215d64e0d2cc5d4729 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 06:40:47 -0400 Subject: [PATCH 07/25] fixing val step only --- tests/base/deterministic_model.py | 2 +- tests/trainer/test_eval_loop_dict_return.py | 40 ++++++++++++++++++--- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 52aed0e6acdd8..7afd9d6905517 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -184,7 +184,7 @@ def validation_step_dict_return(self, batch, batch_idx): self.validation_step_called = True acc = self.step(batch, batch_idx) - logs = {'log_acc1': torch.tensor(12).type_as(acc), 'log_acc2': torch.tensor(7).type_as(acc)} + logs = {'log_acc1': torch.tensor(12 + batch_idx).type_as(acc), 'log_acc2': torch.tensor(7).type_as(acc)} pbar = {'pbar_acc1': torch.tensor(17).type_as(acc), 'pbar_acc2': torch.tensor(19).type_as(acc)} return {'val_loss': acc, 'log': logs, 'progress_bar': pbar} diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 347e29352d4d1..2ec5231fcb710 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -4,9 +4,6 @@ from pytorch_lightning import Trainer from tests.base.deterministic_model import DeterministicModel -# train step + val step (no return) -# train step + val step (scalar return) -# train loop + val step (arbitrary dict return) # train loop + val step (structured return) # train loop + val step + val step end # train loop + val step + val step end + val epoch end @@ -107,7 +104,42 @@ def test_validation_step_arbitrary_dict_return(tmpdir): assert not model.validation_step_end_called assert not model.validation_epoch_end_called -test_validation_step_arbitrary_dict_return('') + +def test_validation_step_dict_return(tmpdir): + """ + Test that val step can return a scalar + """ + model = DeterministicModel() + model.training_step = model.training_step_dict_return + model.validation_step = model.validation_step_dict_return + model.validation_step_end = None + model.validation_epoch_end = None + + trainer = Trainer( + default_root_dir=tmpdir, + weights_summary=None, + limit_train_batches=2, + limit_val_batches=2 + ) + trainer.fit(model) + + # out are the results of the full loop + # eval_results are output of _evaluate + callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) + assert len(callback_metrics) == 5 + assert len(eval_results) == 2 + assert eval_results[0]['log']['log_acc1'] == 12 + assert eval_results[1]['log']['log_acc1'] == 13 + + for k in ['val_loss', 'log', 'progress_bar']: + assert k in eval_results[0] + assert k in eval_results[1] + + # ensure all the keys ended up as candidates for callbacks + assert len(trainer.callback_metrics) == 7 + + +test_validation_step_dict_return('') def training_step_with_step_end(tmpdir): From c56acea431bcdcd861f0605ba2eb1ee662e0817a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 06:47:15 -0400 Subject: [PATCH 08/25] fixing val step only --- tests/trainer/test_eval_loop_dict_return.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 2ec5231fcb710..8fae348f2befc 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -72,7 +72,7 @@ def test_validation_step_scalar_return(tmpdir): def test_validation_step_arbitrary_dict_return(tmpdir): """ - Test that val step can return a scalar + Test that val step can return an arbitrary dict """ model = DeterministicModel() model.training_step = model.training_step_dict_return @@ -107,7 +107,8 @@ def test_validation_step_arbitrary_dict_return(tmpdir): def test_validation_step_dict_return(tmpdir): """ - Test that val step can return a scalar + Test that val step can return a dict with all the expected keys and they end up + in the correct place """ model = DeterministicModel() model.training_step = model.training_step_dict_return From a5098ea8d82af213eaeeaf6313d27ac8a35593e0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 06:47:21 -0400 Subject: [PATCH 09/25] fixing val step only --- tests/trainer/test_eval_loop_dict_return.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 8fae348f2befc..18c51768376b8 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -4,7 +4,6 @@ from pytorch_lightning import Trainer from tests.base.deterministic_model import DeterministicModel -# train loop + val step (structured return) # train loop + val step + val step end # train loop + val step + val step end + val epoch end # train loop + val step + val epoch end From b4d8b364b810daf01c8d59097b68d4f73bb15bed Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 06:58:38 -0400 Subject: [PATCH 10/25] fixing val step only --- pytorch_lightning/trainer/evaluation_loop.py | 3 +- tests/base/deterministic_model.py | 15 ++- tests/trainer/test_eval_loop_dict_return.py | 133 +++++++------------ 3 files changed, 64 insertions(+), 87 deletions(-) diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index c8da00ae22542..2ec67caa9bd15 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -307,7 +307,8 @@ def _evaluate( self.on_validation_batch_end() # track outputs for collation - dl_outputs.append(output) + if output is not None: + dl_outputs.append(output) outputs.append(dl_outputs) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 7afd9d6905517..31e005c92bc3a 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -188,9 +188,22 @@ def validation_step_dict_return(self, batch, batch_idx): pbar = {'pbar_acc1': torch.tensor(17).type_as(acc), 'pbar_acc2': torch.tensor(19).type_as(acc)} return {'val_loss': acc, 'log': logs, 'progress_bar': pbar} - def validation_step_end(self, outputs): + def validation_step_end_no_return(self, val_step_output): + assert len(val_step_output) == 3 + assert val_step_output['val_loss'] == 171 + assert val_step_output['log']['log_acc1'] >= 12 + assert val_step_output['progress_bar']['pbar_acc1'] == 17 self.validation_step_end_called = True + def validation_step_end(self, val_step_output): + assert len(val_step_output) == 3 + assert val_step_output['val_loss'] == 171 + assert val_step_output['log']['log_acc1'] >= 12 + assert val_step_output['progress_bar']['pbar_acc1'] == 17 + self.validation_step_end_called = True + + return val_step_output + def validation_epoch_end(self, outputs): self.validation_epoch_end_called = True diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 18c51768376b8..655c4b1399d2a 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -138,113 +138,76 @@ def test_validation_step_dict_return(tmpdir): # ensure all the keys ended up as candidates for callbacks assert len(trainer.callback_metrics) == 7 - -test_validation_step_dict_return('') + # make sure correct steps were called + assert model.validation_step_called + assert not model.validation_step_end_called + assert not model.validation_epoch_end_called -def training_step_with_step_end(tmpdir): +def test_val_step_step_end_no_return(tmpdir): """ - Checks train_step + training_step_end + Test that val step + val step end work """ model = DeterministicModel() - model.training_step = model.training_step_for_step_end_dict - model.training_step_end = model.training_step_end_dict - model.val_dataloader = None + model.training_step = model.training_step_dict_return + model.validation_step = model.validation_step_dict_return + model.validation_step_end = model.validation_step_end_no_return + model.validation_epoch_end = None - trainer = Trainer(fast_dev_run=True, weights_summary=None) + trainer = Trainer( + default_root_dir=tmpdir, + weights_summary=None, + limit_train_batches=2, + limit_val_batches=2 + ) trainer.fit(model) - # make sure correct steps were called - assert model.training_step_called - assert model.training_step_end_called - assert not model.training_epoch_end_called - - # make sure training outputs what is expected - batch_idx, batch = 0, next(iter(model.train_dataloader())) - - out = trainer.run_training_batch(batch, batch_idx) - assert out.signal == 0 - assert out.batch_log_metrics['log_acc1'] == 14.0 - assert out.batch_log_metrics['log_acc2'] == 9.0 + # out are the results of the full loop + # eval_results are output of _evaluate + callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) + assert len(callback_metrics) == 0 + assert len(eval_results) == 0 - train_step_end_out = out.training_step_output_for_epoch_end - pbar_metrics = train_step_end_out['progress_bar'] - assert 'train_step_end' in train_step_end_out - assert pbar_metrics['pbar_acc1'] == 19.0 - assert pbar_metrics['pbar_acc2'] == 21.0 + # make sure correct steps were called + assert model.validation_step_called + assert model.validation_step_end_called + assert not model.validation_epoch_end_called -def test_full_training_loop_dict(tmpdir): +def test_val_step_step_end(tmpdir): """ - Checks train_step + training_step_end + training_epoch_end + Test that val step + val step end work """ model = DeterministicModel() - model.training_step = model.training_step_for_step_end_dict - model.training_step_end = model.training_step_end_dict - model.training_epoch_end = model.training_epoch_end_dict - model.val_dataloader = None + model.training_step = model.training_step_dict_return + model.validation_step = model.validation_step_dict_return + model.validation_step_end = model.validation_step_end + model.validation_epoch_end = None trainer = Trainer( default_root_dir=tmpdir, - max_epochs=1, weights_summary=None, + limit_train_batches=2, + limit_val_batches=2 ) trainer.fit(model) - # make sure correct steps were called - assert model.training_step_called - assert model.training_step_end_called - assert model.training_epoch_end_called - - # assert epoch end metrics were added - assert trainer.callback_metrics['epoch_end_log_1'] == 178 - assert trainer.progress_bar_metrics['epoch_end_pbar_1'] == 234 - - # make sure training outputs what is expected - batch_idx, batch = 0, next(iter(model.train_dataloader())) - - out = trainer.run_training_batch(batch, batch_idx) - assert out.signal == 0 - assert out.batch_log_metrics['log_acc1'] == 14.0 - assert out.batch_log_metrics['log_acc2'] == 9.0 - - train_step_end_out = out.training_step_output_for_epoch_end - pbar_metrics = train_step_end_out['progress_bar'] - assert pbar_metrics['pbar_acc1'] == 19.0 - assert pbar_metrics['pbar_acc2'] == 21.0 - + # out are the results of the full loop + # eval_results are output of _evaluate + callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) + assert len(callback_metrics) == 5 + assert len(eval_results) == 2 + assert eval_results[0]['log']['log_acc1'] == 12 + assert eval_results[1]['log']['log_acc1'] == 13 -def test_train_step_epoch_end(tmpdir): - """ - Checks train_step + training_epoch_end (NO training_step_end) - """ - model = DeterministicModel() - model.training_step = model.training_step_dict_return - model.training_step_end = None - model.training_epoch_end = model.training_epoch_end_dict - model.val_dataloader = None + for k in ['val_loss', 'log', 'progress_bar']: + assert k in eval_results[0] + assert k in eval_results[1] - trainer = Trainer(max_epochs=1, weights_summary=None) - trainer.fit(model) + # ensure all the keys ended up as candidates for callbacks + assert len(trainer.callback_metrics) == 8 # make sure correct steps were called - assert model.training_step_called - assert not model.training_step_end_called - assert model.training_epoch_end_called - - # assert epoch end metrics were added - assert trainer.callback_metrics['epoch_end_log_1'] == 178 - assert trainer.progress_bar_metrics['epoch_end_pbar_1'] == 234 - - # make sure training outputs what is expected - batch_idx, batch = 0, next(iter(model.train_dataloader())) - - out = trainer.run_training_batch(batch, batch_idx) - assert out.signal == 0 - assert out.batch_log_metrics['log_acc1'] == 12.0 - assert out.batch_log_metrics['log_acc2'] == 7.0 - - train_step_end_out = out.training_step_output_for_epoch_end - pbar_metrics = train_step_end_out['progress_bar'] - assert pbar_metrics['pbar_acc1'] == 17.0 - assert pbar_metrics['pbar_acc2'] == 19.0 + assert model.validation_step_called + assert model.validation_step_end_called + assert not model.validation_epoch_end_called From 265eb2df8d7b2e4c18425f2ba712fd1a9333c65b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 07:23:10 -0400 Subject: [PATCH 11/25] fixing val step only --- tests/base/deterministic_model.py | 11 +++ tests/trainer/test_eval_loop_dict_return.py | 87 ++++++++++++++++++++- 2 files changed, 96 insertions(+), 2 deletions(-) diff --git a/tests/base/deterministic_model.py b/tests/base/deterministic_model.py index 31e005c92bc3a..a4988673c60a4 100644 --- a/tests/base/deterministic_model.py +++ b/tests/base/deterministic_model.py @@ -202,11 +202,22 @@ def validation_step_end(self, val_step_output): assert val_step_output['progress_bar']['pbar_acc1'] == 17 self.validation_step_end_called = True + val_step_output['val_step_end'] = torch.tensor(1802) + return val_step_output def validation_epoch_end(self, outputs): + assert len(outputs) == self.trainer.num_val_batches[0] + + for i, out in enumerate(outputs): + assert out['log']['log_acc1'] >= 12 + i + self.validation_epoch_end_called = True + result = outputs[-1] + result['val_epoch_end'] = torch.tensor(1233) + return result + # ----------------------------- # DATA # ----------------------------- diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 655c4b1399d2a..381bf0a7c0a07 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -195,7 +195,8 @@ def test_val_step_step_end(tmpdir): # out are the results of the full loop # eval_results are output of _evaluate callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) - assert len(callback_metrics) == 5 + assert len(callback_metrics) == 6 + assert callback_metrics['val_step_end'] == 1802 assert len(eval_results) == 2 assert eval_results[0]['log']['log_acc1'] == 12 assert eval_results[1]['log']['log_acc1'] == 13 @@ -205,9 +206,91 @@ def test_val_step_step_end(tmpdir): assert k in eval_results[1] # ensure all the keys ended up as candidates for callbacks - assert len(trainer.callback_metrics) == 8 + assert len(trainer.callback_metrics) == 9 # make sure correct steps were called assert model.validation_step_called assert model.validation_step_end_called assert not model.validation_epoch_end_called + + +def test_no_val_step_end(tmpdir): + """ + Test that val step + val epoch end + """ + model = DeterministicModel() + model.training_step = model.training_step_dict_return + model.validation_step = model.validation_step_dict_return + model.validation_step_end = None + model.validation_epoch_end = model.validation_epoch_end + + trainer = Trainer( + default_root_dir=tmpdir, + weights_summary=None, + limit_train_batches=2, + limit_val_batches=3, + num_sanity_val_steps=0 + ) + trainer.fit(model) + + # out are the results of the full loop + # eval_results are output of _evaluate + callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) + assert len(callback_metrics) == 6 + assert len(eval_results) == 1 + + eval_results = eval_results[0] + assert 'val_step_end' not in eval_results + assert eval_results['val_epoch_end'] == 1233 + + for k in ['val_loss', 'log', 'progress_bar']: + assert k in eval_results + + # ensure all the keys ended up as candidates for callbacks + assert len(trainer.callback_metrics) == 9 + + # make sure correct steps were called + assert model.validation_step_called + assert not model.validation_step_end_called + assert model.validation_epoch_end_called + + +def test_full_val_loop(tmpdir): + """ + Test that val step + val step + val epoch end + """ + model = DeterministicModel() + model.training_step = model.training_step_dict_return + model.validation_step = model.validation_step_dict_return + model.validation_step_end = model.validation_step_end + model.validation_epoch_end = model.validation_epoch_end + + trainer = Trainer( + default_root_dir=tmpdir, + weights_summary=None, + limit_train_batches=2, + limit_val_batches=3, + num_sanity_val_steps=0 + ) + trainer.fit(model) + + # out are the results of the full loop + # eval_results are output of _evaluate + callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) + assert len(callback_metrics) == 7 + assert len(eval_results) == 1 + + eval_results = eval_results[0] + assert eval_results['val_step_end'] == 1802 + assert eval_results['val_epoch_end'] == 1233 + + for k in ['val_loss', 'log', 'progress_bar']: + assert k in eval_results + + # ensure all the keys ended up as candidates for callbacks + assert len(trainer.callback_metrics) == 10 + + # make sure correct steps were called + assert model.validation_step_called + assert model.validation_step_end_called + assert model.validation_epoch_end_called From 9366372e45a3bd552b2fbb6d3d1e5686d0a86709 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 07:23:28 -0400 Subject: [PATCH 12/25] fixing val step only --- tests/trainer/test_eval_loop_dict_return.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 381bf0a7c0a07..1007dcf24003d 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -4,10 +4,6 @@ from pytorch_lightning import Trainer from tests.base.deterministic_model import DeterministicModel -# train loop + val step + val step end -# train loop + val step + val step end + val epoch end -# train loop + val step + val epoch end - def test_validation_step_no_return(tmpdir): """ From 7d38dceeb12caa799fff96959f0b3e4d3ed3369f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 07:25:54 -0400 Subject: [PATCH 13/25] fixing val step only --- tests/trainer/test_eval_loop_dict_return.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 1007dcf24003d..9bb301c56734e 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -142,7 +142,7 @@ def test_validation_step_dict_return(tmpdir): def test_val_step_step_end_no_return(tmpdir): """ - Test that val step + val step end work + Test that val step + val step end work (with no return in val step end) """ model = DeterministicModel() model.training_step = model.training_step_dict_return @@ -253,7 +253,7 @@ def test_no_val_step_end(tmpdir): def test_full_val_loop(tmpdir): """ - Test that val step + val step + val epoch end + Test that val step + val step end + val epoch end """ model = DeterministicModel() model.training_step = model.training_step_dict_return From 5e7f61be79566da3a77b5c1a5cf61bff8a6e5569 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 10:27:54 -0400 Subject: [PATCH 14/25] fixing val step only --- pytorch_lightning/trainer/evaluation_loop.py | 7 +++++-- pytorch_lightning/trainer/trainer.py | 21 ++++++++++---------- tests/trainer/test_eval_loop_dict_return.py | 21 +++++++++++++------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index 2ec67caa9bd15..c21dc20fe50f6 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -392,7 +392,7 @@ def run_evaluation(self, test_mode: bool = False): eval_results = self._evaluate(self.model, dataloaders, max_batches, test_mode) # enable no returns - callback_metrics = {} + eval_loop_results = [] if eval_results is not None and len(eval_results) > 0: # in eval, the user may return something at every validation step without final reduction @@ -418,6 +418,9 @@ def run_evaluation(self, test_mode: bool = False): # track metrics for callbacks self.callback_metrics.update(callback_metrics) + if len(callback_metrics) > 0: + eval_loop_results.append(callback_metrics) + # hook model.on_post_performance_check() @@ -436,7 +439,7 @@ def run_evaluation(self, test_mode: bool = False): else: self.on_validation_end() - return callback_metrics, eval_results + return eval_loop_results, eval_results def evaluation_forward(self, model, batch, batch_idx, dataloader_idx, test_mode: bool = False): # make dataloader_idx arg in validation_step optional diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 85c2dc1f7c0dd..84ccdf136b248 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -128,7 +128,8 @@ class Trainer( >>> trainer = Trainer(max_epochs=1, progress_bar_refresh_rate=0) >>> trainer.fit(model, train_loader) 1 - >>> trainer.test(model, train_loader) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> test_outputs = trainer.test(model, train_loader) + >>> len(test_outputs)# doctest: +ELLIPSIS +NORMALIZE_WHITESPACE 1 """ DEPRECATED_IN_0_9 = ('use_amp', 'show_progress_bar', 'training_tqdm_dict', 'num_tpu_cores') @@ -1162,18 +1163,18 @@ def run_pretrain_routine(self, model: LightningModule): if self.testing: # only load test dataloader for testing # self.reset_test_dataloader(ref_model) - results, _ = self.run_evaluation(test_mode=True) + eval_loop_results, _ = self.run_evaluation(test_mode=True) - # remove all cuda tensors - if results is not None and isinstance(results, dict) and len(results) > 0: - for k, v in results.items(): - if isinstance(v, torch.Tensor): - results[k] = v.cpu().item() - - return results - else: + if len(eval_loop_results) == 0: return 1 + # remove the tensors from the eval results + for i, result in eval_loop_results: + if isinstance(result, dict): + for k, v in result.items(): + if isinstance(v, torch.Tensor): + result[k] = v.cpu().item() + # check if we should run validation during training self.disable_validation = not (self.is_overridden('validation_step') and self.limit_val_batches > 0) \ and not self.fast_dev_run diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index 9bb301c56734e..ef3a18fa1d979 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -48,7 +48,8 @@ def test_validation_step_scalar_return(tmpdir): default_root_dir=tmpdir, weights_summary=None, limit_train_batches=2, - limit_val_batches=2 + limit_val_batches=2, + max_epochs=2 ) trainer.fit(model) @@ -79,7 +80,8 @@ def test_validation_step_arbitrary_dict_return(tmpdir): default_root_dir=tmpdir, weights_summary=None, limit_train_batches=2, - limit_val_batches=2 + limit_val_batches=2, + max_epochs=2 ) trainer.fit(model) @@ -115,7 +117,8 @@ def test_validation_step_dict_return(tmpdir): default_root_dir=tmpdir, weights_summary=None, limit_train_batches=2, - limit_val_batches=2 + limit_val_batches=2, + max_epochs=2 ) trainer.fit(model) @@ -154,7 +157,8 @@ def test_val_step_step_end_no_return(tmpdir): default_root_dir=tmpdir, weights_summary=None, limit_train_batches=2, - limit_val_batches=2 + limit_val_batches=2, + max_epochs=2 ) trainer.fit(model) @@ -184,7 +188,8 @@ def test_val_step_step_end(tmpdir): default_root_dir=tmpdir, weights_summary=None, limit_train_batches=2, - limit_val_batches=2 + limit_val_batches=2, + max_epochs=2 ) trainer.fit(model) @@ -225,7 +230,8 @@ def test_no_val_step_end(tmpdir): weights_summary=None, limit_train_batches=2, limit_val_batches=3, - num_sanity_val_steps=0 + num_sanity_val_steps=0, + max_epochs=2 ) trainer.fit(model) @@ -266,7 +272,8 @@ def test_full_val_loop(tmpdir): weights_summary=None, limit_train_batches=2, limit_val_batches=3, - num_sanity_val_steps=0 + num_sanity_val_steps=0, + max_epochs=2 ) trainer.fit(model) From 5b4b4ed115046078b0d622eeba48554541b70a16 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 10:30:09 -0400 Subject: [PATCH 15/25] fixing val step only --- pytorch_lightning/trainer/trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 84ccdf136b248..ae03be6fbf6f7 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -129,8 +129,8 @@ class Trainer( >>> trainer.fit(model, train_loader) 1 >>> test_outputs = trainer.test(model, train_loader) - >>> len(test_outputs)# doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - 1 + >>> len(test_outputs) + 4 """ DEPRECATED_IN_0_9 = ('use_amp', 'show_progress_bar', 'training_tqdm_dict', 'num_tpu_cores') From c65d5a84a70e71154fa75af526caa841b5669cb8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 10:39:10 -0400 Subject: [PATCH 16/25] fixing val step only --- pytorch_lightning/trainer/trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index ae03be6fbf6f7..c7905db586e52 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -128,8 +128,8 @@ class Trainer( >>> trainer = Trainer(max_epochs=1, progress_bar_refresh_rate=0) >>> trainer.fit(model, train_loader) 1 - >>> test_outputs = trainer.test(model, train_loader) - >>> len(test_outputs) + >>> test_outputs = trainer.test(model, train_loader) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> len(test_outputs) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE 4 """ DEPRECATED_IN_0_9 = ('use_amp', 'show_progress_bar', 'training_tqdm_dict', 'num_tpu_cores') From aaa8f7890568ecc870fd2f5e461e2e3fa2cee4ac Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 11:14:00 -0400 Subject: [PATCH 17/25] fixing val step only --- pytorch_lightning/trainer/evaluation_loop.py | 3 ++- pytorch_lightning/trainer/trainer.py | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index c21dc20fe50f6..f79c0dc724b56 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -176,6 +176,7 @@ class TrainerEvaluationLoopMixin(ABC): use_tpu: bool reload_dataloaders_every_epoch: ... tpu_id: int + verbose_test: bool # Callback system on_validation_batch_start: Callable @@ -406,7 +407,7 @@ def run_evaluation(self, test_mode: bool = False): self.add_progress_bar_metrics(prog_bar_metrics) # log results of test - if test_mode and self.is_global_zero: + if test_mode and self.is_global_zero and self.verbose_test: print('-' * 80) print('TEST RESULTS') pprint(callback_metrics) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index c7905db586e52..4b3738dc1d70f 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -128,7 +128,7 @@ class Trainer( >>> trainer = Trainer(max_epochs=1, progress_bar_refresh_rate=0) >>> trainer.fit(model, train_loader) 1 - >>> test_outputs = trainer.test(model, train_loader) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE + >>> test_outputs = trainer.test(model, train_loader, verbose=False) >>> len(test_outputs) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE 4 """ @@ -397,6 +397,9 @@ def __init__( self.test_dataloaders = None self.val_dataloaders = None + # when true, prints test results + self.verbose_test = True + # when .test() is called, it sets this self.tested_ckpt_path = None @@ -1223,7 +1226,8 @@ def test( self, model: Optional[LightningModule] = None, test_dataloaders: Optional[Union[DataLoader, List[DataLoader]]] = None, - ckpt_path: Optional[str] = 'best' + ckpt_path: Optional[str] = 'best', + verbose: bool = True ): r""" @@ -1238,6 +1242,11 @@ def test( ckpt_path: Either ``best`` or path to the checkpoint you wish to test. If ``None``, use the weights from the last epoch to test. Default to ``best``. + verbose: If True, prints the test results + + Returns: + The final test result dictionary. If no test_epoch_end is defined returns a list of dictionaries + Example:: # Option 1 @@ -1277,6 +1286,8 @@ def test( # -------------------- # SETUP HOOK # -------------------- + self.verbose_test = verbose + if self.global_rank != 0: return From 08119511faad583a123fc9c0a9961ef985cba297 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 11:24:57 -0400 Subject: [PATCH 18/25] fixing val step only --- pytorch_lightning/trainer/trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 4b3738dc1d70f..d2c3406ef0d7e 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1178,6 +1178,8 @@ def run_pretrain_routine(self, model: LightningModule): if isinstance(v, torch.Tensor): result[k] = v.cpu().item() + return eval_loop_results + # check if we should run validation during training self.disable_validation = not (self.is_overridden('validation_step') and self.limit_val_batches > 0) \ and not self.fast_dev_run From 4c7c80da4e1fed36216c3f099ea8dc7f73fa20e2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 11:26:58 -0400 Subject: [PATCH 19/25] fixing val step only --- pytorch_lightning/trainer/trainer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index d2c3406ef0d7e..70e9c0f16dfba 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -129,8 +129,8 @@ class Trainer( >>> trainer.fit(model, train_loader) 1 >>> test_outputs = trainer.test(model, train_loader, verbose=False) - >>> len(test_outputs) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - 4 + >>> len(test_outputs) + 25 """ DEPRECATED_IN_0_9 = ('use_amp', 'show_progress_bar', 'training_tqdm_dict', 'num_tpu_cores') From f66dec9b2ca65e6012e386544b4b2907d698a030 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 11:36:41 -0400 Subject: [PATCH 20/25] fixing val step only --- pytorch_lightning/trainer/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 70e9c0f16dfba..1f611ab7ac57c 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1172,7 +1172,7 @@ def run_pretrain_routine(self, model: LightningModule): return 1 # remove the tensors from the eval results - for i, result in eval_loop_results: + for i, result in enumerate(eval_loop_results): if isinstance(result, dict): for k, v in result.items(): if isinstance(v, torch.Tensor): From 3fb798253c7936e155ea589cae225e5567c19eec Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 11:47:07 -0400 Subject: [PATCH 21/25] fixing val step only --- pytorch_lightning/trainer/evaluation_loop.py | 2 +- tests/trainer/test_dataloaders.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index f79c0dc724b56..4519d05c74fdf 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -313,7 +313,7 @@ def _evaluate( outputs.append(dl_outputs) - eval_results = {} + eval_results = outputs # with a single dataloader don't pass an array if len(dataloaders) == 1: diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index e76ef0e556352..05c003a766049 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -115,6 +115,8 @@ def test_multiple_val_dataloader(tmpdir): tpipes.run_prediction(dataloader, trainer.model) +test_multiple_val_dataloader('') + @pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific']) def test_multiple_test_dataloader(tmpdir, ckpt_path): """Verify multiple test_dataloader.""" From aa95e4fdaefbce6a9d4766c781ff47f7c00733f0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 12:09:44 -0400 Subject: [PATCH 22/25] fixing val step only --- pytorch_lightning/trainer/evaluation_loop.py | 2 +- tests/trainer/test_dataloaders.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index 4519d05c74fdf..440a4ea4e6ac3 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -387,7 +387,7 @@ def run_evaluation(self, test_mode: bool = False): # enable disabling validation step with limit_val_batches = 0 should_skip = sum(max_batches) == 0 if should_skip: - return + return [], [] # run evaluation eval_results = self._evaluate(self.model, dataloaders, max_batches, test_mode) diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index 05c003a766049..85b706e1dc9a4 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -115,8 +115,6 @@ def test_multiple_val_dataloader(tmpdir): tpipes.run_prediction(dataloader, trainer.model) -test_multiple_val_dataloader('') - @pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific']) def test_multiple_test_dataloader(tmpdir, ckpt_path): """Verify multiple test_dataloader.""" @@ -297,7 +295,6 @@ def test_dataloaders_with_limit_percent_batches(tmpdir, limit_train_batches, lim ] assert trainer.num_test_batches == expected_test_batches - @pytest.mark.parametrize( ['limit_train_batches', 'limit_val_batches', 'limit_test_batches'], [ From d40c4dd15bb9c224dfb2a7676b6bef640ca7e3f7 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 12:23:55 -0400 Subject: [PATCH 23/25] fixing val step only --- tests/trainer/test_eval_loop_dict_return.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/trainer/test_eval_loop_dict_return.py b/tests/trainer/test_eval_loop_dict_return.py index ef3a18fa1d979..d4e845badeb9b 100644 --- a/tests/trainer/test_eval_loop_dict_return.py +++ b/tests/trainer/test_eval_loop_dict_return.py @@ -125,7 +125,8 @@ def test_validation_step_dict_return(tmpdir): # out are the results of the full loop # eval_results are output of _evaluate callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) - assert len(callback_metrics) == 5 + assert len(callback_metrics) == 2 + assert len(callback_metrics[0]) == 5 assert len(eval_results) == 2 assert eval_results[0]['log']['log_acc1'] == 12 assert eval_results[1]['log']['log_acc1'] == 13 @@ -135,7 +136,7 @@ def test_validation_step_dict_return(tmpdir): assert k in eval_results[1] # ensure all the keys ended up as candidates for callbacks - assert len(trainer.callback_metrics) == 7 + assert len(trainer.callback_metrics) == 8 # make sure correct steps were called assert model.validation_step_called @@ -196,7 +197,10 @@ def test_val_step_step_end(tmpdir): # out are the results of the full loop # eval_results are output of _evaluate callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) - assert len(callback_metrics) == 6 + assert len(callback_metrics) == 2 + assert len(callback_metrics[0]) == 6 + + callback_metrics = callback_metrics[0] assert callback_metrics['val_step_end'] == 1802 assert len(eval_results) == 2 assert eval_results[0]['log']['log_acc1'] == 12 @@ -238,7 +242,8 @@ def test_no_val_step_end(tmpdir): # out are the results of the full loop # eval_results are output of _evaluate callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) - assert len(callback_metrics) == 6 + assert len(callback_metrics) == 1 + assert len(callback_metrics[0]) == 6 assert len(eval_results) == 1 eval_results = eval_results[0] @@ -280,7 +285,8 @@ def test_full_val_loop(tmpdir): # out are the results of the full loop # eval_results are output of _evaluate callback_metrics, eval_results = trainer.run_evaluation(test_mode=False) - assert len(callback_metrics) == 7 + assert len(callback_metrics) == 1 + assert len(callback_metrics[0]) == 7 assert len(eval_results) == 1 eval_results = eval_results[0] From 2943f07d834079871acebca9825d60b66d6d94d4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 12:33:54 -0400 Subject: [PATCH 24/25] fixing val step only --- tests/models/test_test_loop.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/models/test_test_loop.py b/tests/models/test_test_loop.py index 89103116bd8f3..c65809ad25221 100644 --- a/tests/models/test_test_loop.py +++ b/tests/models/test_test_loop.py @@ -21,12 +21,12 @@ def test_single_gpu_test(tmpdir): trainer.fit(model) assert 'ckpt' in trainer.checkpoint_callback.best_model_path results = trainer.test() - assert 'test_acc' in results + assert 'test_acc' in results[0] old_weights = model.c_d1.weight.clone().detach().cpu() results = trainer.test(model) - assert 'test_acc' in results + assert 'test_acc' in results[0] # make sure weights didn't change new_weights = model.c_d1.weight.clone().detach().cpu() @@ -50,12 +50,12 @@ def test_dp_test(tmpdir): trainer.fit(model) assert 'ckpt' in trainer.checkpoint_callback.best_model_path results = trainer.test() - assert 'test_acc' in results + assert 'test_acc' in results[0] old_weights = model.c_d1.weight.clone().detach().cpu() results = trainer.test(model) - assert 'test_acc' in results + assert 'test_acc' in results[0] # make sure weights didn't change new_weights = model.c_d1.weight.clone().detach().cpu() @@ -79,12 +79,12 @@ def test_ddp_spawn_test(tmpdir): trainer.fit(model) assert 'ckpt' in trainer.checkpoint_callback.best_model_path results = trainer.test() - assert 'test_acc' in results + assert 'test_acc' in results[0] old_weights = model.c_d1.weight.clone().detach().cpu() results = trainer.test(model) - assert 'test_acc' in results + assert 'test_acc' in results[0] # make sure weights didn't change new_weights = model.c_d1.weight.clone().detach().cpu() From d8f8977cfe76a1a814d4491c1444f7092be074dc Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 14 Jul 2020 12:43:34 -0400 Subject: [PATCH 25/25] fixing val step only --- tests/models/test_restore.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 9331d6c7a540f..244439f7634d7 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -52,7 +52,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir): pretrained_model.cpu() # test we have good test accuracy - acc = results['test_acc'] + acc = results[0]['test_acc'] assert acc > 0.5, f"Model failed to get expected {0.5} accuracy. test_acc = {acc}" dataloaders = model.test_dataloader() @@ -102,7 +102,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir): results = new_trainer.test(pretrained_model) pretrained_model.cpu() - acc = results['test_acc'] + acc = results[0]['test_acc'] assert acc > 0.5, f"Model failed to get expected {0.5} accuracy. test_acc = {acc}" dataloaders = model.test_dataloader()