
Missing training_step outputs in training_epoch_end #2327

Closed
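For context on what this PR fixes: every dict returned from `training_step` is expected to reach `training_epoch_end`, but `run_training_batch` previously returned only the last `batch_output`, so outputs went missing whenever a single batch produced several (multiple optimizers, truncated-bptt splits). Below is a minimal sketch of the expected contract, assuming the 0.8-era `LightningModule` API (hypothetical model, not code from this PR):

```python
import torch
import pytorch_lightning as pl


class LitModel(pl.LightningModule):
    """Hypothetical module: every dict returned by training_step
    should appear in the `outputs` list of training_epoch_end."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(4, 1)

    def training_step(self, batch, batch_idx):
        # one of these dicts is produced per batch (and per optimizer
        # step / tbptt split when those features are enabled)
        return {'loss': self.layer(batch).sum()}

    def training_epoch_end(self, outputs):
        # before this fix, entries could be missing from `outputs`
        avg_loss = torch.stack([o['loss'] for o in outputs]).mean()
        return {'log': {'avg_loss': avg_loss}}

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)
```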
11 changes: 9 additions & 2 deletions pytorch_lightning/trainer/training_loop.py
@@ -460,8 +460,9 @@ def run_training_epoch(self):
             # only track outputs when user implements training_epoch_end
             # otherwise we will build up unnecessary memory
+            # combine all batch_outputs
             if self.is_overridden('training_epoch_end', model=self.get_model()):
-                outputs.append(batch_output)
+                outputs = outputs + batch_output

             # when returning -1 from train_step, we end epoch early
             early_stop_epoch = batch_result == -1
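A note on the append-to-concatenation switch above: `run_training_batch` now returns a list of outputs (one per optimizer step / split) rather than a single dict, so the epoch loop concatenates to keep `outputs` flat. A toy illustration (plain Python, not Lightning code):

```python
# batch_output as returned by run_training_batch after this PR:
# a list with one entry per optimizer step / tbptt split
batch_output = [{'loss': 0.5}, {'loss': 0.4}]

outputs = []
outputs.append(batch_output)       # old behaviour: nests the list
assert outputs == [[{'loss': 0.5}, {'loss': 0.4}]]

outputs = []
outputs = outputs + batch_output   # new behaviour: stays flat
assert outputs == [{'loss': 0.5}, {'loss': 0.4}]
```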
@@ -553,6 +554,9 @@ def run_training_batch(self, batch, batch_idx):
         # track metrics to log
         all_log_metrics = []

+        # bookkeeping all split_batch and optimizer iteration batch outputs
+        all_batch_outputs = []
+
         if batch is None:
             return 0, grad_norm_dic, {}, {}

@@ -638,6 +642,9 @@ def optimizer_closure():
                     # calculate loss
                     loss, batch_output = optimizer_closure()

+                    # track batch_output
+                    all_batch_outputs.append(batch_output)
+
                     # check if loss or model weights are nan
                     if self.terminate_on_nan:
                         self.detect_nan_tensors(loss)
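Taken together, the two hunks above collect one output per (split, optimizer) pass instead of keeping only the last. A runnable schematic with hypothetical stubs (`splits`, `optimizer_closure`, and the optimizer names are stand-ins, not the real Trainer internals):

```python
# Stand-ins for truncated-bptt splits and multiple optimizers
splits = ['split_0', 'split_1']
optimizers = ['opt_a', 'opt_b']

def optimizer_closure(split, opt):
    # stand-in for the real closure, which runs training_step + backward
    return 0.0, {'loss': 0.0, 'split': split, 'opt': opt}

all_batch_outputs = []
for split in splits:
    for opt in optimizers:
        loss, batch_output = optimizer_closure(split, opt)
        all_batch_outputs.append(batch_output)

# every training_step result from this batch is kept, not just the last
assert len(all_batch_outputs) == len(splits) * len(optimizers)
```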
@@ -688,7 +695,7 @@ def optimizer_closure():
         # track all metrics for callbacks
         self.callback_metrics.update({k: v for d in all_callback_metrics for k, v in d.items()})

-        return 0, grad_norm_dic, all_log_metrics, batch_output
+        return 0, grad_norm_dic, all_log_metrics, all_batch_outputs

     def _get_optimizers_iterable(self):
         if not self.optimizer_frequencies:
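With the new return value in place, a module with several optimizers should see one entry per (batch, optimizer) pair in `training_epoch_end`, which is the case the original issue reported as broken. A sketch under the same 0.8-era API assumptions (hypothetical model):

```python
import torch
import pytorch_lightning as pl


class TwoOptModel(pl.LightningModule):
    """Hypothetical two-optimizer module: after this fix,
    training_epoch_end should receive 2 * num_batches outputs."""

    def __init__(self):
        super().__init__()
        self.a = torch.nn.Linear(4, 1)
        self.b = torch.nn.Linear(4, 1)

    def training_step(self, batch, batch_idx, optimizer_idx):
        net = self.a if optimizer_idx == 0 else self.b
        return {'loss': net(batch).sum()}

    def training_epoch_end(self, outputs):
        # one entry per (batch, optimizer) pair
        avg = torch.stack([o['loss'] for o in outputs]).mean()
        return {'log': {'avg_loss': avg}}

    def configure_optimizers(self):
        return [torch.optim.SGD(self.a.parameters(), lr=0.1),
                torch.optim.SGD(self.b.parameters(), lr=0.1)]
```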