From 9b742d27a9c8c4f59bd28b7bb25a8a611ef0931c Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Mon, 8 Jun 2020 12:52:15 -0400
Subject: [PATCH] training batch clean up

---
 pytorch_lightning/trainer/distrib_data_parallel.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pytorch_lightning/trainer/distrib_data_parallel.py b/pytorch_lightning/trainer/distrib_data_parallel.py
index 83bfcd29e8d3a..3af1354a5963f 100644
--- a/pytorch_lightning/trainer/distrib_data_parallel.py
+++ b/pytorch_lightning/trainer/distrib_data_parallel.py
@@ -427,7 +427,13 @@ def ddp_train(self, process_idx, model, is_master=False, proc_offset=0):
         # try to init for 20 times at max in case ports are taken
         # where to store ip_table
         model.trainer = self
+        print('-'*100)
+        print('starting ddp')
+        print('-'*100)
         model.init_ddp_connection(self.proc_rank, self.world_size, self.is_slurm_managing_tasks)
+        print('-'*100)
+        print('ddp started')
+        print('-'*100)

         # CHOOSE OPTIMIZER
         # allow for lr schedulers as well
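
For context on what the bracketed call does: the sketch below is a minimal, illustrative version of a DDP connection setup, not the library's actual `init_ddp_connection` implementation. The function name mirrors the one in the patch, but the body, the MASTER_ADDR/MASTER_PORT defaults, and the backend selection are assumptions; it only shows the kind of `torch.distributed.init_process_group` call that the added prints would surround, which is a blocking rendezvous and therefore a common place for hangs.

    import os
    import torch
    import torch.distributed as dist

    def init_ddp_connection(proc_rank: int, world_size: int) -> None:
        """Minimal sketch of a DDP rendezvous (illustrative, not the library code)."""
        # Assumption: the launcher normally provides these; defaults are for local runs.
        os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
        os.environ.setdefault('MASTER_PORT', '12910')

        # NCCL for GPU training, Gloo as a CPU fallback.
        backend = 'nccl' if torch.cuda.is_available() else 'gloo'

        # Blocks until all `world_size` processes have joined the group,
        # which is why logging immediately before and after it helps
        # pinpoint a stalled rank.
        dist.init_process_group(backend, rank=proc_rank, world_size=world_size)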