diff --git a/pytorch_lightning/trainer/distrib_data_parallel.py b/pytorch_lightning/trainer/distrib_data_parallel.py
index 83bfcd29e8d3a..3af1354a5963f 100644
--- a/pytorch_lightning/trainer/distrib_data_parallel.py
+++ b/pytorch_lightning/trainer/distrib_data_parallel.py
@@ -427,7 +427,13 @@ def ddp_train(self, process_idx, model, is_master=False, proc_offset=0):
         # try to init for 20 times at max in case ports are taken
         # where to store ip_table
         model.trainer = self
+        print('-'*100)
+        print('starting ddp')
+        print('-'*100)
         model.init_ddp_connection(self.proc_rank, self.world_size, self.is_slurm_managing_tasks)
+        print('-'*100)
+        print('ddp started')
+        print('-'*100)
 
         # CHOOSE OPTIMIZER
         # allow for lr schedulers as well