diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index b6cdbb0cf130df..70a6940588c7ba 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -371,10 +371,12 @@ def __init__(
                      ' val and test loop using a single batch')

         # set default save path if user didn't provide one
-        self.default_root_dir = default_root_dir
-
-        if self.default_root_dir is None:
+        if default_root_dir is None:
             self.default_root_dir = os.getcwd()
+        else:
+            # we have to do str() because the unit tests violate type annotation and pass path object
+            self.default_root_dir = str(default_root_dir)
+

         # training bookeeping
         self.total_batch_idx = 0
diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py
index 39e45a134a4cb4..f85ba273bf6d1c 100644
--- a/pytorch_lightning/trainer/training_io.py
+++ b/pytorch_lightning/trainer/training_io.py
@@ -375,7 +375,7 @@ def restore_hpc_weights_if_needed(self, model: LightningModule):
         did_restore = False

         # look for hpc weights
-        folderpath = self.weights_save_path
+        folderpath = str(self.weights_save_path)
         if gfile.exists(folderpath):
             files = gfile.listdir(folderpath)
             hpc_weight_paths = [x for x in files if 'hpc_ckpt' in x]
@@ -452,6 +452,7 @@ def restore_training_state(self, checkpoint):

     # ----------------------------------
     def hpc_save(self, folderpath: str, logger):
         # make sure the checkpoint folder exists
+        folderpath = str(folderpath)  # because the tests pass a path object
         if not gfile.exists(folderpath):
             gfile.makedirs(folderpath)
@@ -511,7 +512,7 @@ def hpc_load(self, folderpath, on_gpu):
         log.info(f'restored hpc model from: {filepath}')

     def max_ckpt_in_folder(self, path, name_key='ckpt_'):
-        files = gfile.listdir(path)
+        files = gfile.listdir(str(path))
         files = [x for x in files if name_key in x]
         if len(files) == 0:
             return 0
diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py
index a6d0f683516e11..f8a8fead41f586 100644
--- a/tests/loggers/test_all.py
+++ b/tests/loggers/test_all.py
@@ -46,7 +46,7 @@ def log_metrics(self, metrics, step):
             super().log_metrics(metrics, step)
             self.history.append((step, metrics))

-    logger_args = _get_logger_args(logger_class, str(tmpdir))
+    logger_args = _get_logger_args(logger_class, tmpdir)
     logger = StoreHistoryLogger(**logger_args)

     trainer = Trainer(
@@ -82,7 +82,7 @@ def test_loggers_pickle(tmpdir, monkeypatch, logger_class):
     import atexit
     monkeypatch.setattr(atexit, 'register', lambda _: None)

-    logger_args = _get_logger_args(logger_class, str(tmpdir))
+    logger_args = _get_logger_args(logger_class, tmpdir)
     logger = logger_class(**logger_args)

     # test pickling loggers
@@ -109,7 +109,7 @@ def test_logger_reset_correctly(tmpdir, extra_params):
     model = EvalModelTemplate()

     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         **extra_params
     )
     logger1 = trainer.logger
diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py
index 0656723b3f334c..051db81d1b1657 100644
--- a/tests/models/test_cpu.py
+++ b/tests/models/test_cpu.py
@@ -19,7 +19,7 @@ def test_cpu_slurm_save_load(tmpdir):
     model = EvalModelTemplate(**hparams)

     # logger file to get meta
-    logger = tutils.get_default_logger(str(tmpdir))
+    logger = tutils.get_default_logger(tmpdir)
     version = logger.version

     # fit model
@@ -28,7 +28,7 @@ def test_cpu_slurm_save_load(tmpdir):
         logger=logger,
         train_percent_check=0.2,
         val_percent_check=0.2,
-        checkpoint_callback=ModelCheckpoint(str(tmpdir)),
+        checkpoint_callback=ModelCheckpoint(tmpdir)
     )
     result = trainer.fit(model)
     real_global_step = trainer.global_step
@@ -54,13 +54,17 @@ def test_cpu_slurm_save_load(tmpdir):

     # test HPC saving
     # simulate snapshot on slurm
-    saved_filepath = trainer.hpc_save(str(tmpdir), logger)
+    saved_filepath = trainer.hpc_save(tmpdir, logger)
     assert os.path.exists(saved_filepath)

     # new logger file to get meta
-    logger = tutils.get_default_logger(str(tmpdir), version=version)
+    logger = tutils.get_default_logger(tmpdir, version=version)

-    trainer = Trainer(max_epochs=1, logger=logger, checkpoint_callback=ModelCheckpoint(str(tmpdir)),)
+    trainer = Trainer(
+        max_epochs=1,
+        logger=logger,
+        checkpoint_callback=ModelCheckpoint(tmpdir),
+    )
     model = EvalModelTemplate(**hparams)

     # set the epoch start hook so we can predict before the model does the full training
@@ -83,7 +87,7 @@ def test_early_stopping_cpu_model(tmpdir):
     """Test each of the trainer options."""
     stopping = EarlyStopping(monitor='val_loss', min_delta=0.1)
     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         early_stop_callback=stopping,
         max_epochs=2,
         gradient_clip_val=1.0,
@@ -112,7 +116,7 @@ def test_multi_cpu_model_ddp(tmpdir):
     tutils.set_random_master_port()

     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
         max_epochs=1,
         train_percent_check=0.4,
@@ -129,7 +133,7 @@ def test_lbfgs_cpu_model(tmpdir):
     """Test each of the trainer options."""
     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1,
         progress_bar_refresh_rate=0,
         weights_summary='top',
@@ -148,7 +152,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir):
     """Test each of the trainer options."""
     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1,
         gradient_clip_val=1.0,
         overfit_pct=0.20,
@@ -170,14 +174,14 @@ def test_running_test_after_fitting(tmpdir):
     model = EvalModelTemplate()

     # logger file to get meta
-    logger = tutils.get_default_logger(str(tmpdir))
+    logger = tutils.get_default_logger(tmpdir)

     # logger file to get weights
     checkpoint = tutils.init_checkpoint_callback(logger)

     # fit model
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
         max_epochs=2,
         train_percent_check=0.4,
@@ -201,7 +205,7 @@ def test_running_test_no_val(tmpdir):
     model = EvalModelTemplate()

     # logger file to get meta
-    logger = tutils.get_default_logger(str(tmpdir))
+    logger = tutils.get_default_logger(tmpdir)

     # logger file to get weights
     checkpoint = tutils.init_checkpoint_callback(logger)
@@ -280,7 +284,7 @@ def test_simple_cpu(tmpdir):

     # fit model
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1,
         val_percent_check=0.1,
         train_percent_check=0.1,
@@ -294,7 +298,7 @@ def test_cpu_model(tmpdir):
     """Make sure model trains on CPU."""
     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
         max_epochs=1,
         train_percent_check=0.4,
@@ -309,7 +313,7 @@ def test_all_features_cpu_model(tmpdir):
     """Test each of the trainer options."""
     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         gradient_clip_val=1.0,
         overfit_pct=0.20,
         track_grad_norm=2,
@@ -383,7 +387,7 @@ def train_dataloader(self):

     # fit model
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1,
         truncated_bptt_steps=truncated_bptt_steps,
         val_percent_check=0,
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 90f543bc2fb1c2..80249a727ccbbd 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -18,7 +18,7 @@ def test_single_gpu_model(tmpdir, gpus):
     """Make sure single GPU works (DP mode)."""
     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
         max_epochs=1,
         train_percent_check=0.1,
@@ -38,7 +38,7 @@ def test_multi_gpu_model(tmpdir, backend):
     tutils.set_random_master_port()

     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1,
         train_percent_check=0.4,
         val_percent_check=0.2,
@@ -84,7 +84,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
 def test_multi_gpu_none_backend(tmpdir):
     """Make sure when using multiple GPUs the user can't use `distributed_backend = None`."""
     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         progress_bar_refresh_rate=0,
         max_epochs=1,
         train_percent_check=0.1,
diff --git a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py
index c635d87788363b..d0becff0918c65 100755
--- a/tests/trainer/test_lr_finder.py
+++ b/tests/trainer/test_lr_finder.py
@@ -14,7 +14,7 @@ def test_error_on_more_than_1_optimizer(tmpdir):

     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1
     )

@@ -29,7 +29,7 @@ def test_model_reset_correctly(tmpdir):

     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1
     )

@@ -51,7 +51,7 @@ def test_trainer_reset_correctly(tmpdir):

     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1
     )

@@ -81,7 +81,7 @@ def test_trainer_arg_bool(tmpdir):

     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=2,
         auto_lr_find=True
     )
@@ -100,7 +100,7 @@ def test_trainer_arg_str(tmpdir):
     before_lr = model.my_fancy_lr
     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=2,
         auto_lr_find='my_fancy_lr'
     )
@@ -120,7 +120,7 @@ def test_call_to_trainer_method(tmpdir):
     before_lr = hparams.get('learning_rate')
     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=2,
     )

@@ -144,7 +144,7 @@ def test_accumulation_and_early_stopping(tmpdir):
     before_lr = hparams.get('learning_rate')
     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         accumulate_grad_batches=2,
     )

@@ -167,7 +167,7 @@ def test_suggestion_parameters_work(tmpdir):

     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=3,
     )

@@ -187,7 +187,7 @@ def test_suggestion_with_non_finite_values(tmpdir):

     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=3
     )

diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 0c1063ea4ca8ee..f391260c139f42 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -30,12 +30,12 @@ def test_no_val_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt):
     model = EvalModelTemplate()

     # logger file to get meta
-    logger = tutils.get_default_logger(str(tmpdir))
+    logger = tutils.get_default_logger(tmpdir)

     trainer = Trainer(
         max_epochs=1,
         logger=logger,
-        checkpoint_callback=ModelCheckpoint(str(tmpdir))
+        checkpoint_callback=ModelCheckpoint(tmpdir)
     )
     # fit model
     result = trainer.fit(model)
@@ -51,7 +51,7 @@ def test_no_val_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt):
     assert LightningModule.CHECKPOINT_KEY_HYPER_PARAMS in ckpt.keys(), 'module_arguments missing from checkpoints'

     # load new model
-    hparams_path = tutils.get_data_path(logger, path_dir=str(tmpdir))
+    hparams_path = tutils.get_data_path(logger, path_dir=tmpdir)
     hparams_path = os.path.join(hparams_path, 'hparams.yaml')
     ckpt_path = f'http://{tmpdir_server[0]}:{tmpdir_server[1]}/{os.path.basename(new_weights_path)}' if url_ckpt else new_weights_path
     model_2 = EvalModelTemplate.load_from_checkpoint(
@@ -65,18 +65,18 @@ def test_no_val_end_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt):
     """Tests use case where trainer saves the model, and user loads it from tags independently."""

     # set $TORCH_HOME, which determines torch hub's cache path, to tmpdir
-    monkeypatch.setenv('TORCH_HOME', str(tmpdir))
+    monkeypatch.setenv('TORCH_HOME', tmpdir)

     model = EvalModelTemplate()

     # logger file to get meta
-    logger = tutils.get_default_logger(str(tmpdir))
+    logger = tutils.get_default_logger(tmpdir)

     # fit model
     trainer = Trainer(
         max_epochs=1,
         logger=logger,
-        checkpoint_callback=ModelCheckpoint(str(tmpdir))
+        checkpoint_callback=ModelCheckpoint(tmpdir)
     )
     result = trainer.fit(model)

@@ -88,7 +88,7 @@ def test_no_val_end_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt):
     trainer.save_checkpoint(new_weights_path)

     # load new model
-    hparams_path = tutils.get_data_path(logger, path_dir=str(tmpdir))
+    hparams_path = tutils.get_data_path(logger, path_dir=tmpdir)
     hparams_path = os.path.join(hparams_path, 'hparams.yaml')
     ckpt_path = f'http://{tmpdir_server[0]}:{tmpdir_server[1]}/{os.path.basename(new_weights_path)}' if url_ckpt else new_weights_path
     model_2 = EvalModelTemplate.load_from_checkpoint(
@@ -163,7 +163,7 @@ def _optimizer_step(self, epoch, batch_idx, optimizer,
                       train_percent_check=0.1,
                       val_percent_check=0.1,
                       max_epochs=2,
-                      default_root_dir=str(tmpdir))
+                      default_root_dir=tmpdir)

     # for the test
     trainer.optimizer_step = _optimizer_step
@@ -179,13 +179,13 @@ def test_loading_meta_tags(tmpdir):
     hparams = EvalModelTemplate.get_default_hparams()

     # save tags
-    logger = tutils.get_default_logger(str(tmpdir))
+    logger = tutils.get_default_logger(tmpdir)
     logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
     logger.log_hyperparams(hparams)
     logger.save()

     # load hparams
-    path_expt_dir = tutils.get_data_path(logger, path_dir=str(tmpdir))
+    path_expt_dir = tutils.get_data_path(logger, path_dir=tmpdir)
     hparams_path = os.path.join(path_expt_dir, TensorBoardLogger.NAME_HPARAMS_FILE)

     hparams = load_hparams_from_yaml(hparams_path)
@@ -204,13 +204,13 @@ def test_loading_yaml(tmpdir):
     hparams = EvalModelTemplate.get_default_hparams()

     # save tags
-    logger = tutils.get_default_logger(str(tmpdir))
+    logger = tutils.get_default_logger(tmpdir)
     logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
     logger.log_hyperparams(hparams)
     logger.save()

     # load hparams
-    path_expt_dir = tutils.get_data_path(logger, path_dir=str(tmpdir))
+    path_expt_dir = tutils.get_data_path(logger, path_dir=tmpdir)
     hparams_path = os.path.join(path_expt_dir, 'hparams.yaml')

     tags = load_hparams_from_yaml(hparams_path)
@@ -262,7 +262,7 @@ def mock_save_function(filepath, *args):
     # simulated losses
     losses = [10, 9, 2.8, 5, 2.5]

-    checkpoint_callback = ModelCheckpoint(str(tmpdir), save_top_k=save_top_k, save_last=save_last,
+    checkpoint_callback = ModelCheckpoint(tmpdir, save_top_k=save_top_k, save_last=save_last,
                                           prefix=file_prefix, verbose=1)
     checkpoint_callback.save_function = mock_save_function
     trainer = Trainer()
@@ -291,7 +291,7 @@ def test_model_checkpoint_only_weights(tmpdir):

     trainer = Trainer(
         max_epochs=1,
-        checkpoint_callback=ModelCheckpoint(str(tmpdir), save_weights_only=True)
+        checkpoint_callback=ModelCheckpoint(tmpdir, save_weights_only=True)
     )
     # fit model
     result = trainer.fit(model)
@@ -367,8 +367,8 @@ def increment_on_load_checkpoint(self, _):
         max_epochs=2,
         train_percent_check=0.65,
         val_percent_check=1,
-        checkpoint_callback=ModelCheckpoint(str(tmpdir), save_top_k=-1),
-        default_root_dir=str(tmpdir),
+        checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
+        default_root_dir=tmpdir,
         early_stop_callback=False,
         val_check_interval=1.,
     )
@@ -423,7 +423,7 @@ def test_trainer_max_steps_and_epochs(tmpdir):

     # define less train steps than epochs
     trainer_options.update(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=3,
         max_steps=num_train_samples + 10
     )
@@ -458,7 +458,7 @@ def test_trainer_min_steps_and_epochs(tmpdir):

     # define callback for stopping the model and default epochs
     trainer_options.update(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         early_stop_callback=EarlyStopping(monitor='val_loss', min_delta=1.0),
         val_check_interval=2,
         min_epochs=1,
@@ -501,7 +501,7 @@ def test_benchmark_option(tmpdir):

     # fit model
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1,
         benchmark=True,
     )
@@ -609,7 +609,7 @@ def training_step(self, batch, batch_idx, optimizer_idx=None):

     # fit model
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_steps=(model.test_batch_inf_loss + 1),
         terminate_on_nan=True
     )
@@ -634,7 +634,7 @@ def on_after_backward(self):
     model = CurrentModel()

     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_steps=(model.test_batch_nan + 1),
         terminate_on_nan=True
     )
@@ -669,7 +669,7 @@ def on_batch_start(self, trainer, pl_module):
         train_percent_check=0.2,
         progress_bar_refresh_rate=0,
         logger=False,
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
     )
     assert not trainer.interrupted
     trainer.fit(model)
@@ -693,7 +693,7 @@ def _optimizer_step(*args, **kwargs):
         max_steps=1,
         max_epochs=1,
         gradient_clip_val=1.0,
-        default_root_dir=str(tmpdir)
+        default_root_dir=tmpdir
     )

     # for the test
@@ -705,7 +705,7 @@ def _optimizer_step(*args, **kwargs):
 def test_gpu_choice(tmpdir):
     trainer_options = dict(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
     )

     # Only run if CUDA is available
     if not torch.cuda.is_available():
@@ -849,7 +849,7 @@ def __init__(self, **kwargs):
 def test_trainer_pickle(tmpdir):
     trainer = Trainer(
         max_epochs=1,
-        default_root_dir=str(tmpdir)
+        default_root_dir=tmpdir
     )
     pickle.dumps(trainer)
     cloudpickle.dumps(trainer)
diff --git a/tests/trainer/test_trainer_tricks.py b/tests/trainer/test_trainer_tricks.py
index 5f301aa8e0ced3..973ed32e7cd927 100755
--- a/tests/trainer/test_trainer_tricks.py
+++ b/tests/trainer/test_trainer_tricks.py
@@ -15,7 +15,7 @@ def test_model_reset_correctly(tmpdir):

     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1
     )

@@ -38,7 +38,7 @@ def test_trainer_reset_correctly(tmpdir):

     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1
     )

@@ -77,7 +77,7 @@ def test_trainer_arg(tmpdir, scale_arg):
     before_batch_size = hparams.get('batch_size')
     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1,
         auto_scale_batch_size=scale_arg,
     )
@@ -99,7 +99,7 @@ def test_call_to_trainer_method(tmpdir, scale_method):
     before_batch_size = hparams.get('batch_size')
     # logger file to get meta
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1,
     )

@@ -118,7 +118,7 @@ def test_error_on_dataloader_passed_to_fit(tmpdir):
     # only train passed to fit
     model = EvalModelTemplate()
     trainer = Trainer(
-        default_root_dir=str(tmpdir),
+        default_root_dir=tmpdir,
         max_epochs=1,
         val_percent_check=0.1,
         train_percent_check=0.2,
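Below is a minimal, standalone sketch (not part of the diff above) of the normalization behavior the trainer change relies on: pytest's `tmpdir` fixture is a `py.path.local` and users may pass `pathlib.Path`, and both stringify to a plain filesystem path, which is why a single `str()` call is enough. The helper name `normalize_root_dir` is hypothetical and only illustrates the pattern.

import os
import pathlib

import py  # provides py.path.local, the type behind pytest's `tmpdir` fixture


def normalize_root_dir(default_root_dir):
    """Hypothetical helper mirroring the trainer change: always store a plain str."""
    if default_root_dir is None:
        return os.getcwd()
    # str() covers plain strings, pathlib.Path, and py.path.local alike
    return str(default_root_dir)


if __name__ == '__main__':
    for candidate in (None, '/tmp/run', pathlib.Path('/tmp/run'), py.path.local('/tmp/run')):
        assert isinstance(normalize_root_dir(candidate), str)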