From 9a501ffd9abd25f9610cfcb786f89b7e49f101bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20=C5=81usakowski?= Date: Thu, 7 May 2020 10:45:48 +0200 Subject: [PATCH] Fix NeptuneLogger to work in ddp mode --- pytorch_lightning/loggers/neptune.py | 63 +++++++++++++++++----------- tests/loggers/test_neptune.py | 59 ++++++++++++++++---------- 2 files changed, 76 insertions(+), 46 deletions(-) diff --git a/pytorch_lightning/loggers/neptune.py b/pytorch_lightning/loggers/neptune.py index df1cfeb78f05ba..051dbadd89fe90 100644 --- a/pytorch_lightning/loggers/neptune.py +++ b/pytorch_lightning/loggers/neptune.py @@ -32,7 +32,7 @@ class NeptuneLogger(LightningLoggerBase): The Neptune logger can be used in the online mode or offline (silent) mode. To log experiment data in online mode, :class:`NeptuneLogger` requires an API key. - In offline mode, Neptune will log to a local directory. + In offline mode, the logger does not connect to Neptune. **ONLINE MODE** @@ -83,7 +83,7 @@ class NeptuneLogger(LightningLoggerBase): ... self.logger.experiment.log_artifact('model_checkpoint.pt', ...) ... self.logger.experiment.whatever_neptune_supports(...) - If you want to log objects after the training is finished use ``close_after_train=False``: + If you want to log objects after the training is finished use ``close_after_fit=False``: .. code-block:: python @@ -135,7 +135,7 @@ class NeptuneLogger(LightningLoggerBase): "namespace/project_name" for example "tom/minst-classification". If ``None``, the value of `NEPTUNE_PROJECT` environment variable will be taken. You need to create the project in https://neptune.ai first. - offline_mode: Optional default False. If ``True`` no logs will be sent + offline_mode: Optional default ``False``. If ``True`` no logs will be sent to Neptune. Usually used for debug purposes. close_after_fit: Optional default ``True``. If ``False`` the experiment will not be closed after training and additional metrics, @@ -167,6 +167,7 @@ class NeptuneLogger(LightningLoggerBase): Tags are displayed in the experiment’s Details section and can be viewed in the experiments view as a column. """ + def __init__(self, api_key: Optional[str] = None, project_name: Optional[str] = None, @@ -188,24 +189,20 @@ def __init__(self, self.params = params self.properties = properties self.tags = tags - self._experiment = None self._kwargs = kwargs + self._experiment_id = None + self._experiment = self._create_or_get_experiment() - if offline_mode: - self.mode = 'offline' - neptune.init(project_qualified_name='dry-run/project', - backend=neptune.OfflineBackend()) - else: - self.mode = 'online' - neptune.init(api_token=self.api_key, - project_qualified_name=self.project_name) - - log.info(f'NeptuneLogger was initialized in {self.mode} mode') + log.info(f'NeptuneLogger will work in {"offline" if self.offline_mode else "online"} mode') def __getstate__(self): state = self.__dict__.copy() - # cannot be pickled + + # Experiment cannot be pickled, and additionally its ID cannot be pickled in offline mode state['_experiment'] = None + if self.offline_mode: + state['_experiment_id'] = None + return state @property @@ -220,14 +217,11 @@ def experiment(self) -> Experiment: """ + # Note that even though we initialize self._experiment in __init__, + # it may still end up being None after being pickled and un-pickled if self._experiment is None: - self._experiment = neptune.create_experiment( - name=self.experiment_name, - params=self.params, - properties=self.properties, - tags=self.tags, - upload_source_files=self.upload_source_files, - **self._kwargs) + self._experiment = self._create_or_get_experiment() + return self._experiment @rank_zero_only @@ -261,14 +255,14 @@ def finalize(self, status: str) -> None: @property def name(self) -> str: - if self.mode == 'offline': + if self.offline_mode: return 'offline-name' else: return self.experiment.name @property def version(self) -> str: - if self.mode == 'offline': + if self.offline_mode: return 'offline-id-1234' else: return self.experiment.id @@ -363,3 +357,24 @@ def append_tags(self, tags: Union[str, Iterable[str]]) -> None: if str(tags) == tags: tags = [tags] # make it as an iterable is if it is not yet self.experiment.append_tags(*tags) + + def _create_or_get_experiment(self): + if self.offline_mode: + project = neptune.Session(backend=neptune.OfflineBackend()).get_project('dry-run/project') + else: + session = neptune.Session.with_default_backend(api_token=self.api_key) + project = session.get_project(self.project_name) + + if self._experiment_id is None: + exp = project.create_experiment( + name=self.experiment_name, + params=self.params, + properties=self.properties, + tags=self.tags, + upload_source_files=self.upload_source_files, + **self._kwargs) + else: + exp = project.get_experiments(id=self._experiment_id)[0] + + self._experiment_id = exp.id + return exp diff --git a/tests/loggers/test_neptune.py b/tests/loggers/test_neptune.py index 2ca3eaf513da77..b31b413175e5b4 100644 --- a/tests/loggers/test_neptune.py +++ b/tests/loggers/test_neptune.py @@ -10,53 +10,68 @@ @patch('pytorch_lightning.loggers.neptune.neptune') def test_neptune_online(neptune): - logger = NeptuneLogger(api_key='test', offline_mode=False, project_name='project') - neptune.init.assert_called_once_with(api_token='test', project_qualified_name='project') + logger = NeptuneLogger(api_key='test', project_name='project') - assert logger.name == neptune.create_experiment().name - assert logger.version == neptune.create_experiment().id + created_experiment = neptune.Session.with_default_backend().get_project().create_experiment() + + # It's important to check if the internal variable _experiment was initialized in __init__. + # Calling logger.experiment would cause a side-effect of initializing _experiment, + # if it wasn't already initialized. + assert logger._experiment == created_experiment + assert logger.name == created_experiment.name + assert logger.version == created_experiment.id @patch('pytorch_lightning.loggers.neptune.neptune') -def test_neptune_additional_methods(neptune): +def test_neptune_offline(neptune): logger = NeptuneLogger(offline_mode=True) + neptune.Session.assert_called_once_with(backend=neptune.OfflineBackend()) + assert logger.experiment == neptune.Session().get_project().create_experiment() + + +@patch('pytorch_lightning.loggers.neptune.neptune') +def test_neptune_additional_methods(neptune): + logger = NeptuneLogger(api_key='test', project_name='project') + + created_experiment = neptune.Session.with_default_backend().get_project().create_experiment() + logger.log_metric('test', torch.ones(1)) - neptune.create_experiment().log_metric.assert_called_once_with('test', torch.ones(1)) - neptune.create_experiment().log_metric.reset_mock() + created_experiment.log_metric.assert_called_once_with('test', torch.ones(1)) + created_experiment.log_metric.reset_mock() logger.log_metric('test', 1.0) - neptune.create_experiment().log_metric.assert_called_once_with('test', 1.0) - neptune.create_experiment().log_metric.reset_mock() + created_experiment.log_metric.assert_called_once_with('test', 1.0) + created_experiment.log_metric.reset_mock() logger.log_metric('test', 1.0, step=2) - neptune.create_experiment().log_metric.assert_called_once_with('test', x=2, y=1.0) - neptune.create_experiment().log_metric.reset_mock() + created_experiment.log_metric.assert_called_once_with('test', x=2, y=1.0) + created_experiment.log_metric.reset_mock() logger.log_text('test', 'text') - neptune.create_experiment().log_metric.assert_called_once_with('test', 'text') - neptune.create_experiment().log_metric.reset_mock() + created_experiment.log_metric.assert_called_once_with('test', 'text') + created_experiment.log_metric.reset_mock() logger.log_image('test', 'image file') - neptune.create_experiment().log_image.assert_called_once_with('test', 'image file') - neptune.create_experiment().log_image.reset_mock() + created_experiment.log_image.assert_called_once_with('test', 'image file') + created_experiment.log_image.reset_mock() logger.log_image('test', 'image file', step=2) - neptune.create_experiment().log_image.assert_called_once_with('test', x=2, y='image file') - neptune.create_experiment().log_image.reset_mock() + created_experiment.log_image.assert_called_once_with('test', x=2, y='image file') + created_experiment.log_image.reset_mock() logger.log_artifact('file') - neptune.create_experiment().log_artifact.assert_called_once_with('file', None) + created_experiment.log_artifact.assert_called_once_with('file', None) logger.set_property('property', 10) - neptune.create_experiment().set_property.assert_called_once_with('property', 10) + created_experiment.set_property.assert_called_once_with('property', 10) logger.append_tags('one tag') - neptune.create_experiment().append_tags.assert_called_once_with('one tag') - neptune.create_experiment().append_tags.reset_mock() + created_experiment.append_tags.assert_called_once_with('one tag') + created_experiment.append_tags.reset_mock() logger.append_tags(['two', 'tags']) - neptune.create_experiment().append_tags.assert_called_once_with('two', 'tags') + created_experiment.append_tags.assert_called_once_with('two', 'tags') def test_neptune_leave_open_experiment_after_fit(tmpdir):