save_dir fix for MLflowLogger + save_dir tests for others #2502

Merged 37 commits on Jul 9, 2020

Commits (37):
0c0d564  mlflow rework (Jul 4, 2020)
2d7223f  logger save_dir (Jul 4, 2020)
33a93c2  folder (Jul 4, 2020)
5699e82  mlflow (Jul 4, 2020)
75a50ff  simplify (Jul 4, 2020)
7c1bcc4  fix test (Jul 4, 2020)
4a5d81a  add a test for file dir contents (Jul 4, 2020)
0ae884d  new line (Jul 4, 2020)
4bcc117  changelog (Jul 4, 2020)
cbaeb02  docs (Jul 4, 2020)
a4279af  Update CHANGELOG.md (awaelchli, Jul 4, 2020)
c883d46  test for comet logger (awaelchli, Jul 5, 2020)
986b807  improve mlflow checkpoint test (awaelchli, Jul 5, 2020)
70d1ede  prevent commet logger error on pytest exit (awaelchli, Jul 5, 2020)
66b0182  test tensorboard save dir structure (awaelchli, Jul 5, 2020)
520bbd5  wandb save dir test (awaelchli, Jul 5, 2020)
1cc0afa  skip test on windows (awaelchli, Jul 5, 2020)
84f9cf4  Merge branch 'master' into bugfix/mlflow-fixes (awaelchli, Jul 5, 2020)
f6179b5  Merge branch 'master' into bugfix/mlflow-rework (Jul 6, 2020)
a57f7db  add mlflow to pickle tests (Jul 6, 2020)
c19d155  wandb (Jul 6, 2020)
af8e2ac  code factor (Jul 6, 2020)
61eaa71  remove unused imports (Jul 6, 2020)
d93d6c9  Merge branch 'master' into bugfix/mlflow-rework (Jul 7, 2020)
c98df78  remove unused setter (Jul 7, 2020)
4ed9f0d  wandb mock (awaelchli, Jul 7, 2020)
8d2e46f  Merge remote-tracking branch 'PyTorchLightning/bugfix/mlflow-fixes' i… (awaelchli, Jul 7, 2020)
ab8c5e2  Merge remote-tracking branch 'original/bugfix/mlflow-fixes' into bugf… (Jul 7, 2020)
ae963ff  wip mock (Jul 8, 2020)
522838f  wip mock (Jul 8, 2020)
7039384  wandb tests with mocking (Jul 8, 2020)
66d2600  clean up (Jul 8, 2020)
e0794b0  clean up (Jul 8, 2020)
fcc7f08  comments (Jul 8, 2020)
8773634  include wandblogger in test (Jul 8, 2020)
000f531  clean up (Jul 8, 2020)
d375056  missing argument (awaelchli, Jul 8, 2020)
Changes from 11 commits:
CHANGELOG.md (1 addition, 0 deletions)

@@ -23,6 +23,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed using the same DDP python interpreter and actually running ([#2482](https://github.com/PyTorchLightning/pytorch-lightning/pull/2482))

+- Fixed a problem with `MLflowLogger` creating multiple run folders ([#2502](https://github.com/PyTorchLightning/pytorch-lightning/pull/2502))

## [0.8.4] - 2020-07-01

pytorch_lightning/callbacks/model_checkpoint.py (3 additions, 6 deletions)

@@ -239,12 +239,9 @@ def on_train_start(self, trainer, pl_module):

        if trainer.logger is not None:
            # weights_save_path overrides anything
-            if getattr(trainer, 'weights_save_path', None) is not None:
-                save_dir = trainer.weights_save_path
-            else:
-                save_dir = (getattr(trainer.logger, 'save_dir', None)
-                            or getattr(trainer.logger, '_save_dir', None)
-                            or trainer.default_root_dir)
+            save_dir = (getattr(trainer, 'weights_save_path', None)
+                        or getattr(trainer.logger, 'save_dir', None)
+                        or trainer.default_root_dir)

            version = trainer.logger.version if isinstance(
                trainer.logger.version, str) else f'version_{trainer.logger.version}'
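The rewritten block is a plain `or` fallback chain: `weights_save_path` wins, then the logger's `save_dir`, then `default_root_dir`. A minimal sketch of the same resolution order, using `SimpleNamespace` stand-ins for the trainer and logger (the objects and values here are illustrative, not Lightning API):

```python
from types import SimpleNamespace

# Stand-ins that model only the attributes the fallback chain reads.
logger = SimpleNamespace(save_dir='./mlruns')
trainer = SimpleNamespace(weights_save_path=None, logger=logger,
                          default_root_dir='.')

# weights_save_path (if set) wins, then the logger's save_dir,
# then the trainer's default_root_dir.
save_dir = (getattr(trainer, 'weights_save_path', None)
            or getattr(trainer.logger, 'save_dir', None)
            or trainer.default_root_dir)
assert save_dir == './mlruns'
```

The private `_save_dir` fallback can be dropped because every built-in logger touched by this PR now exposes a public `save_dir` property.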
pytorch_lightning/loggers/base.py (8 additions, 0 deletions)

@@ -237,6 +237,14 @@ def close(self) -> None:
"""Do any cleanup that is necessary to close an experiment."""
self.save()

+    @property
+    def save_dir(self) -> Optional[str]:
+        """
+        Return the root directory where experiment logs get saved, or `None` if the logger does not
+        save data locally.
+        """
+        return None

@property
@abstractmethod
def name(self) -> str:
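With this default in place, a custom logger only has to override the property for `ModelCheckpoint` to pick up its directory. A hypothetical minimal subclass, sketched against this base class (everything below except `LightningLoggerBase` is invented for illustration):

```python
from argparse import Namespace
from typing import Any, Dict, Optional, Union

from pytorch_lightning.loggers.base import LightningLoggerBase


class DiskLogger(LightningLoggerBase):
    """Toy logger that keeps everything on local disk."""

    def __init__(self, save_dir: str):
        super().__init__()
        self._save_dir = save_dir

    @property
    def save_dir(self) -> Optional[str]:
        # overriding the new hook lets ModelCheckpoint default to this path
        return self._save_dir

    @property
    def experiment(self):
        return None  # no third-party experiment object

    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        pass  # a real logger would write metrics under self._save_dir

    def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
        pass

    @property
    def name(self) -> str:
        return 'disk'

    @property
    def version(self) -> str:
        return '0'
```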
pytorch_lightning/loggers/comet.py (5 additions, 1 deletion)

@@ -112,7 +112,7 @@ def __init__(self,
self.api_key = api_key
elif save_dir is not None:
self.mode = "offline"
-            self.save_dir = save_dir
+            self._save_dir = save_dir
else:
# If neither api_key nor save_dir are passed as arguments, raise an exception
raise MisconfigurationException("CometLogger requires either api_key or save_dir during initialization.")
@@ -219,6 +219,10 @@ def finalize(self, status: str) -> None:
self.experiment.end()
self.reset_experiment()

+    @property
+    def save_dir(self) -> Optional[str]:
+        return self._save_dir

@property
def name(self) -> str:
return str(self.experiment.project_name)
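For reference, a short sketch of the offline path this hunk touches, assuming `comet_ml` is installed (the directory name is arbitrary):

```python
from pytorch_lightning.loggers import CometLogger

# Passing only save_dir (no api_key) selects offline mode, and the
# new read-only property hands the same path back.
logger = CometLogger(save_dir='./comet_logs')
assert logger.mode == 'offline'
assert logger.save_dir == './comet_logs'
```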
pytorch_lightning/loggers/mlflow.py (54 additions, 27 deletions)

@@ -2,7 +2,6 @@
MLflow
------
"""
-import os
from argparse import Namespace
from time import time
from typing import Optional, Dict, Any, Union
@@ -11,16 +10,20 @@
import mlflow
from mlflow.tracking import MlflowClient
_MLFLOW_AVAILABLE = True
-except ImportError:  # pragma: no-cover
+except ModuleNotFoundError:  # pragma: no-cover
mlflow = None
MlflowClient = None
_MLFLOW_AVAILABLE = False


from pytorch_lightning import _logger as log
from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment
from pytorch_lightning.utilities import rank_zero_only


+LOCAL_FILE_URI_PREFIX = "file:"


class MLFlowLogger(LightningLoggerBase):
"""
Log using `MLflow <https://mlflow.org>`_. Install it with pip:
@@ -52,59 +55,71 @@ class MLFlowLogger(LightningLoggerBase):
Args:
experiment_name: The name of the experiment
tracking_uri: Address of local or remote tracking server.
-            If not provided, defaults to the service set by ``mlflow.tracking.set_tracking_uri``.
+            If not provided, defaults to `file:<save_dir>`.
tags: A dictionary tags for the experiment.
+        save_dir: A path to a local directory where the MLflow runs get saved.
+            Defaults to `./mlruns` if `tracking_uri` is not provided.
+            Has no effect if `tracking_uri` is provided.

"""

def __init__(self,
experiment_name: str = 'default',
tracking_uri: Optional[str] = None,
tags: Optional[Dict[str, Any]] = None,
-                 save_dir: Optional[str] = None):
+                 save_dir: Optional[str] = './mlruns'):

if not _MLFLOW_AVAILABLE:
raise ImportError('You want to use `mlflow` logger which is not installed yet,'
' install it with `pip install mlflow`.')
super().__init__()
-        if not tracking_uri and save_dir:
-            tracking_uri = f'file:{os.sep * 2}{save_dir}'
-        self._mlflow_client = MlflowClient(tracking_uri)
-        self.experiment_name = experiment_name
+        if not tracking_uri:
+            tracking_uri = f'{LOCAL_FILE_URI_PREFIX}{save_dir}'
+
+        self._experiment_name = experiment_name
+        self._experiment_id = None
+        self._tracking_uri = tracking_uri
         self._run_id = None
         self.tags = tags
+        self._mlflow_client = MlflowClient(tracking_uri)

@property
@rank_zero_experiment
def experiment(self) -> MlflowClient:
r"""
-        Actual MLflow object. To use mlflow features in your
+        Actual MLflow object. To use MLflow features in your
:class:`~pytorch_lightning.core.lightning.LightningModule` do the following.

Example::

self.logger.experiment.some_mlflow_function()

"""
-        return self._mlflow_client
-
-    @property
-    def run_id(self):
-        if self._run_id is not None:
-            return self._run_id
-
-        expt = self._mlflow_client.get_experiment_by_name(self.experiment_name)
+        expt = self._mlflow_client.get_experiment_by_name(self._experiment_name)

         if expt:
-            self._expt_id = expt.experiment_id
+            self._experiment_id = expt.experiment_id
         else:
-            log.warning(f'Experiment with name {self.experiment_name} not found. Creating it.')
-            self._expt_id = self._mlflow_client.create_experiment(name=self.experiment_name)
+            log.warning(f'Experiment with name {self._experiment_name} not found. Creating it.')
+            self._experiment_id = self._mlflow_client.create_experiment(name=self._experiment_name)

-        run = self._mlflow_client.create_run(experiment_id=self._expt_id, tags=self.tags)
-        self._run_id = run.info.run_id
+        if not self._run_id:
+            run = self._mlflow_client.create_run(experiment_id=self._experiment_id, tags=self.tags)
+            self._run_id = run.info.run_id
+        return self._mlflow_client
+
+    @property
+    def run_id(self):
+        # create the experiment if it does not exist to get the run id
+        _ = self.experiment
+        return self._run_id
+
+    @property
+    def experiment_id(self):
+        # create the experiment if it does not exist to get the experiment id
+        _ = self.experiment
+        return self._experiment_id

@rank_zero_only
def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
params = self._convert_params(params)
@@ -126,14 +141,26 @@ def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) ->
@rank_zero_only
def finalize(self, status: str = 'FINISHED') -> None:
super().finalize(status)
-        if status == 'success':
-            status = 'FINISHED'
-        self.experiment.set_terminated(self.run_id, status)
+        status = 'FINISHED' if status == 'success' else status
+        if self.experiment.get_run(self.run_id):
+            self.experiment.set_terminated(self.run_id, status)

+    @property
+    def save_dir(self) -> Optional[str]:
+        """
+        The root file directory in which MLflow experiments are saved.
+
+        Return:
+            Local path to the root experiment directory if the tracking URI is local.
+            Otherwise returns `None`.
+        """
+        if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
+            return self._tracking_uri[len(LOCAL_FILE_URI_PREFIX):]

@property
def name(self) -> str:
-        return self.experiment_name
+        return self.experiment_id

@property
def version(self) -> str:
-        return self._run_id
+        return self.run_id
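Taken together, the rework makes experiment and run creation lazy and idempotent: `experiment` looks up or creates the experiment, creates at most one run, and `run_id`/`experiment_id` merely trigger it. A usage sketch, assuming `mlflow` is installed:

```python
from pytorch_lightning.loggers import MLFlowLogger

# With no tracking_uri, the URI becomes 'file:./mlruns' and
# save_dir recovers the local path back out of it.
logger = MLFlowLogger(experiment_name='demo', save_dir='./mlruns')
assert logger.save_dir == './mlruns'

# Accessing run_id creates the experiment and exactly one run;
# repeated access reuses them instead of spawning new run folders.
first_run = logger.run_id
assert logger.run_id == first_run
assert logger.version == first_run  # version is the run id
```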
pytorch_lightning/loggers/tensorboard.py (5 additions, 1 deletion)

@@ -50,7 +50,7 @@ def __init__(self,
version: Optional[Union[int, str]] = None,
**kwargs):
super().__init__()
-        self.save_dir = save_dir
+        self._save_dir = save_dir
self._name = name
self._version = version

@@ -82,6 +82,10 @@ def log_dir(self) -> str:
log_dir = os.path.join(self.root_dir, version)
return log_dir

+    @property
+    def save_dir(self) -> Optional[str]:
+        return self._save_dir

@property
@rank_zero_experiment
def experiment(self) -> SummaryWriter:
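A quick sketch of how the restored property relates to the existing `log_dir` layout (paths illustrative):

```python
from pytorch_lightning.loggers import TensorBoardLogger

logger = TensorBoardLogger(save_dir='./lightning_logs', name='demo')
assert logger.save_dir == './lightning_logs'

# Event files land under <save_dir>/<name>/version_<version>.
print(logger.log_dir)  # e.g. ./lightning_logs/demo/version_0
```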
pytorch_lightning/loggers/test_tube.py (5 additions, 1 deletion)

@@ -68,7 +68,7 @@ def __init__(self,
raise ImportError('You want to use `test_tube` logger which is not installed yet,'
' install it with `pip install test-tube`.')
super().__init__()
-        self.save_dir = save_dir
+        self._save_dir = save_dir
self._name = name
self.description = description
self.debug = debug
@@ -141,6 +141,10 @@ def close(self) -> None:
exp = self.experiment
exp.close()

+    @property
+    def save_dir(self) -> Optional[str]:
+        return self._save_dir

@property
def name(self) -> str:
if self._experiment is None:
pytorch_lightning/loggers/wandb.py (5 additions, 1 deletion)

@@ -116,7 +116,7 @@ def experiment(self) -> Run:
group=self._group)
# save checkpoints in wandb dir to upload on W&B servers
if self._log_model:
-            self.save_dir = self._experiment.dir
+            self._save_dir = self._experiment.dir
return self._experiment

def watch(self, model: nn.Module, log: str = 'gradients', log_freq: int = 100):
@@ -133,6 +133,10 @@ def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) ->

self.experiment.log({'global_step': step, **metrics} if step is not None else metrics)

+    @property
+    def save_dir(self) -> Optional[str]:
+        return self._save_dir

@property
def name(self) -> Optional[str]:
# don't create an experiment if we don't have one
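Unlike the other loggers, `save_dir` here stays `None` until the wandb run exists, since the attribute is only assigned once `log_model=True` and the experiment is created. A sketch, assuming `wandb` is installed and that this release's constructor accepts the `offline` and `log_model` flags:

```python
from pytorch_lightning.loggers import WandbLogger

logger = WandbLogger(offline=True, log_model=True)
assert logger.save_dir is None  # no save_dir passed and no run yet

_ = logger.experiment           # wandb.init(...) happens here
print(logger.save_dir)          # now the run's local directory
```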
tests/loggers/test_mlflow.py (29 additions, 2 deletions)

@@ -1,9 +1,36 @@
+import os
+
+from pytorch_lightning import Trainer
 from pytorch_lightning.loggers import MLFlowLogger
+from tests.base import EvalModelTemplate


 def test_mlflow_logger_exists(tmpdir):
-    """Verify that basic functionality of mlflow logger works."""
+    """ Test launching two independent loggers. """
     logger = MLFlowLogger('test', save_dir=tmpdir)
-    # Test already exists
+    # same name leads to same experiment id, but different runs get recorded
     logger2 = MLFlowLogger('test', save_dir=tmpdir)
+    assert logger.experiment_id == logger2.experiment_id
     assert logger.run_id != logger2.run_id
+    logger3 = MLFlowLogger('new', save_dir=tmpdir)
+    assert logger3.experiment_id != logger.experiment_id
+
+
+def test_mlflow_logger_dirs_creation(tmpdir):
+    """ Test that the logger creates the folders in the right place. """
+    assert not os.listdir(tmpdir)
+    logger = MLFlowLogger('test', save_dir=tmpdir)
+    assert set(os.listdir(tmpdir)) == {'.trash'}
+    run_id = logger.run_id
+    exp_id = logger.experiment_id
+    for i in range(2):
+        _ = logger.experiment
+    assert set(os.listdir(tmpdir)) == {'.trash', exp_id}
+    assert set(os.listdir(tmpdir / exp_id)) == {run_id, 'meta.yaml'}
+
+    model = EvalModelTemplate()
+    trainer = Trainer(logger=logger, max_steps=3, limit_val_batches=3)
+    trainer.fit(model)
+    assert set(os.listdir(tmpdir / exp_id)) == {run_id, 'meta.yaml'}
+    assert 'epoch' in os.listdir(tmpdir / exp_id / run_id / 'metrics')
+    assert set(os.listdir(tmpdir / exp_id / run_id / 'params')) == model.hparams.keys()