diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2e800c28964ff..bf8d002bce0e8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Added SyncBN for DDP ([#2801](https://github.com/PyTorchLightning/pytorch-lightning/pull/2801))
 
+- Added basic `CSVLogger` ([#2721](https://github.com/PyTorchLightning/pytorch-lightning/pull/2721))
+
 - Added SSIM metrics ([#2671](https://github.com/PyTorchLightning/pytorch-lightning/pull/2671))
 
 - Added BLEU metrics ([#2535](https://github.com/PyTorchLightning/pytorch-lightning/pull/2535))
diff --git a/docs/source/loggers.rst b/docs/source/loggers.rst
index 1877e9f3eff5a..e04ba1af5ca1c 100644
--- a/docs/source/loggers.rst
+++ b/docs/source/loggers.rst
@@ -339,4 +339,10 @@ Test-tube
 ^^^^^^^^^
 
 .. autoclass:: pytorch_lightning.loggers.test_tube.TestTubeLogger
+    :noindex:
+
+CSVLogger
+^^^^^^^^^
+
+.. autoclass:: pytorch_lightning.loggers.csv_logs.CSVLogger
     :noindex:
\ No newline at end of file
diff --git a/pytorch_lightning/core/saving.py b/pytorch_lightning/core/saving.py
index 5e3ef1d97236d..37c63de1804ab 100644
--- a/pytorch_lightning/core/saving.py
+++ b/pytorch_lightning/core/saving.py
@@ -313,7 +313,7 @@ def load_hparams_from_yaml(config_yaml: str) -> Dict[str, Any]:
         return {}
 
     with open(config_yaml) as fp:
-        tags = yaml.load(fp, Loader=yaml.SafeLoader)
+        tags = yaml.load(fp, Loader=yaml.UnsafeLoader)
 
     return tags
 
diff --git a/pytorch_lightning/loggers/__init__.py b/pytorch_lightning/loggers/__init__.py
index daa2b99bb80c6..5f2f3044d0a65 100644
--- a/pytorch_lightning/loggers/__init__.py
+++ b/pytorch_lightning/loggers/__init__.py
@@ -2,11 +2,14 @@
 
 from pytorch_lightning.loggers.base import LightningLoggerBase, LoggerCollection
 from pytorch_lightning.loggers.tensorboard import TensorBoardLogger
+from pytorch_lightning.loggers.csv_logs import CSVLogger
+
 
 __all__ = [
     'LightningLoggerBase',
     'LoggerCollection',
     'TensorBoardLogger',
+    'CSVLogger',
 ]
 
 try:
diff --git a/pytorch_lightning/loggers/csv_logs.py b/pytorch_lightning/loggers/csv_logs.py
new file mode 100644
index 0000000000000..1e395abadb293
--- /dev/null
+++ b/pytorch_lightning/loggers/csv_logs.py
@@ -0,0 +1,204 @@
+"""
+CSV logger
+----------
+
+CSV logger for basic experiment logging that does not require opening ports.
+
+"""
+import io
+import os
+import csv
+import torch
+from argparse import Namespace
+from typing import Optional, Dict, Any, Union
+
+from pytorch_lightning import _logger as log
+from pytorch_lightning.core.saving import save_hparams_to_yaml
+from pytorch_lightning.loggers.base import LightningLoggerBase
+from pytorch_lightning.utilities.distributed import rank_zero_warn, rank_zero_only
+
+
+class ExperimentWriter(object):
+    r"""
+    Experiment writer for CSVLogger.
+
+    Currently supports logging hyperparameters and metrics in YAML and CSV
+    format, respectively.
+
+    Args:
+        log_dir: Directory for the experiment logs
+    """
+
+    NAME_HPARAMS_FILE = 'hparams.yaml'
+    NAME_METRICS_FILE = 'metrics.csv'
+
+    def __init__(self, log_dir: str) -> None:
+        self.hparams = {}
+        self.metrics = []
+
+        self.log_dir = log_dir
+        if os.path.exists(self.log_dir) and os.listdir(self.log_dir):
+            rank_zero_warn(
+                f"Experiment logs directory {self.log_dir} exists and is not empty."
+                " Previous log files in this directory will be deleted when the new ones are saved!"
+            )
+        os.makedirs(self.log_dir, exist_ok=True)
+
+        self.metrics_file_path = os.path.join(self.log_dir, self.NAME_METRICS_FILE)
+
+    def log_hparams(self, params: Dict[str, Any]) -> None:
+        """Record hparams"""
+        self.hparams.update(params)
+
+    def log_metrics(self, metrics_dict: Dict[str, float], step: Optional[int] = None) -> None:
+        """Record metrics"""
+        def _handle_value(value):
+            if isinstance(value, torch.Tensor):
+                return value.item()
+            return value
+
+        if step is None:
+            step = len(self.metrics)
+
+        metrics = {k: _handle_value(v) for k, v in metrics_dict.items()}
+        metrics['step'] = step
+        self.metrics.append(metrics)
+
+    def save(self) -> None:
+        """Save recorded hparams and metrics into files"""
+        hparams_file = os.path.join(self.log_dir, self.NAME_HPARAMS_FILE)
+        save_hparams_to_yaml(hparams_file, self.hparams)
+
+        if not self.metrics:
+            return
+
+        last_m = {}
+        for m in self.metrics:
+            last_m.update(m)
+        metrics_keys = list(last_m.keys())
+
+        with io.open(self.metrics_file_path, 'w', newline='') as f:
+            writer = csv.DictWriter(f, fieldnames=metrics_keys)
+            writer.writeheader()
+            writer.writerows(self.metrics)
+
+
+class CSVLogger(LightningLoggerBase):
+    r"""
+    Log to the local file system in YAML and CSV format. Logs are saved to
+    ``os.path.join(save_dir, name, version)``.
+
+    Example:
+        >>> from pytorch_lightning import Trainer
+        >>> from pytorch_lightning.loggers import CSVLogger
+        >>> logger = CSVLogger("logs", name="my_exp_name")
+        >>> trainer = Trainer(logger=logger)
+
+    Args:
+        save_dir: Save directory
+        name: Experiment name. Defaults to ``'default'``.
+        version: Experiment version. If version is not specified the logger inspects the save
+            directory for existing versions, then automatically assigns the next available version.
+    """
+
+    def __init__(self,
+                 save_dir: str,
+                 name: Optional[str] = "default",
+                 version: Optional[Union[int, str]] = None):
+
+        super().__init__()
+        self._save_dir = save_dir
+        self._name = name or ''
+        self._version = version
+        self._experiment = None
+
+    @property
+    def root_dir(self) -> str:
+        """
+        Parent directory for all checkpoint subdirectories.
+        If the experiment name parameter is ``None`` or the empty string, no experiment subdirectory is used
+        and the checkpoint will be saved in ``save_dir/version_dir``.
+        """
+        if not self.name:
+            return self.save_dir
+        return os.path.join(self.save_dir, self.name)
+
+    @property
+    def log_dir(self) -> str:
+        """
+        The log directory for this run. By default, it is named
+        ``'version_${self.version}'`` but it can be overridden by passing a string value
+        for the constructor's version parameter instead of ``None`` or an int.
+        """
+        # create a pseudo standard path a la test-tube
+        version = self.version if isinstance(self.version, str) else f"version_{self.version}"
+        log_dir = os.path.join(self.root_dir, version)
+        return log_dir
+
+    @property
+    def save_dir(self) -> Optional[str]:
+        return self._save_dir
+
+    @property
+    def experiment(self) -> ExperimentWriter:
+        r"""
+
+        Actual ExperimentWriter object. To use ExperimentWriter features in your
+        :class:`~pytorch_lightning.core.lightning.LightningModule` do the following.
+
+        Example::
+
+            self.logger.experiment.some_experiment_writer_function()
+
+        """
+        if self._experiment:
+            return self._experiment
+
+        os.makedirs(self.root_dir, exist_ok=True)
+        self._experiment = ExperimentWriter(log_dir=self.log_dir)
+        return self._experiment
+
+    @rank_zero_only
+    def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
+        params = self._convert_params(params)
+        self.experiment.log_hparams(params)
+
+    @rank_zero_only
+    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
+        self.experiment.log_metrics(metrics, step)
+
+    @rank_zero_only
+    def save(self) -> None:
+        super().save()
+        self.experiment.save()
+
+    @rank_zero_only
+    def finalize(self, status: str) -> None:
+        self.save()
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def version(self) -> Union[int, str]:
+        if self._version is None:
+            self._version = self._get_next_version()
+        return self._version
+
+    def _get_next_version(self):
+        root_dir = os.path.join(self._save_dir, self.name)
+
+        if not os.path.isdir(root_dir):
+            log.warning('Missing logger folder: %s', root_dir)
+            return 0
+
+        existing_versions = []
+        for d in os.listdir(root_dir):
+            if os.path.isdir(os.path.join(root_dir, d)) and d.startswith("version_"):
+                existing_versions.append(int(d.split("_")[1]))
+
+        if len(existing_versions) == 0:
+            return 0
+
+        return max(existing_versions) + 1
diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py
index 5bd81d7116948..7978aa8e41ace 100644
--- a/tests/loggers/test_all.py
+++ b/tests/loggers/test_all.py
@@ -5,11 +5,13 @@
 import platform
 from unittest import mock
 
+import cloudpickle
 import pytest
 
 import tests.base.develop_utils as tutils
 from pytorch_lightning import Trainer, Callback
 from pytorch_lightning.loggers import (
+    CSVLogger,
     TensorBoardLogger,
     MLFlowLogger,
     NeptuneLogger,
@@ -34,6 +36,7 @@ def _get_logger_args(logger_class, save_dir):
 
 @pytest.mark.parametrize("logger_class", [
     TensorBoardLogger,
+    CSVLogger,
     CometLogger,
     MLFlowLogger,
     NeptuneLogger,
@@ -85,6 +88,7 @@ def log_metrics(self, metrics, step):
 
 
 @pytest.mark.parametrize("logger_class", [
+    CSVLogger,
     TensorBoardLogger,
     CometLogger,
     MLFlowLogger,
@@ -148,6 +152,7 @@ def name(self):
 
 @pytest.mark.parametrize("logger_class", [
     TensorBoardLogger,
+    CSVLogger,
     CometLogger,
     MLFlowLogger,
     NeptuneLogger,
@@ -170,6 +175,7 @@ def test_loggers_pickle(tmpdir, monkeypatch, logger_class):
 
     # test pickling loggers
     pickle.dumps(logger)
+    cloudpickle.dumps(logger)
 
     trainer = Trainer(
         max_epochs=1,
@@ -226,6 +232,7 @@ def on_train_batch_start(self, trainer, pl_module):
 @pytest.mark.skipif(platform.system() == "Windows", reason="Distributed training is not supported on Windows")
 @pytest.mark.parametrize("logger_class", [
     TensorBoardLogger,
+    # CSVLogger,  # todo
     CometLogger,
     MLFlowLogger,
     NeptuneLogger,
diff --git a/tests/loggers/test_csv.py b/tests/loggers/test_csv.py
new file mode 100644
index 0000000000000..3bc8330075e6a
--- /dev/null
+++ b/tests/loggers/test_csv.py
@@ -0,0 +1,97 @@
+from argparse import Namespace
+
+import pytest
+import torch
+import os
+
+from pytorch_lightning.core.saving import load_hparams_from_yaml
+from pytorch_lightning.loggers import CSVLogger
+from pytorch_lightning.loggers.csv_logs import ExperimentWriter
+
+
+def test_file_logger_automatic_versioning(tmpdir):
+    """Verify that automatic versioning works"""
+
+    root_dir = tmpdir.mkdir("exp")
+    root_dir.mkdir("version_0")
+    root_dir.mkdir("version_1")
+
+    logger = CSVLogger(save_dir=tmpdir, name="exp")
+
+    assert logger.version == 2
+
+
+def test_file_logger_manual_versioning(tmpdir):
+    """Verify that manual versioning works"""
+
+    root_dir = tmpdir.mkdir("exp")
+    root_dir.mkdir("version_0")
+    root_dir.mkdir("version_1")
+    root_dir.mkdir("version_2")
+
+    logger = CSVLogger(save_dir=tmpdir, name="exp", version=1)
+
+    assert logger.version == 1
+
+
+def test_file_logger_named_version(tmpdir):
+    """Verify that manual versioning works for string versions, e.g. '2020-02-05-162402' """
+
+    exp_name = "exp"
+    tmpdir.mkdir(exp_name)
+    expected_version = "2020-02-05-162402"
+
+    logger = CSVLogger(save_dir=tmpdir, name=exp_name, version=expected_version)
+    logger.log_hyperparams({"a": 1, "b": 2})
+    logger.save()
+    assert logger.version == expected_version
+    assert os.listdir(tmpdir / exp_name) == [expected_version]
+    assert os.listdir(tmpdir / exp_name / expected_version)
+
+
+@pytest.mark.parametrize("name", ['', None])
+def test_file_logger_no_name(tmpdir, name):
+    """Verify that None or empty name works"""
+    logger = CSVLogger(save_dir=tmpdir, name=name)
+    logger.save()
+    assert logger.root_dir == tmpdir
+    assert os.listdir(tmpdir / 'version_0')
+
+
+@pytest.mark.parametrize("step_idx", [10, None])
+def test_file_logger_log_metrics(tmpdir, step_idx):
+    logger = CSVLogger(tmpdir)
+    metrics = {
+        "float": 0.3,
+        "int": 1,
+        "FloatTensor": torch.tensor(0.1),
+        "IntTensor": torch.tensor(1)
+    }
+    logger.log_metrics(metrics, step_idx)
+    logger.save()
+
+    path_csv = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE)
+    with open(path_csv, 'r') as fp:
+        lines = fp.readlines()
+    assert len(lines) == 2
+    assert all(n in lines[0] for n in metrics)
+
+
+def test_file_logger_log_hyperparams(tmpdir):
+    logger = CSVLogger(tmpdir)
+    hparams = {
+        "float": 0.3,
+        "int": 1,
+        "string": "abc",
+        "bool": True,
+        "dict": {'a': {'b': 'c'}},
+        "list": [1, 2, 3],
+        "namespace": Namespace(foo=Namespace(bar='buzz')),
+        "layer": torch.nn.BatchNorm1d
+    }
+    logger.log_hyperparams(hparams)
+    logger.save()
+
+    path_yaml = os.path.join(logger.log_dir, ExperimentWriter.NAME_HPARAMS_FILE)
+    params = load_hparams_from_yaml(path_yaml)
+    assert all(n in params for n in hparams)
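Below is a minimal end-to-end sketch of the API this patch adds. It calls only names defined in the diff (`CSVLogger.log_hyperparams`, `log_metrics`, `save`, the `log_dir` property, and `ExperimentWriter.NAME_METRICS_FILE`); the save directory and metric names are illustrative, not part of the change.

import csv
import os

import torch
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.loggers.csv_logs import ExperimentWriter

# Logs land in logs/demo/version_N; N auto-increments via _get_next_version().
logger = CSVLogger(save_dir="logs", name="demo")
logger.log_hyperparams({"lr": 0.01, "batch_size": 32})       # -> hparams.yaml
logger.log_metrics({"loss": torch.tensor(0.25)}, step=0)     # tensors are unpacked via .item()
logger.log_metrics({"loss": 0.21, "val_acc": 0.90}, step=1)  # new keys may appear later
logger.save()                                                # writes hparams.yaml and metrics.csv

# Read the metrics back; the header is the union of all keys ever logged.
path = os.path.join(logger.log_dir, ExperimentWriter.NAME_METRICS_FILE)
with open(path) as fp:
    rows = list(csv.DictReader(fp))
assert rows[0]["val_acc"] == ""  # column appeared at step 1, so it is empty for step 0

When the logger is attached to a `Trainer` (as in the class docstring's doctest), these calls happen inside the training loop, and the `@rank_zero_only` decorators keep multi-process runs from writing duplicate files.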
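The one subtlety worth spelling out is how `ExperimentWriter.save()` builds the CSV header: it folds every logged row into a single dict, so the field names are the union of all metric keys in logging order, and `csv.DictWriter` (whose `restval` defaults to `''`) pads rows that were logged before a key first appeared. A self-contained, stdlib-only sketch of that logic, with illustrative data:

import csv
import io

rows = [
    {"step": 0, "loss": 0.25},
    {"step": 1, "loss": 0.21, "val_acc": 0.9},  # 'val_acc' shows up late
]

# Union of keys across all rows, mirroring ExperimentWriter.save()
last = {}
for row in rows:
    last.update(row)
fieldnames = list(last)  # ['step', 'loss', 'val_acc'], insertion-ordered

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=fieldnames)  # restval='' fills the gaps
writer.writeheader()
writer.writerows(rows)
print(buf.getvalue())
# step,loss,val_acc
# 0,0.25,
# 1,0.21,0.9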