From f6a7a5278a6a105866d1e4463736a5cbaa67ddde Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sat, 14 Mar 2020 18:01:57 +0100 Subject: [PATCH 1/7] enable Codecov (#1133) * update config * try Drone cache * drop Drone cache * move import * remove token --- .codecov.yml | 12 +++++++++--- .drone.yml | 9 ++++++--- README.md | 2 +- docs/source/_static/images/coverage.svg | 21 --------------------- pytorch_lightning/trainer/trainer.py | 3 +-- tox.ini | 4 ++-- 6 files changed, 19 insertions(+), 32 deletions(-) delete mode 100644 docs/source/_static/images/coverage.svg diff --git a/.codecov.yml b/.codecov.yml index 51d32ee8a3be37..726a198d5a3a8a 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -2,9 +2,15 @@ # Validation check: # $ curl --data-binary @.codecov.yml https://codecov.io/validate + +# https://docs.codecov.io/docs/codecovyml-reference codecov: + bot: "codecov-io" + strict_yaml_branch: "yaml-config" + require_ci_to_pass: yes notify: - require_ci_to_pass: yes + # after_n_builds: 2 + wait_for_ci: yes coverage: precision: 0 # 2 = xx.xx%, 0 = xx% @@ -16,7 +22,7 @@ coverage: default: against: auto target: 99% # specify the target coverage for each commit status - threshold: 20% # allow this little decrease on project + threshold: 30% # allow this little decrease on project # https://github.com/codecov/support/wiki/Filtering-Branches # branches: master if_ci_failed: error @@ -24,7 +30,7 @@ coverage: patch: default: against: auto - target: 40% # specify the target "X%" coverage to hit + target: 50% # specify the target "X%" coverage to hit # threshold: 50% # allow this much decrease on patch changes: false diff --git a/.drone.yml b/.drone.yml index 60a6375bbe7d78..4f2259ef2c7f40 100644 --- a/.drone.yml +++ b/.drone.yml @@ -7,6 +7,10 @@ name: torch-GPU steps: - name: testing image: nvcr.io/nvidia/pytorch:20.02-py3 + environment: + SLURM_LOCALID: 0 + CODECOV_TOKEN: + from_secret: codecov_token commands: - python --version - pip install pip -U @@ -14,10 +18,9 @@ steps: - nvidia-smi #- pip install torch==1.3 - pip install -r requirements.txt --user - - pip install coverage pytest pytest-cov pytest-flake8 + - pip install coverage pytest pytest-cov pytest-flake8 codecov - pip install -r ./tests/requirements.txt --user - pip list - - export SLURM_LOCALID=0 - python -c "import torch ; print(' & '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) if torch.cuda.is_available() else 'only CPU')" - coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules # --flake8 - - coverage report + - codecov --token $CODECOV_TOKEN # --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG diff --git a/README.md b/README.md index a9f742e0288692..ea289d73073c2f 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ [![PyPI Status](https://badge.fury.io/py/pytorch-lightning.svg)](https://badge.fury.io/py/pytorch-lightning) [![PyPI Status](https://pepy.tech/badge/pytorch-lightning)](https://pepy.tech/project/pytorch-lightning) -[![Coverage](docs/source/_static/images/coverage.svg)](https://github.com/PytorchLightning/pytorch-lightning/tree/master/tests#running-coverage) +[![codecov](https://codecov.io/gh/PyTorchLightning/pytorch-lightning/branch/master/graph/badge.svg)](https://codecov.io/gh/PyTorchLightning/pytorch-lightning) 
[![CodeFactor](https://www.codefactor.io/repository/github/pytorchlightning/pytorch-lightning/badge)](https://www.codefactor.io/repository/github/pytorchlightning/pytorch-lightning) [![ReadTheDocs](https://readthedocs.org/projects/pytorch-lightning/badge/?version=0.7.1)](https://pytorch-lightning.readthedocs.io/en/0.7.1/) diff --git a/docs/source/_static/images/coverage.svg b/docs/source/_static/images/coverage.svg deleted file mode 100644 index 6bfc8faf24d3c2..00000000000000 --- a/docs/source/_static/images/coverage.svg +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - - coverage - coverage - 99% - 99% - - diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 7668a1f6de4084..3f6d97091cd174 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,3 +1,4 @@ +import inspect import logging as log import os import sys @@ -437,8 +438,6 @@ def slurm_job_id(self) -> int: @classmethod def default_attributes(cls): - import inspect - init_signature = inspect.signature(Trainer) args = {} diff --git a/tox.ini b/tox.ini index 3243beb420a459..f181bc64dd14d2 100644 --- a/tox.ini +++ b/tox.ini @@ -38,8 +38,8 @@ commands = pip list check-manifest python setup.py check --metadata --strict - flake8 . - coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules + coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --doctest-modules + coverage report python setup.py sdist twine check dist/* From da61398835a1ab3732049b4dc74471ed54c59ef8 Mon Sep 17 00:00:00 2001 From: monney Date: Sat, 14 Mar 2020 13:02:05 -0400 Subject: [PATCH 2/7] Add Support for Non-primitive types in TensorboardLogger (#1130) * Added support for non-primitive types to tensorboard logger * added EOF newline * PEP8 * Updated CHANGELOG for PR #1130. Moved _sanitize_params to base logger. Cleaned up _sanitize_params * Updated CHANGELOG for PR #1130. Moved _sanitize_params to base logger. Cleaned up _sanitize_params * changed convert_params to static method * PEP8 * Cleanup Doctest for _sanitize_params Co-Authored-By: Jirka Borovec * Removed OrderedDict import * Updated import order to conventions Co-authored-by: Manbir Gulati Co-authored-by: Jirka Borovec --- CHANGELOG.md | 2 ++ pytorch_lightning/loggers/base.py | 28 +++++++++++++++++++++++- pytorch_lightning/loggers/tensorboard.py | 6 +++-- tests/loggers/test_tensorboard.py | 6 ++++- 4 files changed, 38 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e17728f9424da..1d6cbc3d087812 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Added type hints to `pytorch_lightning.core` ([#946](https://github.com/PyTorchLightning/pytorch-lightning/pull/946)) - Added support for IterableDataset in validation and testing ([#1104](https://github.com/PyTorchLightning/pytorch-lightning/pull/1104)) +- Added support for non-primitive types in hparams for TensorboardLogger ([#1130](https://github.com/PyTorchLightning/pytorch-lightning/pull/1130)) + ### Changed diff --git a/pytorch_lightning/loggers/base.py b/pytorch_lightning/loggers/base.py index 5295bee02fe8ff..8c3daa29cb96e5 100644 --- a/pytorch_lightning/loggers/base.py +++ b/pytorch_lightning/loggers/base.py @@ -4,6 +4,8 @@ from functools import wraps from typing import Union, Optional, Dict, Iterable, Any, Callable, List +import torch + def rank_zero_only(fn: Callable): """Decorate a logger method to run it only on the process with rank 0. @@ -42,7 +44,8 @@ def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None): """ pass - def _convert_params(self, params: Union[Dict[str, Any], Namespace]) -> Dict[str, Any]: + @staticmethod + def _convert_params(params: Union[Dict[str, Any], Namespace]) -> Dict[str, Any]: # in case converting from namespace if isinstance(params, Namespace): params = vars(params) @@ -52,6 +55,29 @@ def _convert_params(self, params: Union[Dict[str, Any], Namespace]) -> Dict[str, return params + @staticmethod + def _sanitize_params(params: Dict[str, Any]) -> Dict[str, Any]: + """Returns params with non-primitvies converted to strings for logging + + >>> params = {"float": 0.3, + ... "int": 1, + ... "string": "abc", + ... "bool": True, + ... "list": [1, 2, 3], + ... "namespace": Namespace(foo=3), + ... "layer": torch.nn.BatchNorm1d} + >>> import pprint + >>> pprint.pprint(LightningLoggerBase._sanitize_params(params)) # doctest: +NORMALIZE_WHITESPACE + {'bool': True, + 'float': 0.3, + 'int': 1, + 'layer': "", + 'list': '[1, 2, 3]', + 'namespace': 'Namespace(foo=3)', + 'string': 'abc'} + """ + return {k: v if type(v) in [bool, int, float, str, torch.Tensor] else str(v) for k, v in params.items()} + @abstractmethod def log_hyperparams(self, params: argparse.Namespace): """Record hyperparameters. 
diff --git a/pytorch_lightning/loggers/tensorboard.py b/pytorch_lightning/loggers/tensorboard.py index 9be1d82b7669a0..662ecdf4af4959 100644 --- a/pytorch_lightning/loggers/tensorboard.py +++ b/pytorch_lightning/loggers/tensorboard.py @@ -101,6 +101,7 @@ def experiment(self) -> SummaryWriter: @rank_zero_only def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None: params = self._convert_params(params) + sanitized_params = self._sanitize_params(params) if parse_version(torch.__version__) < parse_version("1.3.0"): warn( @@ -110,13 +111,14 @@ def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None: ) else: from torch.utils.tensorboard.summary import hparams - exp, ssi, sei = hparams(params, {}) + exp, ssi, sei = hparams(sanitized_params, {}) writer = self.experiment._get_file_writer() writer.add_summary(exp) writer.add_summary(ssi) writer.add_summary(sei) + # some alternative should be added - self.tags.update(params) + self.tags.update(sanitized_params) @rank_zero_only def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None: diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py index b3f3d19242c8c1..e815384011f982 100644 --- a/tests/loggers/test_tensorboard.py +++ b/tests/loggers/test_tensorboard.py @@ -1,4 +1,5 @@ import pickle +from argparse import Namespace import pytest import torch @@ -108,6 +109,9 @@ def test_tensorboard_log_hyperparams(tmpdir): "float": 0.3, "int": 1, "string": "abc", - "bool": True + "bool": True, + "list": [1, 2, 3], + "namespace": Namespace(foo=3), + "layer": torch.nn.BatchNorm1d } logger.log_hyperparams(hparams) From c0bedd25872d12d1e1f55c3f02b31f78d006cdf1 Mon Sep 17 00:00:00 2001 From: "Martin.B" <51887611+bmartinn@users.noreply.github.com> Date: Sat, 14 Mar 2020 19:02:14 +0200 Subject: [PATCH 3/7] Add TRAINS experiment manager support (#1122) * Add allegro.ai TRAINS experiment manager support * improve docstring and type hinting, fix the bug in log_metrics, add support torch.Tensor to input into log_image * complete missing docstring of constructor's arguments * fix docs * pep8 * pep8 * remove redundant typing use logging fix typing and pep8 * remove deprecated interface * add TrainsLogger test * add TrainsLogger PR in CHANGELOG * add id/name property documentation * change logging as log Co-authored-by: bmartinn <> Co-authored-by: Sou Uchida --- CHANGELOG.md | 1 + docs/source/conf.py | 3 +- docs/source/experiment_logging.rst | 28 +++ docs/source/experiment_reporting.rst | 2 +- environment.yml | 1 + pytorch_lightning/loggers/__init__.py | 6 + pytorch_lightning/loggers/trains.py | 283 ++++++++++++++++++++++++++ requirements-extra.txt | 3 +- tests/loggers/test_trains.py | 48 +++++ 9 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 pytorch_lightning/loggers/trains.py create mode 100644 tests/loggers/test_trains.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d6cbc3d087812..11a24b8924c5a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Added +- Added `TrainsLogger` class ([#1122](https://github.com/PyTorchLightning/pytorch-lightning/pull/1122)) - Added type hints to `pytorch_lightning.core` ([#946](https://github.com/PyTorchLightning/pytorch-lightning/pull/946)) - Added support for IterableDataset in validation and testing ([#1104](https://github.com/PyTorchLightning/pytorch-lightning/pull/1104)) - Added support for non-primitive types in hparams for TensorboardLogger ([#1130](https://github.com/PyTorchLightning/pytorch-lightning/pull/1130)) diff --git a/docs/source/conf.py b/docs/source/conf.py index d1c0b54b0a7f10..ce8508f18515df 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -297,7 +297,8 @@ def setup(app): MOCK_REQUIRE_PACKAGES.append(pkg.rstrip()) # TODO: better parse from package since the import name and package name may differ -MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'test_tube', 'mlflow', 'comet_ml', 'wandb', 'neptune'] +MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'test_tube', + 'mlflow', 'comet_ml', 'wandb', 'neptune', 'trains'] autodoc_mock_imports = MOCK_REQUIRE_PACKAGES + MOCK_MANUAL_PACKAGES # for mod_name in MOCK_REQUIRE_PACKAGES: # sys.modules[mod_name] = mock.Mock() diff --git a/docs/source/experiment_logging.rst b/docs/source/experiment_logging.rst index 1edd067511b48c..b3fe825aede77e 100644 --- a/docs/source/experiment_logging.rst +++ b/docs/source/experiment_logging.rst @@ -62,6 +62,34 @@ The Neptune.ai is available anywhere except ``__init__`` in your LightningModule some_img = fake_image() self.logger.experiment.add_image('generated_images', some_img, 0) +allegro.ai TRAINS +^^^^^^^^^^^^^^^^^ + +`allegro.ai `_ is a third-party logger. +To use TRAINS as your logger do the following. + +.. note:: See: :ref:`trains` docs. + +.. code-block:: python + + from pytorch_lightning.loggers import TrainsLogger + + trains_logger = TrainsLogger( + project_name="examples", + task_name="pytorch lightning test" + ) + trainer = Trainer(logger=trains_logger) + +The TrainsLogger is available anywhere in your LightningModule + +.. code-block:: python + + class MyModule(pl.LightningModule): + + def __init__(self, ...): + some_img = fake_image() + self.logger.log_image('debug', 'generated_image_0', some_img, 0) + Tensorboard ^^^^^^^^^^^ diff --git a/docs/source/experiment_reporting.rst b/docs/source/experiment_reporting.rst index a738a234c96745..aa5642ab204571 100644 --- a/docs/source/experiment_reporting.rst +++ b/docs/source/experiment_reporting.rst @@ -32,7 +32,7 @@ want to log using this trainer flag. Log metrics ^^^^^^^^^^^ -To plot metrics into whatever logger you passed in (tensorboard, comet, neptune, etc...) +To plot metrics into whatever logger you passed in (tensorboard, comet, neptune, TRAINS, etc...) 1. training_epoch_end, validation_epoch_end, test_epoch_end will all log anything in the "log" key of the return dict. 
diff --git a/environment.yml b/environment.yml index 6dae860611b03c..1aa7c8f14820e7 100644 --- a/environment.yml +++ b/environment.yml @@ -32,3 +32,4 @@ dependencies: - comet_ml>=1.0.56 - wandb>=0.8.21 - neptune-client>=0.4.4 + - trains>=0.13.3 diff --git a/pytorch_lightning/loggers/__init__.py b/pytorch_lightning/loggers/__init__.py index adcba876d26f5b..93d1b737aab9b6 100644 --- a/pytorch_lightning/loggers/__init__.py +++ b/pytorch_lightning/loggers/__init__.py @@ -119,3 +119,9 @@ def any_lightning_module_function_or_hook(...): __all__.append('WandbLogger') except ImportError: pass + +try: + from .trains import TrainsLogger + __all__.append('TrainsLogger') +except ImportError: + pass diff --git a/pytorch_lightning/loggers/trains.py b/pytorch_lightning/loggers/trains.py new file mode 100644 index 00000000000000..a56c6c126798e5 --- /dev/null +++ b/pytorch_lightning/loggers/trains.py @@ -0,0 +1,283 @@ +""" +Log using `allegro.ai TRAINS '_ + +.. code-block:: python + + from pytorch_lightning.loggers import TrainsLogger + trains_logger = TrainsLogger( + project_name="pytorch lightning", + task_name="default", + ) + trainer = Trainer(logger=trains_logger) + + +Use the logger anywhere in you LightningModule as follows: + +.. code-block:: python + + def train_step(...): + # example + self.logger.experiment.whatever_trains_supports(...) + + def any_lightning_module_function_or_hook(...): + self.logger.experiment.whatever_trains_supports(...) + +""" + +import logging as log +from argparse import Namespace +from pathlib import Path +from typing import Any, Dict, Optional, Union + +import PIL +import numpy as np +import pandas as pd +import torch + +try: + import trains +except ImportError: + raise ImportError('You want to use `TRAINS` logger which is not installed yet,' + ' install it with `pip install trains`.') + +from .base import LightningLoggerBase, rank_zero_only + + +class TrainsLogger(LightningLoggerBase): + """Logs using TRAINS + + Args: + project_name: The name of the experiment's project. Defaults to None. + task_name: The name of the experiment. Defaults to None. + task_type: The name of the experiment. Defaults to 'training'. + reuse_last_task_id: Start with the previously used task id. Defaults to True. + output_uri: Default location for output models. Defaults to None. + auto_connect_arg_parser: Automatically grab the ArgParser + and connect it with the task. Defaults to True. + auto_connect_frameworks: If True, automatically patch to trains backend. Defaults to True. + auto_resource_monitoring: If true, machine vitals will be + sent along side the task scalars. Defaults to True. + """ + + def __init__( + self, project_name: Optional[str] = None, task_name: Optional[str] = None, + task_type: str = 'training', reuse_last_task_id: bool = True, + output_uri: Optional[str] = None, auto_connect_arg_parser: bool = True, + auto_connect_frameworks: bool = True, auto_resource_monitoring: bool = True) -> None: + super().__init__() + self._trains = trains.Task.init( + project_name=project_name, task_name=task_name, task_type=task_type, + reuse_last_task_id=reuse_last_task_id, output_uri=output_uri, + auto_connect_arg_parser=auto_connect_arg_parser, + auto_connect_frameworks=auto_connect_frameworks, + auto_resource_monitoring=auto_resource_monitoring + ) + + @property + def experiment(self) -> trains.Task: + r"""Actual TRAINS object. To use TRAINS features do the following. + + Example: + .. 
code-block:: python + self.logger.experiment.some_trains_function() + + """ + return self._trains + + @property + def id(self) -> Union[str, None]: + """ + ID is a uuid (string) representing this specific experiment in the entire system. + """ + if not self._trains: + return None + return self._trains.id + + @rank_zero_only + def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None: + """Log hyperparameters (numeric values) in TRAINS experiments + + Args: + params: + The hyperparameters that passed through the model. + """ + if not self._trains: + return None + if not params: + return + if isinstance(params, dict): + self._trains.connect(params) + else: + self._trains.connect(vars(params)) + + @rank_zero_only + def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None: + """Log metrics (numeric values) in TRAINS experiments. + This method will be called by Trainer. + + Args: + metrics: + The dictionary of the metrics. + If the key contains "/", it will be split by the delimiter, + then the elements will be logged as "title" and "series" respectively. + step: Step number at which the metrics should be recorded. Defaults to None. + """ + if not self._trains: + return None + + if not step: + step = self._trains.get_last_iteration() + + for k, v in metrics.items(): + if isinstance(v, str): + log.warning("Discarding metric with string value {}={}".format(k, v)) + continue + if isinstance(v, torch.Tensor): + v = v.item() + parts = k.split('/') + if len(parts) <= 1: + series = title = k + else: + title = parts[0] + series = '/'.join(parts[1:]) + self._trains.get_logger().report_scalar( + title=title, series=series, value=v, iteration=step) + + @rank_zero_only + def log_metric(self, title: str, series: str, value: float, step: Optional[int] = None) -> None: + """Log metrics (numeric values) in TRAINS experiments. + This method will be called by the users. + + Args: + title: The title of the graph to log, e.g. loss, accuracy. + series: The series name in the graph, e.g. classification, localization. + value: The value to log. + step: Step number at which the metrics should be recorded. Defaults to None. + """ + if not self._trains: + return None + + if not step: + step = self._trains.get_last_iteration() + + if isinstance(value, torch.Tensor): + value = value.item() + self._trains.get_logger().report_scalar( + title=title, series=series, value=value, iteration=step) + + @rank_zero_only + def log_text(self, text: str) -> None: + """Log console text data in TRAINS experiment + + Args: + text: The value of the log (data-point). + """ + if not self._trains: + return None + + self._trains.get_logger().report_text(text) + + @rank_zero_only + def log_image( + self, title: str, series: str, + image: Union[str, np.ndarray, PIL.Image.Image, torch.Tensor], + step: Optional[int] = None) -> None: + """Log Debug image in TRAINS experiment + + Args: + title: The title of the debug image, i.e. "failed", "passed". + series: The series name of the debug image, i.e. "Image 0", "Image 1". + image: + Debug image to log. Can be one of the following types: + Torch, Numpy, PIL image, path to image file (str) + If Numpy or Torch, the image is assume to be the following: + shape: CHW + color space: RGB + value range: [0., 1.] (float) or [0, 255] (uint8) + step: + Step number at which the metrics should be recorded. Defaults to None. 
+ """ + if not self._trains: + return None + + if not step: + step = self._trains.get_last_iteration() + + if isinstance(image, str): + self._trains.get_logger().report_image( + title=title, series=series, local_path=image, iteration=step) + else: + if isinstance(image, torch.Tensor): + image = image.cpu().numpy() + if isinstance(image, np.ndarray): + image = image.transpose(1, 2, 0) + self._trains.get_logger().report_image( + title=title, series=series, image=image, iteration=step) + + @rank_zero_only + def log_artifact( + self, name: str, + artifact: Union[str, Path, Dict[str, Any], pd.DataFrame, np.ndarray, PIL.Image.Image], + metadata: Optional[Dict[str, Any]] = None, delete_after_upload: bool = False) -> None: + """Save an artifact (file/object) in TRAINS experiment storage. + + Args: + name: Artifact name. Notice! it will override previous artifact + if name already exists + artifact: Artifact object to upload. Currently supports: + - string / pathlib2.Path are treated as path to artifact file to upload + If wildcard or a folder is passed, zip file containing the + local files will be created and uploaded + - dict will be stored as .json file and uploaded + - pandas.DataFrame will be stored as .csv.gz (compressed CSV file) and uploaded + - numpy.ndarray will be stored as .npz and uploaded + - PIL.Image will be stored to .png file and uploaded + metadata: + Simple key/value dictionary to store on the artifact. Defaults to None. + delete_after_upload: + If True local artifact will be deleted (only applies if artifact_object is a + local file). Defaults to False. + """ + if not self._trains: + return None + + self._trains.upload_artifact( + name=name, artifact_object=artifact, metadata=metadata, + delete_after_upload=delete_after_upload + ) + + def save(self) -> None: + pass + + @rank_zero_only + def finalize(self, status: str) -> None: + if not self._trains: + return None + self._trains.close() + self._trains = None + + @property + def name(self) -> Union[str, None]: + """ + Name is a human readable non-unique name (str) of the experiment. 
+ """ + if not self._trains: + return None + return self._trains.name + + @property + def version(self) -> Union[str, None]: + if not self._trains: + return None + return self._trains.id + + def __getstate__(self) -> Union[str, None]: + if not self._trains: + return None + return self._trains.id + + def __setstate__(self, state: str) -> None: + self._rank = 0 + self._trains = None + if state: + self._trains = trains.Task.get_task(task_id=state) diff --git a/requirements-extra.txt b/requirements-extra.txt index dd153091052e85..1265bc654a6e4e 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -2,4 +2,5 @@ neptune-client>=0.4.4 comet-ml>=1.0.56 mlflow>=1.0.0 test_tube>=0.7.5 -wandb>=0.8.21 \ No newline at end of file +wandb>=0.8.21 +trains>=0.13.3 diff --git a/tests/loggers/test_trains.py b/tests/loggers/test_trains.py new file mode 100644 index 00000000000000..1c8ca4167462a4 --- /dev/null +++ b/tests/loggers/test_trains.py @@ -0,0 +1,48 @@ +import pickle + +import tests.models.utils as tutils +from pytorch_lightning import Trainer +from pytorch_lightning.loggers import TrainsLogger +from tests.models import LightningTestModel + + +def test_trains_logger(tmpdir): + """Verify that basic functionality of TRAINS logger works.""" + tutils.reset_seed() + + hparams = tutils.get_hparams() + model = LightningTestModel(hparams) + logger = TrainsLogger(project_name="examples", task_name="pytorch lightning test") + + trainer_options = dict( + default_save_path=tmpdir, + max_epochs=1, + train_percent_check=0.05, + logger=logger + ) + trainer = Trainer(**trainer_options) + result = trainer.fit(model) + + print('result finished') + assert result == 1, "Training failed" + + +def test_trains_pickle(tmpdir): + """Verify that pickling trainer with TRAINS logger works.""" + tutils.reset_seed() + + # hparams = tutils.get_hparams() + # model = LightningTestModel(hparams) + + logger = TrainsLogger(project_name="examples", task_name="pytorch lightning test") + + trainer_options = dict( + default_save_path=tmpdir, + max_epochs=1, + logger=logger + ) + + trainer = Trainer(**trainer_options) + pkl_bytes = pickle.dumps(trainer) + trainer2 = pickle.loads(pkl_bytes) + trainer2.logger.log_metrics({"acc": 1.0}) From 2232eb35d1a31d72e5cdc41751b30e621ab207ee Mon Sep 17 00:00:00 2001 From: Ibraheem Moosa Date: Sat, 14 Mar 2020 23:02:31 +0600 Subject: [PATCH 4/7] Fix examples that uses type_as (#1129) --- docs/source/multi_gpu.rst | 2 +- pytorch_lightning/core/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/multi_gpu.rst b/docs/source/multi_gpu.rst index fdd5a92b467080..baf99d87d1aaf7 100644 --- a/docs/source/multi_gpu.rst +++ b/docs/source/multi_gpu.rst @@ -38,7 +38,7 @@ This will make your code scale to any arbitrary number of GPUs or TPUs with Ligh # with lightning def forward(self, x): z = torch.Tensor(2, 3) - z = z.type_as(x.type()) + z = z.type_as(x) Remove samplers ^^^^^^^^^^^^^^^ diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py index fac76ffd61f046..9cd75606b5c6fd 100644 --- a/pytorch_lightning/core/__init__.py +++ b/pytorch_lightning/core/__init__.py @@ -228,7 +228,7 @@ def training_step(self, batch, batch_idx): # put the z on the appropriate gpu or tpu core z = sample_noise() - z = z.type_as(x.type()) + z = z.type_as(x) ---------- From 3ad6169f187ea41aa1534a1d9a3b978d053dca2b Mon Sep 17 00:00:00 2001 From: Jakub Date: Sat, 14 Mar 2020 18:02:40 +0100 Subject: [PATCH 5/7] Neptune Logger Improvements (#1084) * removed 
project and experiment from getstate * added tests for closing experiment, updated token in example to user neptuner * updated teoken * Update neptune.py added a link to example experiment * added exmaple experiment link * dropped duplication * flake fixes * merged with master, added changes information to CHANGELOG --- CHANGELOG.md | 1 + pytorch_lightning/loggers/neptune.py | 125 ++++++++++++++++++++------- tests/loggers/test_neptune.py | 31 ++++++- 3 files changed, 123 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11a24b8924c5a3..1c35a2ee7eb115 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed +- Improved `NeptuneLogger` by adding `close_after_fit` argument to allow logging after training([#908](https://github.com/PyTorchLightning/pytorch-lightning/pull/1084)) - Changed default TQDM to use `tqdm.auto` for prettier outputs in IPython notebooks ([#752](https://github.com/PyTorchLightning/pytorch-lightning/pull/752)) - Changed `pytorch_lightning.logging` to `pytorch_lightning.loggers` ([#767](https://github.com/PyTorchLightning/pytorch-lightning/pull/767)) - Moved the default `tqdm_dict` definition from Trainer to `LightningModule`, so it can be overridden by the user ([#749](https://github.com/PyTorchLightning/pytorch-lightning/pull/749)) diff --git a/pytorch_lightning/loggers/neptune.py b/pytorch_lightning/loggers/neptune.py index 5e011d7426424d..372d215c34df2e 100644 --- a/pytorch_lightning/loggers/neptune.py +++ b/pytorch_lightning/loggers/neptune.py @@ -1,5 +1,5 @@ """ -Log using `neptune-logger `_ +Log using `neptune-logger `_ .. _neptune: @@ -30,12 +30,13 @@ class NeptuneLogger(LightningLoggerBase): """ def __init__(self, api_key: Optional[str] = None, project_name: Optional[str] = None, - offline_mode: bool = False, experiment_name: Optional[str] = None, + close_after_fit: Optional[bool] = True, offline_mode: bool = False, + experiment_name: Optional[str] = None, upload_source_files: Optional[List[str]] = None, params: Optional[Dict[str, Any]] = None, properties: Optional[Dict[str, Any]] = None, tags: Optional[List[str]] = None, **kwargs): r""" - Initialize a neptune.ml logger. + Initialize a neptune.ai logger. .. note:: Requires either an API Key (online mode) or a local directory path (offline mode) @@ -44,10 +45,11 @@ def __init__(self, api_key: Optional[str] = None, project_name: Optional[str] = # ONLINE MODE from pytorch_lightning.loggers import NeptuneLogger # arguments made to NeptuneLogger are passed on to the neptune.experiments.Experiment class + # We are using an api_key for the anonymous user "neptuner" but you can use your own. neptune_logger = NeptuneLogger( - api_key=os.environ["NEPTUNE_API_TOKEN"], - project_name="USER_NAME/PROJECT_NAME", + api_key="ANONYMOUS" + project_name="shared/pytorch-lightning-integration", experiment_name="default", # Optional, params={"max_epochs": 10}, # Optional, tags=["pytorch-lightning","mlp"] # Optional, @@ -85,40 +87,91 @@ def any_lightning_module_function_or_hook(...): self.logger.experiment.log_artifact("model_checkpoint.pt", prediction_image) # log model checkpoint self.logger.experiment.whatever_neptune_supports(...) + If you want to log objects after the training is finished use close_after_train=False: + + .. code-block:: python + + neptune_logger = NeptuneLogger( + ... + close_after_fit=False, + ...) 
+ trainer = Trainer(logger=neptune_logger) + trainer.fit() + + # Log test metrics + trainer.test(model) + + # Log additional metrics + from sklearn.metrics import accuracy_score + + accuracy = accuracy_score(y_true, y_pred) + neptune_logger.experiment.log_metric('test_accuracy', accuracy) + + # Log charts + from scikitplot.metrics import plot_confusion_matrix + import matplotlib.pyplot as plt + + fig, ax = plt.subplots(figsize=(16, 12)) + plot_confusion_matrix(y_true, y_pred, ax=ax) + neptune_logger.experiment.log_image('confusion_matrix', fig) + + # Save checkpoints folder + neptune_logger.experiment.log_artifact('my/checkpoints') + + # When you are done, stop the experiment + neptune_logger.experiment.stop() + + You can go and see an example experiment here: + https://ui.neptune.ai/o/shared/org/pytorch-lightning-integration/e/PYTOR-66/charts + Args: - api_key (str | None): Required in online mode. Neputne API token, found on https://neptune.ml. + api_key: Required in online mode. + Neputne API token, found on https://neptune.ai Read how to get your API key - https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token. - project_name (str): Required in online mode. Qualified name of a project in a form of + https://docs.neptune.ai/python-api/tutorials/get-started.html#copy-api-token. + It is recommended to keep it in the `NEPTUNE_API_TOKEN` + environment variable and then you can leave `api_key=None` + project_name: Required in online mode. Qualified name of a project in a form of "namespace/project_name" for example "tom/minst-classification". If None, the value of NEPTUNE_PROJECT environment variable will be taken. - You need to create the project in https://neptune.ml first. - offline_mode (bool): Optional default False. If offline_mode=True no logs will be send to neptune. - Usually used for debug purposes. - experiment_name (str|None): Optional. Editable name of the experiment. - Name is displayed in the experiment’s Details (Metadata section) and in experiments view as a column. - upload_source_files (list|None): Optional. List of source files to be uploaded. - Must be list of str or single str. Uploaded sources are displayed in the experiment’s Source code tab. + You need to create the project in https://neptune.ai first. + offline_mode: Optional default False. If offline_mode=True no logs will be send + to neptune. Usually used for debug purposes. + close_after_fit: Optional default True. If close_after_fit=False the experiment + will not be closed after training and additional metrics, + images or artifacts can be logged. Also, remember to close the experiment explicitly + by running neptune_logger.experiment.stop(). + experiment_name: Optional. Editable name of the experiment. + Name is displayed in the experiment’s Details (Metadata section) and i + n experiments view as a column. + upload_source_files: Optional. List of source files to be uploaded. + Must be list of str or single str. Uploaded sources are displayed + in the experiment’s Source code tab. If None is passed, Python file from which experiment was created will be uploaded. - Pass empty list ([]) to upload no files. Unix style pathname pattern expansion is supported. + Pass empty list ([]) to upload no files. + Unix style pathname pattern expansion is supported. For example, you can pass '\*.py' to upload all python source files from the current directory. For recursion lookup use '\**/\*.py' (for Python 3.5 and later). For more information see glob library. - params (dict|None): Optional. 
Parameters of the experiment. After experiment creation params are read-only. - Parameters are displayed in the experiment’s Parameters section and each key-value pair can be - viewed in experiments view as a column. - properties (dict|None): Optional default is {}. Properties of the experiment. - They are editable after experiment is created. Properties are displayed in the experiment’s Details and + params: Optional. Parameters of the experiment. + After experiment creation params are read-only. + Parameters are displayed in the experiment’s Parameters section and + each key-value pair can be viewed in experiments view as a column. + properties: Optional default is {}. Properties of the experiment. + They are editable after experiment is created. + Properties are displayed in the experiment’s Details and each key-value pair can be viewed in experiments view as a column. - tags (list|None): Optional default []. Must be list of str. Tags of the experiment. + tags: Optional default []. Must be list of str. Tags of the experiment. They are editable after experiment is created (see: append_tag() and remove_tag()). - Tags are displayed in the experiment’s Details and can be viewed in experiments view as a column. + Tags are displayed in the experiment’s Details and can be viewed + in experiments view as a column. """ super().__init__() self.api_key = api_key self.project_name = project_name self.offline_mode = offline_mode + self.close_after_fit = close_after_fit self.experiment_name = experiment_name self.upload_source_files = upload_source_files self.params = params @@ -138,6 +191,12 @@ def any_lightning_module_function_or_hook(...): log.info(f'NeptuneLogger was initialized in {self.mode} mode') + def __getstate__(self): + state = self.__dict__.copy() + # cannot be pickled + state['_experiment'] = None + return state + @property def experiment(self) -> Experiment: r""" @@ -150,15 +209,14 @@ def experiment(self) -> Experiment: """ - if self._experiment is not None: - return self._experiment - else: - self._experiment = neptune.create_experiment(name=self.experiment_name, - params=self.params, - properties=self.properties, - tags=self.tags, - upload_source_files=self.upload_source_files, - **self._kwargs) + if self._experiment is None: + self._experiment = neptune.create_experiment( + name=self.experiment_name, + params=self.params, + properties=self.properties, + tags=self.tags, + upload_source_files=self.upload_source_files, + **self._kwargs) return self._experiment @rank_zero_only @@ -184,7 +242,8 @@ def log_metrics( @rank_zero_only def finalize(self, status: str) -> None: - self.experiment.stop() + if self.close_after_fit: + self.experiment.stop() @property def name(self) -> str: diff --git a/tests/loggers/test_neptune.py b/tests/loggers/test_neptune.py index 6130bfb532c7bd..5c2ab5b52029a3 100644 --- a/tests/loggers/test_neptune.py +++ b/tests/loggers/test_neptune.py @@ -1,5 +1,6 @@ import pickle -from unittest.mock import patch + +from unittest.mock import patch, MagicMock import torch @@ -96,3 +97,31 @@ def test_neptune_pickle(tmpdir): pkl_bytes = pickle.dumps(trainer) trainer2 = pickle.loads(pkl_bytes) trainer2.logger.log_metrics({'acc': 1.0}) + + +def test_neptune_leave_open_experiment_after_fit(tmpdir): + """Verify that neptune experiment was closed after training""" + tutils.reset_seed() + + hparams = tutils.get_hparams() + model = LightningTestModel(hparams) + + def _run_training(logger): + logger._experiment = MagicMock() + + trainer_options = dict( + default_save_path=tmpdir, + 
max_epochs=1, + train_percent_check=0.05, + logger=logger + ) + trainer = Trainer(**trainer_options) + trainer.fit(model) + return logger + + logger_close_after_fit = _run_training(NeptuneLogger(offline_mode=True)) + assert logger_close_after_fit._experiment.stop.call_count == 1 + + logger_open_after_fit = _run_training( + NeptuneLogger(offline_mode=True, close_after_fit=False)) + assert logger_open_after_fit._experiment.stop.call_count == 0 From 774d9be3577ce1f05d895134d5ce70c16877d08b Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 16 Mar 2020 00:46:39 +0100 Subject: [PATCH 6/7] Fix docs - missing Trainer (#1159) * drop pandas * formatting --- docs/source/early_stopping.rst | 4 ++-- docs/source/experiment_logging.rst | 12 ++++++------ docs/source/experiment_reporting.rst | 2 +- docs/source/fast_training.rst | 2 +- docs/source/introduction_guide.rst | 2 +- docs/source/training_tricks.rst | 4 ++-- pytorch_lightning/core/lightning.py | 4 ++-- pytorch_lightning/loggers/trains.py | 7 +++---- 8 files changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/source/early_stopping.rst b/docs/source/early_stopping.rst index ce288d33bd4ab9..f729cfe12ca6d8 100644 --- a/docs/source/early_stopping.rst +++ b/docs/source/early_stopping.rst @@ -11,7 +11,7 @@ Enable Early Stopping --------------------- There are two ways to enable early stopping. -.. note:: See: :ref:`trainer` +.. seealso:: :ref:`trainer` .. code-block:: python @@ -35,4 +35,4 @@ To disable early stopping pass ``False`` to the `early_stop_callback`. Note that ``None`` will not disable early stopping but will lead to the default behaviour. -.. note:: See: :ref:`trainer` +.. seealso:: :ref:`trainer` diff --git a/docs/source/experiment_logging.rst b/docs/source/experiment_logging.rst index b3fe825aede77e..a05329124c1250 100644 --- a/docs/source/experiment_logging.rst +++ b/docs/source/experiment_logging.rst @@ -7,7 +7,7 @@ Comet.ml `Comet.ml `_ is a third-party logger. To use CometLogger as your logger do the following. -.. note:: See: :ref:`comet` docs. +.. seealso:: :ref:`comet` docs. .. code-block:: python @@ -38,7 +38,7 @@ Neptune.ai `Neptune.ai `_ is a third-party logger. To use Neptune.ai as your logger do the following. -.. note:: See: :ref:`neptune` docs. +.. seealso:: :ref:`neptune` docs. .. code-block:: python @@ -68,7 +68,7 @@ allegro.ai TRAINS `allegro.ai `_ is a third-party logger. To use TRAINS as your logger do the following. -.. note:: See: :ref:`trains` docs. +.. seealso:: :ref:`trains` docs. .. code-block:: python @@ -95,7 +95,7 @@ Tensorboard To use `Tensorboard `_ as your logger do the following. -.. note:: See: TensorBoardLogger :ref:`tf-logger` +.. seealso:: TensorBoardLogger :ref:`tf-logger` .. code-block:: python @@ -121,7 +121,7 @@ Test Tube `Test Tube `_ is a tensorboard logger but with nicer file structure. To use TestTube as your logger do the following. -.. note:: See: TestTube :ref:`testTube` +.. seealso:: TestTube :ref:`testTube` .. code-block:: python @@ -146,7 +146,7 @@ Wandb `Wandb `_ is a third-party logger. To use Wandb as your logger do the following. -.. note:: See: :ref:`wandb` docs +.. seealso:: :ref:`wandb` docs .. code-block:: python diff --git a/docs/source/experiment_reporting.rst b/docs/source/experiment_reporting.rst index aa5642ab204571..0063a92694128f 100644 --- a/docs/source/experiment_reporting.rst +++ b/docs/source/experiment_reporting.rst @@ -22,7 +22,7 @@ Control log writing frequency Writing to a logger can be expensive. 
In Lightning you can set the interval at which you want to log using this trainer flag. -.. note:: See: :ref:`trainer` +.. seealso:: :ref:`trainer` .. code-block:: python diff --git a/docs/source/fast_training.rst b/docs/source/fast_training.rst index 5e7d1c599d1717..4e5c189d3aa218 100644 --- a/docs/source/fast_training.rst +++ b/docs/source/fast_training.rst @@ -16,7 +16,7 @@ Force training for min or max epochs ------------------------------------- It can be useful to force training for a minimum number of epochs or limit to a max number. -.. note:: See: :ref:`trainer` +.. seealso:: :ref:`trainer` .. code-block:: python diff --git a/docs/source/introduction_guide.rst b/docs/source/introduction_guide.rst index b048c07175a163..2071f6733710d2 100644 --- a/docs/source/introduction_guide.rst +++ b/docs/source/introduction_guide.rst @@ -472,7 +472,7 @@ First, change the runtime to TPU (and reinstall lightning). Next, install the required xla library (adds support for PyTorch on TPUs) -.. code-block:: python +.. code-block:: import collections from datetime import datetime, timedelta diff --git a/docs/source/training_tricks.rst b/docs/source/training_tricks.rst index 416fcdb8a8ec3d..2904c11ad1a1f4 100644 --- a/docs/source/training_tricks.rst +++ b/docs/source/training_tricks.rst @@ -7,7 +7,7 @@ Accumulate gradients Accumulated gradients runs K small batches of size N before doing a backwards pass. The effect is a large effective batch size of size KxN. -.. note:: See: :ref:`trainer` +.. seealso:: :ref:`trainer` .. code-block:: python @@ -20,7 +20,7 @@ Gradient Clipping Gradient clipping may be enabled to avoid exploding gradients. Specifically, this will `clip the gradient norm `_ computed over all model parameters together. -.. note:: See: :ref:`trainer` +.. seealso:: :ref:`trainer` .. code-block:: python diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index b531c1542ee6db..fe8a1f5c392bfc 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -8,8 +8,8 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union import torch -import torch.distributed as dist from torch import Tensor +from torch.distributed import init_process_group from torch.nn.parallel import DistributedDataParallel from torch.optim import Adam from torch.optim.optimizer import Optimizer @@ -859,7 +859,7 @@ def init_ddp_connection(self): root_node = self.trainer.resolve_root_node_address(root_node) os.environ['MASTER_ADDR'] = root_node - dist.init_process_group('nccl', rank=proc_rank, world_size=world_size) + init_process_group('nccl', rank=proc_rank, world_size=world_size) def configure_apex( self, diff --git a/pytorch_lightning/loggers/trains.py b/pytorch_lightning/loggers/trains.py index a56c6c126798e5..7d2bd01e35d8bc 100644 --- a/pytorch_lightning/loggers/trains.py +++ b/pytorch_lightning/loggers/trains.py @@ -29,9 +29,7 @@ def any_lightning_module_function_or_hook(...): from pathlib import Path from typing import Any, Dict, Optional, Union -import PIL import numpy as np -import pandas as pd import torch try: @@ -79,6 +77,7 @@ def experiment(self) -> trains.Task: Example: .. 
code-block:: python + self.logger.experiment.some_trains_function() """ @@ -180,7 +179,7 @@ def log_text(self, text: str) -> None: @rank_zero_only def log_image( self, title: str, series: str, - image: Union[str, np.ndarray, PIL.Image.Image, torch.Tensor], + image: Union[str, np.ndarray, 'PIL.Image', torch.Tensor], step: Optional[int] = None) -> None: """Log Debug image in TRAINS experiment @@ -217,7 +216,7 @@ def log_image( @rank_zero_only def log_artifact( self, name: str, - artifact: Union[str, Path, Dict[str, Any], pd.DataFrame, np.ndarray, PIL.Image.Image], + artifact: Union[str, Path, Dict[str, Any], 'pandas.DataFrame', 'numpy.ndarray', 'PIL.Image.Image'], metadata: Optional[Dict[str, Any]] = None, delete_after_upload: bool = False) -> None: """Save an artifact (file/object) in TRAINS experiment storage. From 384e124490f7a629dc677fc5b658b69afade0a04 Mon Sep 17 00:00:00 2001 From: Nicki Skafte Date: Mon, 16 Mar 2020 19:35:10 +0100 Subject: [PATCH 7/7] ReduceLROnPlateau bug fix (#1126) * bug fix and test * update CHANGELOG.md Co-authored-by: Nicki Skafte --- CHANGELOG.md | 2 +- pytorch_lightning/trainer/trainer.py | 4 +-- tests/models/__init__.py | 3 ++- tests/models/mixins.py | 10 ++++++++ tests/trainer/test_optimizers.py | 37 +++++++++++++++++++++++++++- 5 files changed, 51 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c35a2ee7eb115..6e12875f459898 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Fixed -- +- Fixed bug related to type cheking of `ReduceLROnPlateau` lr schedulers([#1114](https://github.com/PyTorchLightning/pytorch-lightning/issues/1114)) ## [0.7.1] - 2020-03-07 diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 3f6d97091cd174..826bd4eded8d33 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -707,8 +707,8 @@ def configure_schedulers(self, schedulers: list): if 'scheduler' not in scheduler: raise ValueError(f'Lr scheduler should have key `scheduler`', ' with item being a lr scheduler') - scheduler['reduce_on_plateau'] = \ - isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau) + scheduler['reduce_on_plateau'] = isinstance( + scheduler['scheduler'], optim.lr_scheduler.ReduceLROnPlateau) lr_schedulers.append({**default_config, **scheduler}) diff --git a/tests/models/__init__.py b/tests/models/__init__.py index 4992e70a2ec30f..67206a63d0fe6b 100644 --- a/tests/models/__init__.py +++ b/tests/models/__init__.py @@ -24,7 +24,8 @@ LightInfTestDataloader, LightTestOptimizerWithSchedulingMixin, LightTestMultipleOptimizersWithSchedulingMixin, - LightTestOptimizersWithMixedSchedulingMixin + LightTestOptimizersWithMixedSchedulingMixin, + LightTestReduceLROnPlateauMixin ) diff --git a/tests/models/mixins.py b/tests/models/mixins.py index fd3f0ddea1b9f4..0be691726e209c 100644 --- a/tests/models/mixins.py +++ b/tests/models/mixins.py @@ -678,6 +678,16 @@ def configure_optimizers(self): [{'scheduler': lr_scheduler1, 'interval': 'step'}, lr_scheduler2] +class LightTestReduceLROnPlateauMixin: + def configure_optimizers(self): + if self.hparams.optimizer_name == 'lbfgs': + optimizer = optim.LBFGS(self.parameters(), lr=self.hparams.learning_rate) + else: + optimizer = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) + lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer) + return [optimizer], [lr_scheduler] + + def _get_output_metric(output, 
name): if isinstance(output, dict): val = output[name] diff --git a/tests/trainer/test_optimizers.py b/tests/trainer/test_optimizers.py index bc5dde5f75013f..3ea0e3ff2aab76 100644 --- a/tests/trainer/test_optimizers.py +++ b/tests/trainer/test_optimizers.py @@ -10,9 +10,12 @@ from tests.models import ( TestModelBase, LightTrainDataloader, + LightValidationStepMixin, + LightValidationMixin, LightTestOptimizerWithSchedulingMixin, LightTestMultipleOptimizersWithSchedulingMixin, - LightTestOptimizersWithMixedSchedulingMixin + LightTestOptimizersWithMixedSchedulingMixin, + LightTestReduceLROnPlateauMixin ) @@ -144,3 +147,35 @@ class CurrentTestModel( # Called every 3 steps, meaning for 1 epoch of 11 batches, it is called 3 times assert init_lr * 0.1 == adjusted_lr2, \ 'lr for optimizer 2 not adjusted correctly' + + +def test_reduce_lr_on_plateau_scheduling(tmpdir): + tutils.reset_seed() + + class CurrentTestModel( + LightTestReduceLROnPlateauMixin, + LightTrainDataloader, + LightValidationMixin, + LightValidationStepMixin, + TestModelBase): + pass + + hparams = tutils.get_hparams() + model = CurrentTestModel(hparams) + + # logger file to get meta + trainer_options = dict( + default_save_path=tmpdir, + max_epochs=1, + val_percent_check=0.1, + train_percent_check=0.2 + ) + + # fit model + trainer = Trainer(**trainer_options) + results = trainer.fit(model) + + assert trainer.lr_schedulers[0] == \ + dict(scheduler=trainer.lr_schedulers[0]['scheduler'], monitor='val_loss', + interval='epoch', frequency=1, reduce_on_plateau=True), \ + 'lr schduler was not correctly converted to dict'
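For illustration only (not part of the patch series), a minimal sketch of the scheduler configuration that ``configure_schedulers`` normalizes in the fix above: a ``ReduceLROnPlateau`` returned from ``configure_optimizers`` either bare or under the ``'scheduler'`` key, with the wrapped defaults mirroring the dict asserted in the test:

.. code-block:: python

    import torch

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=2)
        # a bare scheduler is wrapped by the trainer into
        # {'scheduler': scheduler, 'monitor': 'val_loss', 'interval': 'epoch',
        #  'frequency': 1, 'reduce_on_plateau': True}
        return [optimizer], [scheduler]
        # the explicit dict form must carry the 'scheduler' key, e.g.
        # return [optimizer], [{'scheduler': scheduler, 'monitor': 'val_loss'}]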