diff --git a/.codecov.yml b/.codecov.yml
index 51d32ee8a3be37..726a198d5a3a8a 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -2,9 +2,15 @@
# Validation check:
# $ curl --data-binary @.codecov.yml https://codecov.io/validate
+
+# https://docs.codecov.io/docs/codecovyml-reference
codecov:
+ bot: "codecov-io"
+ strict_yaml_branch: "yaml-config"
+ require_ci_to_pass: yes
notify:
- require_ci_to_pass: yes
+ # after_n_builds: 2
+ wait_for_ci: yes
coverage:
precision: 0 # 2 = xx.xx%, 0 = xx%
@@ -16,7 +22,7 @@ coverage:
default:
against: auto
target: 99% # specify the target coverage for each commit status
- threshold: 20% # allow this little decrease on project
+ threshold: 30% # allow this little decrease on project
# https://github.com/codecov/support/wiki/Filtering-Branches
# branches: master
if_ci_failed: error
@@ -24,7 +30,7 @@ coverage:
patch:
default:
against: auto
- target: 40% # specify the target "X%" coverage to hit
+ target: 50% # specify the target "X%" coverage to hit
# threshold: 50% # allow this much decrease on patch
changes: false
diff --git a/.drone.yml b/.drone.yml
index 60a6375bbe7d78..4f2259ef2c7f40 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -7,6 +7,10 @@ name: torch-GPU
steps:
- name: testing
image: nvcr.io/nvidia/pytorch:20.02-py3
+ environment:
+ SLURM_LOCALID: 0
+ CODECOV_TOKEN:
+ from_secret: codecov_token
commands:
- python --version
- pip install pip -U
@@ -14,10 +18,9 @@ steps:
- nvidia-smi
#- pip install torch==1.3
- pip install -r requirements.txt --user
- - pip install coverage pytest pytest-cov pytest-flake8
+ - pip install coverage pytest pytest-cov pytest-flake8 codecov
- pip install -r ./tests/requirements.txt --user
- pip list
- - export SLURM_LOCALID=0
- python -c "import torch ; print(' & '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) if torch.cuda.is_available() else 'only CPU')"
- coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules # --flake8
- - coverage report
+ - codecov --token $CODECOV_TOKEN # --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cf45dfe1840d01..176b1826b37199 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,8 +8,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
### Added
+- Added `TrainsLogger` class ([#1122](https://github.com/PyTorchLightning/pytorch-lightning/pull/1122))
- Added type hints to `pytorch_lightning.core` ([#946](https://github.com/PyTorchLightning/pytorch-lightning/pull/946))
- Added support for IterableDataset in validation and testing ([#1104](https://github.com/PyTorchLightning/pytorch-lightning/pull/1104))
+- Added support for non-primitive types in hparams for TensorboardLogger ([#1130](https://github.com/PyTorchLightning/pytorch-lightning/pull/1130))
+
### Changed
@@ -25,6 +28,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
### Fixed
+- Fixed bug related to type checking of `ReduceLROnPlateau` lr schedulers ([#1114](https://github.com/PyTorchLightning/pytorch-lightning/issues/1114))
- Fixed a bug to ensure lightning checkpoints to be backward compatible ([#1132](https://github.com/PyTorchLightning/pytorch-lightning/pull/1132))
## [0.7.1] - 2020-03-07
@@ -65,6 +69,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
### Changed
+- Improved `NeptuneLogger` by adding `close_after_fit` argument to allow logging after training ([#1084](https://github.com/PyTorchLightning/pytorch-lightning/pull/1084))
- Changed default TQDM to use `tqdm.auto` for prettier outputs in IPython notebooks ([#752](https://github.com/PyTorchLightning/pytorch-lightning/pull/752))
- Changed `pytorch_lightning.logging` to `pytorch_lightning.loggers` ([#767](https://github.com/PyTorchLightning/pytorch-lightning/pull/767))
- Moved the default `tqdm_dict` definition from Trainer to `LightningModule`, so it can be overridden by the user ([#749](https://github.com/PyTorchLightning/pytorch-lightning/pull/749))
diff --git a/README.md b/README.md
index a9f742e0288692..ea289d73073c2f 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
[![PyPI Status](https://badge.fury.io/py/pytorch-lightning.svg)](https://badge.fury.io/py/pytorch-lightning)
[![PyPI Status](https://pepy.tech/badge/pytorch-lightning)](https://pepy.tech/project/pytorch-lightning)
-[![Coverage](docs/source/_static/images/coverage.svg)](https://github.com/PytorchLightning/pytorch-lightning/tree/master/tests#running-coverage)
+[![codecov](https://codecov.io/gh/PyTorchLightning/pytorch-lightning/branch/master/graph/badge.svg)](https://codecov.io/gh/PyTorchLightning/pytorch-lightning)
[![CodeFactor](https://www.codefactor.io/repository/github/pytorchlightning/pytorch-lightning/badge)](https://www.codefactor.io/repository/github/pytorchlightning/pytorch-lightning)
[![ReadTheDocs](https://readthedocs.org/projects/pytorch-lightning/badge/?version=0.7.1)](https://pytorch-lightning.readthedocs.io/en/0.7.1/)
diff --git a/docs/source/_static/images/coverage.svg b/docs/source/_static/images/coverage.svg
deleted file mode 100644
index 6bfc8faf24d3c2..00000000000000
--- a/docs/source/_static/images/coverage.svg
+++ /dev/null
@@ -1,21 +0,0 @@
-
-
diff --git a/docs/source/conf.py b/docs/source/conf.py
index d1c0b54b0a7f10..ce8508f18515df 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -297,7 +297,8 @@ def setup(app):
MOCK_REQUIRE_PACKAGES.append(pkg.rstrip())
# TODO: better parse from package since the import name and package name may differ
-MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'test_tube', 'mlflow', 'comet_ml', 'wandb', 'neptune']
+MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'test_tube',
+ 'mlflow', 'comet_ml', 'wandb', 'neptune', 'trains']
autodoc_mock_imports = MOCK_REQUIRE_PACKAGES + MOCK_MANUAL_PACKAGES
# for mod_name in MOCK_REQUIRE_PACKAGES:
# sys.modules[mod_name] = mock.Mock()
diff --git a/docs/source/early_stopping.rst b/docs/source/early_stopping.rst
index ce288d33bd4ab9..f729cfe12ca6d8 100644
--- a/docs/source/early_stopping.rst
+++ b/docs/source/early_stopping.rst
@@ -11,7 +11,7 @@ Enable Early Stopping
---------------------
There are two ways to enable early stopping.
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
.. code-block:: python
@@ -35,4 +35,4 @@ To disable early stopping pass ``False`` to the `early_stop_callback`.
Note that ``None`` will not disable early stopping but will lead to the
default behaviour.
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
diff --git a/docs/source/experiment_logging.rst b/docs/source/experiment_logging.rst
index 1edd067511b48c..a05329124c1250 100644
--- a/docs/source/experiment_logging.rst
+++ b/docs/source/experiment_logging.rst
@@ -7,7 +7,7 @@ Comet.ml
`Comet.ml `_ is a third-party logger.
To use CometLogger as your logger do the following.
-.. note:: See: :ref:`comet` docs.
+.. seealso:: :ref:`comet` docs.
.. code-block:: python
@@ -38,7 +38,7 @@ Neptune.ai
`Neptune.ai `_ is a third-party logger.
To use Neptune.ai as your logger do the following.
-.. note:: See: :ref:`neptune` docs.
+.. seealso:: :ref:`neptune` docs.
.. code-block:: python
@@ -62,12 +62,40 @@ The Neptune.ai is available anywhere except ``__init__`` in your LightningModule
some_img = fake_image()
self.logger.experiment.add_image('generated_images', some_img, 0)
+allegro.ai TRAINS
+^^^^^^^^^^^^^^^^^
+
+`allegro.ai <https://github.com/allegroai/trains/>`_ is a third-party logger.
+To use TRAINS as your logger do the following.
+
+.. seealso:: :ref:`trains` docs.
+
+.. code-block:: python
+
+ from pytorch_lightning.loggers import TrainsLogger
+
+ trains_logger = TrainsLogger(
+ project_name="examples",
+ task_name="pytorch lightning test"
+ )
+ trainer = Trainer(logger=trains_logger)
+
+The TrainsLogger is available anywhere in your LightningModule
+
+.. code-block:: python
+
+ class MyModule(pl.LightningModule):
+
+ def any_lightning_module_function_or_hook(self, ...):
+ some_img = fake_image()
+ self.logger.log_image('debug', 'generated_image_0', some_img, 0)
+
Tensorboard
^^^^^^^^^^^
To use `Tensorboard `_ as your logger do the following.
-.. note:: See: TensorBoardLogger :ref:`tf-logger`
+.. seealso:: TensorBoardLogger :ref:`tf-logger`
.. code-block:: python
@@ -93,7 +121,7 @@ Test Tube
`Test Tube `_ is a tensorboard logger but with nicer file structure.
To use TestTube as your logger do the following.
-.. note:: See: TestTube :ref:`testTube`
+.. seealso:: TestTube :ref:`testTube`
.. code-block:: python
@@ -118,7 +146,7 @@ Wandb
`Wandb `_ is a third-party logger.
To use Wandb as your logger do the following.
-.. note:: See: :ref:`wandb` docs
+.. seealso:: :ref:`wandb` docs
.. code-block:: python
diff --git a/docs/source/experiment_reporting.rst b/docs/source/experiment_reporting.rst
index a738a234c96745..0063a92694128f 100644
--- a/docs/source/experiment_reporting.rst
+++ b/docs/source/experiment_reporting.rst
@@ -22,7 +22,7 @@ Control log writing frequency
Writing to a logger can be expensive. In Lightning you can set the interval at which you
want to log using this trainer flag.
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
.. code-block:: python
@@ -32,7 +32,7 @@ want to log using this trainer flag.
Log metrics
^^^^^^^^^^^
-To plot metrics into whatever logger you passed in (tensorboard, comet, neptune, etc...)
+To plot metrics into whatever logger you passed in (tensorboard, comet, neptune, TRAINS, etc...)
1. training_epoch_end, validation_epoch_end, test_epoch_end will all log anything in the "log" key of the return dict.
diff --git a/docs/source/fast_training.rst b/docs/source/fast_training.rst
index 5e7d1c599d1717..4e5c189d3aa218 100644
--- a/docs/source/fast_training.rst
+++ b/docs/source/fast_training.rst
@@ -16,7 +16,7 @@ Force training for min or max epochs
-------------------------------------
It can be useful to force training for a minimum number of epochs or limit to a max number.
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
.. code-block:: python
diff --git a/docs/source/introduction_guide.rst b/docs/source/introduction_guide.rst
index b048c07175a163..2071f6733710d2 100644
--- a/docs/source/introduction_guide.rst
+++ b/docs/source/introduction_guide.rst
@@ -472,7 +472,7 @@ First, change the runtime to TPU (and reinstall lightning).
Next, install the required xla library (adds support for PyTorch on TPUs)
-.. code-block:: python
+.. code-block::
import collections
from datetime import datetime, timedelta
diff --git a/docs/source/multi_gpu.rst b/docs/source/multi_gpu.rst
index fdd5a92b467080..baf99d87d1aaf7 100644
--- a/docs/source/multi_gpu.rst
+++ b/docs/source/multi_gpu.rst
@@ -38,7 +38,7 @@ This will make your code scale to any arbitrary number of GPUs or TPUs with Ligh
# with lightning
def forward(self, x):
z = torch.Tensor(2, 3)
- z = z.type_as(x.type())
+ z = z.type_as(x)
Remove samplers
^^^^^^^^^^^^^^^
diff --git a/docs/source/training_tricks.rst b/docs/source/training_tricks.rst
index 416fcdb8a8ec3d..2904c11ad1a1f4 100644
--- a/docs/source/training_tricks.rst
+++ b/docs/source/training_tricks.rst
@@ -7,7 +7,7 @@ Accumulate gradients
Accumulated gradients runs K small batches of size N before doing a backwards pass.
The effect is a large effective batch size of size KxN.
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
.. code-block:: python
@@ -20,7 +20,7 @@ Gradient Clipping
Gradient clipping may be enabled to avoid exploding gradients. Specifically, this will `clip the gradient
norm `_ computed over all model parameters together.
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
.. code-block:: python
diff --git a/environment.yml b/environment.yml
index 6dae860611b03c..1aa7c8f14820e7 100644
--- a/environment.yml
+++ b/environment.yml
@@ -32,3 +32,4 @@ dependencies:
- comet_ml>=1.0.56
- wandb>=0.8.21
- neptune-client>=0.4.4
+ - trains>=0.13.3
diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py
index fac76ffd61f046..9cd75606b5c6fd 100644
--- a/pytorch_lightning/core/__init__.py
+++ b/pytorch_lightning/core/__init__.py
@@ -228,7 +228,7 @@ def training_step(self, batch, batch_idx):
# put the z on the appropriate gpu or tpu core
z = sample_noise()
- z = z.type_as(x.type())
+ z = z.type_as(x)
----------
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 25dd9c02c4640e..5766dba56bceca 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -8,8 +8,8 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import torch
-import torch.distributed as dist
from torch import Tensor
+from torch.distributed import init_process_group
from torch.nn.parallel import DistributedDataParallel
from torch.optim import Adam
from torch.optim.optimizer import Optimizer
@@ -859,7 +859,7 @@ def init_ddp_connection(self):
root_node = self.trainer.resolve_root_node_address(root_node)
os.environ['MASTER_ADDR'] = root_node
- dist.init_process_group('nccl', rank=proc_rank, world_size=world_size)
+ init_process_group('nccl', rank=proc_rank, world_size=world_size)
def configure_apex(
self,
diff --git a/pytorch_lightning/loggers/__init__.py b/pytorch_lightning/loggers/__init__.py
index adcba876d26f5b..93d1b737aab9b6 100644
--- a/pytorch_lightning/loggers/__init__.py
+++ b/pytorch_lightning/loggers/__init__.py
@@ -119,3 +119,9 @@ def any_lightning_module_function_or_hook(...):
__all__.append('WandbLogger')
except ImportError:
pass
+
+try:
+ from .trains import TrainsLogger
+ __all__.append('TrainsLogger')
+except ImportError:
+ pass
diff --git a/pytorch_lightning/loggers/base.py b/pytorch_lightning/loggers/base.py
index 5295bee02fe8ff..8c3daa29cb96e5 100644
--- a/pytorch_lightning/loggers/base.py
+++ b/pytorch_lightning/loggers/base.py
@@ -4,6 +4,8 @@
from functools import wraps
from typing import Union, Optional, Dict, Iterable, Any, Callable, List
+import torch
+
def rank_zero_only(fn: Callable):
"""Decorate a logger method to run it only on the process with rank 0.
@@ -42,7 +44,8 @@ def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None):
"""
pass
- def _convert_params(self, params: Union[Dict[str, Any], Namespace]) -> Dict[str, Any]:
+ @staticmethod
+ def _convert_params(params: Union[Dict[str, Any], Namespace]) -> Dict[str, Any]:
# in case converting from namespace
if isinstance(params, Namespace):
params = vars(params)
@@ -52,6 +55,29 @@ def _convert_params(self, params: Union[Dict[str, Any], Namespace]) -> Dict[str,
return params
+ @staticmethod
+ def _sanitize_params(params: Dict[str, Any]) -> Dict[str, Any]:
+ """Returns params with non-primitives converted to strings for logging
+
+ >>> params = {"float": 0.3,
+ ... "int": 1,
+ ... "string": "abc",
+ ... "bool": True,
+ ... "list": [1, 2, 3],
+ ... "namespace": Namespace(foo=3),
+ ... "layer": torch.nn.BatchNorm1d}
+ >>> import pprint
+ >>> pprint.pprint(LightningLoggerBase._sanitize_params(params)) # doctest: +NORMALIZE_WHITESPACE
+ {'bool': True,
+ 'float': 0.3,
+ 'int': 1,
+ 'layer': "<class 'torch.nn.modules.batchnorm.BatchNorm1d'>",
+ 'list': '[1, 2, 3]',
+ 'namespace': 'Namespace(foo=3)',
+ 'string': 'abc'}
+ """
+ return {k: v if type(v) in [bool, int, float, str, torch.Tensor] else str(v) for k, v in params.items()}
+
@abstractmethod
def log_hyperparams(self, params: argparse.Namespace):
"""Record hyperparameters.
diff --git a/pytorch_lightning/loggers/neptune.py b/pytorch_lightning/loggers/neptune.py
index 5e011d7426424d..372d215c34df2e 100644
--- a/pytorch_lightning/loggers/neptune.py
+++ b/pytorch_lightning/loggers/neptune.py
@@ -1,5 +1,5 @@
"""
-Log using `neptune-logger `_
+Log using `neptune-logger <https://neptune.ai>`_
.. _neptune:
@@ -30,12 +30,13 @@ class NeptuneLogger(LightningLoggerBase):
"""
def __init__(self, api_key: Optional[str] = None, project_name: Optional[str] = None,
- offline_mode: bool = False, experiment_name: Optional[str] = None,
+ close_after_fit: Optional[bool] = True, offline_mode: bool = False,
+ experiment_name: Optional[str] = None,
upload_source_files: Optional[List[str]] = None, params: Optional[Dict[str, Any]] = None,
properties: Optional[Dict[str, Any]] = None, tags: Optional[List[str]] = None, **kwargs):
r"""
- Initialize a neptune.ml logger.
+ Initialize a neptune.ai logger.
.. note:: Requires either an API Key (online mode) or a local directory path (offline mode)
@@ -44,10 +45,11 @@ def __init__(self, api_key: Optional[str] = None, project_name: Optional[str] =
# ONLINE MODE
from pytorch_lightning.loggers import NeptuneLogger
# arguments made to NeptuneLogger are passed on to the neptune.experiments.Experiment class
+ # We are using an api_key for the anonymous user "neptuner" but you can use your own.
neptune_logger = NeptuneLogger(
- api_key=os.environ["NEPTUNE_API_TOKEN"],
- project_name="USER_NAME/PROJECT_NAME",
+ api_key="ANONYMOUS",
+ project_name="shared/pytorch-lightning-integration",
experiment_name="default", # Optional,
params={"max_epochs": 10}, # Optional,
tags=["pytorch-lightning","mlp"] # Optional,
@@ -85,40 +87,91 @@ def any_lightning_module_function_or_hook(...):
self.logger.experiment.log_artifact("model_checkpoint.pt", prediction_image) # log model checkpoint
self.logger.experiment.whatever_neptune_supports(...)
+ If you want to log objects after the training is finished use close_after_fit=False:
+
+ .. code-block:: python
+
+ neptune_logger = NeptuneLogger(
+ ...
+ close_after_fit=False,
+ ...)
+ trainer = Trainer(logger=neptune_logger)
+ trainer.fit()
+
+ # Log test metrics
+ trainer.test(model)
+
+ # Log additional metrics
+ from sklearn.metrics import accuracy_score
+
+ accuracy = accuracy_score(y_true, y_pred)
+ neptune_logger.experiment.log_metric('test_accuracy', accuracy)
+
+ # Log charts
+ from scikitplot.metrics import plot_confusion_matrix
+ import matplotlib.pyplot as plt
+
+ fig, ax = plt.subplots(figsize=(16, 12))
+ plot_confusion_matrix(y_true, y_pred, ax=ax)
+ neptune_logger.experiment.log_image('confusion_matrix', fig)
+
+ # Save checkpoints folder
+ neptune_logger.experiment.log_artifact('my/checkpoints')
+
+ # When you are done, stop the experiment
+ neptune_logger.experiment.stop()
+
+ You can go and see an example experiment here:
+ https://ui.neptune.ai/o/shared/org/pytorch-lightning-integration/e/PYTOR-66/charts
+
Args:
- api_key (str | None): Required in online mode. Neputne API token, found on https://neptune.ml.
+ api_key: Required in online mode.
+ Neptune API token, found on https://neptune.ai
Read how to get your API key
- https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token.
- project_name (str): Required in online mode. Qualified name of a project in a form of
+ https://docs.neptune.ai/python-api/tutorials/get-started.html#copy-api-token.
+ It is recommended to keep it in the `NEPTUNE_API_TOKEN`
+ environment variable and then you can leave `api_key=None`
+ project_name: Required in online mode. Qualified name of a project in a form of
"namespace/project_name" for example "tom/minst-classification".
If None, the value of NEPTUNE_PROJECT environment variable will be taken.
- You need to create the project in https://neptune.ml first.
- offline_mode (bool): Optional default False. If offline_mode=True no logs will be send to neptune.
- Usually used for debug purposes.
- experiment_name (str|None): Optional. Editable name of the experiment.
- Name is displayed in the experiment’s Details (Metadata section) and in experiments view as a column.
- upload_source_files (list|None): Optional. List of source files to be uploaded.
- Must be list of str or single str. Uploaded sources are displayed in the experiment’s Source code tab.
+ You need to create the project in https://neptune.ai first.
+ offline_mode: Optional default False. If offline_mode=True no logs will be sent
+ to neptune. Usually used for debug purposes.
+ close_after_fit: Optional default True. If close_after_fit=False the experiment
+ will not be closed after training and additional metrics,
+ images or artifacts can be logged. Also, remember to close the experiment explicitly
+ by running neptune_logger.experiment.stop().
+ experiment_name: Optional. Editable name of the experiment.
+ Name is displayed in the experiment’s Details (Metadata section) and
+ in experiments view as a column.
+ upload_source_files: Optional. List of source files to be uploaded.
+ Must be list of str or single str. Uploaded sources are displayed
+ in the experiment’s Source code tab.
If None is passed, Python file from which experiment was created will be uploaded.
- Pass empty list ([]) to upload no files. Unix style pathname pattern expansion is supported.
+ Pass empty list ([]) to upload no files.
+ Unix style pathname pattern expansion is supported.
For example, you can pass '\*.py'
to upload all python source files from the current directory.
For recursion lookup use '\**/\*.py' (for Python 3.5 and later).
For more information see glob library.
- params (dict|None): Optional. Parameters of the experiment. After experiment creation params are read-only.
- Parameters are displayed in the experiment’s Parameters section and each key-value pair can be
- viewed in experiments view as a column.
- properties (dict|None): Optional default is {}. Properties of the experiment.
- They are editable after experiment is created. Properties are displayed in the experiment’s Details and
+ params: Optional. Parameters of the experiment.
+ After experiment creation params are read-only.
+ Parameters are displayed in the experiment’s Parameters section and
+ each key-value pair can be viewed in experiments view as a column.
+ properties: Optional default is {}. Properties of the experiment.
+ They are editable after experiment is created.
+ Properties are displayed in the experiment’s Details and
each key-value pair can be viewed in experiments view as a column.
- tags (list|None): Optional default []. Must be list of str. Tags of the experiment.
+ tags: Optional default []. Must be list of str. Tags of the experiment.
They are editable after experiment is created (see: append_tag() and remove_tag()).
- Tags are displayed in the experiment’s Details and can be viewed in experiments view as a column.
+ Tags are displayed in the experiment’s Details and can be viewed
+ in experiments view as a column.
"""
super().__init__()
self.api_key = api_key
self.project_name = project_name
self.offline_mode = offline_mode
+ self.close_after_fit = close_after_fit
self.experiment_name = experiment_name
self.upload_source_files = upload_source_files
self.params = params
@@ -138,6 +191,12 @@ def any_lightning_module_function_or_hook(...):
log.info(f'NeptuneLogger was initialized in {self.mode} mode')
+ def __getstate__(self):
+ state = self.__dict__.copy()
+ # cannot be pickled
+ state['_experiment'] = None
+ return state
+
@property
def experiment(self) -> Experiment:
r"""
@@ -150,15 +209,14 @@ def experiment(self) -> Experiment:
"""
- if self._experiment is not None:
- return self._experiment
- else:
- self._experiment = neptune.create_experiment(name=self.experiment_name,
- params=self.params,
- properties=self.properties,
- tags=self.tags,
- upload_source_files=self.upload_source_files,
- **self._kwargs)
+ if self._experiment is None:
+ self._experiment = neptune.create_experiment(
+ name=self.experiment_name,
+ params=self.params,
+ properties=self.properties,
+ tags=self.tags,
+ upload_source_files=self.upload_source_files,
+ **self._kwargs)
return self._experiment
@rank_zero_only
@@ -184,7 +242,8 @@ def log_metrics(
@rank_zero_only
def finalize(self, status: str) -> None:
- self.experiment.stop()
+ if self.close_after_fit:
+ self.experiment.stop()
@property
def name(self) -> str:
diff --git a/pytorch_lightning/loggers/tensorboard.py b/pytorch_lightning/loggers/tensorboard.py
index 9be1d82b7669a0..662ecdf4af4959 100644
--- a/pytorch_lightning/loggers/tensorboard.py
+++ b/pytorch_lightning/loggers/tensorboard.py
@@ -101,6 +101,7 @@ def experiment(self) -> SummaryWriter:
@rank_zero_only
def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
params = self._convert_params(params)
+ sanitized_params = self._sanitize_params(params)
if parse_version(torch.__version__) < parse_version("1.3.0"):
warn(
@@ -110,13 +111,14 @@ def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
)
else:
from torch.utils.tensorboard.summary import hparams
- exp, ssi, sei = hparams(params, {})
+ exp, ssi, sei = hparams(sanitized_params, {})
writer = self.experiment._get_file_writer()
writer.add_summary(exp)
writer.add_summary(ssi)
writer.add_summary(sei)
+
# some alternative should be added
- self.tags.update(params)
+ self.tags.update(sanitized_params)
@rank_zero_only
def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
diff --git a/pytorch_lightning/loggers/trains.py b/pytorch_lightning/loggers/trains.py
new file mode 100644
index 00000000000000..7d2bd01e35d8bc
--- /dev/null
+++ b/pytorch_lightning/loggers/trains.py
@@ -0,0 +1,282 @@
+"""
+Log using `allegro.ai TRAINS <https://github.com/allegroai/trains>`_
+
+.. code-block:: python
+
+ from pytorch_lightning.loggers import TrainsLogger
+ trains_logger = TrainsLogger(
+ project_name="pytorch lightning",
+ task_name="default",
+ )
+ trainer = Trainer(logger=trains_logger)
+
+
+Use the logger anywhere in your LightningModule as follows:
+
+.. code-block:: python
+
+ def train_step(...):
+ # example
+ self.logger.experiment.whatever_trains_supports(...)
+
+ def any_lightning_module_function_or_hook(...):
+ self.logger.experiment.whatever_trains_supports(...)
+
+"""
+
+import logging as log
+from argparse import Namespace
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+import numpy as np
+import torch
+
+try:
+ import trains
+except ImportError:
+ raise ImportError('You want to use `TRAINS` logger which is not installed yet,'
+ ' install it with `pip install trains`.')
+
+from .base import LightningLoggerBase, rank_zero_only
+
+
+class TrainsLogger(LightningLoggerBase):
+    """Logs using TRAINS
+
+    Args:
+        project_name: The name of the experiment's project. Defaults to None.
+        task_name: The name of the experiment. Defaults to None.
+        task_type: The type of the experiment. Defaults to 'training'.
+        reuse_last_task_id: Start with the previously used task id. Defaults to True.
+        output_uri: Default location for output models. Defaults to None.
+        auto_connect_arg_parser: Automatically grab the ArgParser
+            and connect it with the task. Defaults to True.
+        auto_connect_frameworks: If True, automatically patch to trains backend. Defaults to True.
+        auto_resource_monitoring: If True, machine vitals will be
+            sent alongside the task scalars. Defaults to True.
+    """
+
+    def __init__(
+            self, project_name: Optional[str] = None, task_name: Optional[str] = None,
+            task_type: str = 'training', reuse_last_task_id: bool = True,
+            output_uri: Optional[str] = None, auto_connect_arg_parser: bool = True,
+            auto_connect_frameworks: bool = True, auto_resource_monitoring: bool = True) -> None:
+        super().__init__()
+        self._trains = trains.Task.init(
+            project_name=project_name, task_name=task_name, task_type=task_type,
+            reuse_last_task_id=reuse_last_task_id, output_uri=output_uri,
+            auto_connect_arg_parser=auto_connect_arg_parser,
+            auto_connect_frameworks=auto_connect_frameworks,
+            auto_resource_monitoring=auto_resource_monitoring
+        )
+
+    @property
+    def experiment(self) -> trains.Task:
+        r"""Actual TRAINS object. To use TRAINS features do the following.
+
+        Example:
+            .. code-block:: python
+
+                self.logger.experiment.some_trains_function()
+
+        """
+        return self._trains
+
+    @property
+    def id(self) -> Union[str, None]:
+        """
+        ID is a uuid (string) representing this specific experiment in the entire system.
+        """
+        if not self._trains:
+            return None
+        return self._trains.id
+
+    @rank_zero_only
+    def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
+        """Log hyperparameters (numeric values) in TRAINS experiments
+
+        Args:
+            params:
+                The hyperparameters passed through the model, as a dict or a Namespace.
+        """
+        if not self._trains:
+            return None
+        if not params:
+            return
+        if isinstance(params, dict):
+            self._trains.connect(params)
+        else:
+            self._trains.connect(vars(params))
+
+    @rank_zero_only
+    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
+        """Log metrics (numeric values) in TRAINS experiments.
+        This method will be called by Trainer.
+
+        Args:
+            metrics:
+                The dictionary of the metrics.
+                If the key contains "/", it will be split by the delimiter,
+                then the elements will be logged as "title" and "series" respectively.
+            step: Step number to record the metrics at. If None, the task's last iteration is used.
+        """
+        if not self._trains:
+            return None
+
+        if step is None:  # 0 is a valid step and must not be replaced
+            step = self._trains.get_last_iteration()
+
+        for k, v in metrics.items():
+            if isinstance(v, str):
+                log.warning("Discarding metric with string value {}={}".format(k, v))
+                continue
+            if isinstance(v, torch.Tensor):
+                v = v.item()
+            parts = k.split('/')
+            if len(parts) <= 1:
+                series = title = k
+            else:
+                title = parts[0]
+                series = '/'.join(parts[1:])
+            self._trains.get_logger().report_scalar(
+                title=title, series=series, value=v, iteration=step)
+
+    @rank_zero_only
+    def log_metric(self, title: str, series: str, value: float, step: Optional[int] = None) -> None:
+        """Log metrics (numeric values) in TRAINS experiments.
+        This method will be called by the users.
+
+        Args:
+            title: The title of the graph to log, e.g. loss, accuracy.
+            series: The series name in the graph, e.g. classification, localization.
+            value: The value to log.
+            step: Step number to record the metric at. If None, the task's last iteration is used.
+        """
+        if not self._trains:
+            return None
+
+        if step is None:  # 0 is a valid step and must not be replaced
+            step = self._trains.get_last_iteration()
+
+        if isinstance(value, torch.Tensor):
+            value = value.item()
+        self._trains.get_logger().report_scalar(
+            title=title, series=series, value=value, iteration=step)
+
+    @rank_zero_only
+    def log_text(self, text: str) -> None:
+        """Log console text data in TRAINS experiment
+
+        Args:
+            text: The value of the log (data-point).
+        """
+        if not self._trains:
+            return None
+
+        self._trains.get_logger().report_text(text)
+
+    @rank_zero_only
+    def log_image(
+            self, title: str, series: str,
+            image: Union[str, np.ndarray, 'PIL.Image', torch.Tensor],
+            step: Optional[int] = None) -> None:
+        """Log Debug image in TRAINS experiment
+
+        Args:
+            title: The title of the debug image, i.e. "failed", "passed".
+            series: The series name of the debug image, i.e. "Image 0", "Image 1".
+            image:
+                Debug image to log. Can be one of the following types:
+                    Torch, Numpy, PIL image, path to image file (str)
+                If Numpy or Torch, the image is assumed to be the following:
+                    shape: CHW
+                    color space: RGB
+                    value range: [0., 1.] (float) or [0, 255] (uint8)
+            step:
+                Step number to record the image at. If None, the task's last iteration is used.
+        """
+        if not self._trains:
+            return None
+
+        if step is None:  # 0 is a valid step and must not be replaced
+            step = self._trains.get_last_iteration()
+
+        if isinstance(image, str):
+            self._trains.get_logger().report_image(
+                title=title, series=series, local_path=image, iteration=step)
+        else:
+            if isinstance(image, torch.Tensor):
+                image = image.detach().cpu().numpy()  # numpy() fails on grad-tracking tensors
+            if isinstance(image, np.ndarray):
+                image = image.transpose(1, 2, 0)  # CHW -> HWC
+            self._trains.get_logger().report_image(
+                title=title, series=series, image=image, iteration=step)
+
+    @rank_zero_only
+    def log_artifact(
+            self, name: str,
+            artifact: Union[str, Path, Dict[str, Any], 'pandas.DataFrame', 'numpy.ndarray', 'PIL.Image.Image'],
+            metadata: Optional[Dict[str, Any]] = None, delete_after_upload: bool = False) -> None:
+        """Save an artifact (file/object) in TRAINS experiment storage.
+
+        Args:
+            name: Artifact name. Notice! it will override previous artifact
+                if name already exists
+            artifact: Artifact object to upload. Currently supports:
+                - string / pathlib2.Path are treated as path to artifact file to upload
+                    If wildcard or a folder is passed, zip file containing the
+                    local files will be created and uploaded
+                - dict will be stored as .json file and uploaded
+                - pandas.DataFrame will be stored as .csv.gz (compressed CSV file) and uploaded
+                - numpy.ndarray will be stored as .npz and uploaded
+                - PIL.Image will be stored to .png file and uploaded
+            metadata:
+                Simple key/value dictionary to store on the artifact. Defaults to None.
+            delete_after_upload:
+                If True local artifact will be deleted (only applies if ``artifact`` is a
+                local file). Defaults to False.
+        """
+        if not self._trains:
+            return None
+
+        self._trains.upload_artifact(
+            name=name, artifact_object=artifact, metadata=metadata,
+            delete_after_upload=delete_after_upload
+        )
+
+    def save(self) -> None:
+        pass
+
+    @rank_zero_only
+    def finalize(self, status: str) -> None:
+        if not self._trains:
+            return None
+        self._trains.close()
+        self._trains = None
+
+    @property
+    def name(self) -> Union[str, None]:
+        """
+        Name is a human readable non-unique name (str) of the experiment.
+        """
+        if not self._trains:
+            return None
+        return self._trains.name
+
+    @property
+    def version(self) -> Union[str, None]:
+        if not self._trains:
+            return None
+        return self._trains.id
+
+    def __getstate__(self) -> Union[str, None]:
+        # NOTE(review): a ``None`` state makes pickle skip ``__setstate__`` on load, so the
+        # copy would lack ``_trains``/``_rank`` -- confirm this is acceptable after finalize().
+        return self._trains.id if self._trains else None
+
+    def __setstate__(self, state: str) -> None:
+        self._rank = 0
+        self._trains = None
+        if state:
+            self._trains = trains.Task.get_task(task_id=state)
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 7668a1f6de4084..826bd4eded8d33 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -1,3 +1,4 @@
+import inspect
import logging as log
import os
import sys
@@ -437,8 +438,6 @@ def slurm_job_id(self) -> int:
@classmethod
def default_attributes(cls):
- import inspect
-
init_signature = inspect.signature(Trainer)
args = {}
@@ -708,8 +707,8 @@ def configure_schedulers(self, schedulers: list):
if 'scheduler' not in scheduler:
raise ValueError(f'Lr scheduler should have key `scheduler`',
' with item being a lr scheduler')
- scheduler['reduce_on_plateau'] = \
- isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau)
+ scheduler['reduce_on_plateau'] = isinstance(
+ scheduler['scheduler'], optim.lr_scheduler.ReduceLROnPlateau)
lr_schedulers.append({**default_config, **scheduler})
diff --git a/requirements-extra.txt b/requirements-extra.txt
index dd153091052e85..1265bc654a6e4e 100644
--- a/requirements-extra.txt
+++ b/requirements-extra.txt
@@ -2,4 +2,5 @@ neptune-client>=0.4.4
comet-ml>=1.0.56
mlflow>=1.0.0
test_tube>=0.7.5
-wandb>=0.8.21
\ No newline at end of file
+wandb>=0.8.21
+trains>=0.13.3
diff --git a/tests/loggers/test_neptune.py b/tests/loggers/test_neptune.py
index 6130bfb532c7bd..5c2ab5b52029a3 100644
--- a/tests/loggers/test_neptune.py
+++ b/tests/loggers/test_neptune.py
@@ -1,5 +1,6 @@
import pickle
-from unittest.mock import patch
+
+from unittest.mock import patch, MagicMock
import torch
@@ -96,3 +97,31 @@ def test_neptune_pickle(tmpdir):
pkl_bytes = pickle.dumps(trainer)
trainer2 = pickle.loads(pkl_bytes)
trainer2.logger.log_metrics({'acc': 1.0})
+
+
+def test_neptune_leave_open_experiment_after_fit(tmpdir):
+    """Verify that the experiment is stopped after fit unless `close_after_fit=False`."""
+    tutils.reset_seed()
+
+    model = LightningTestModel(tutils.get_hparams())
+
+    def _fit_and_count_stops(neptune_logger):
+        # swap in a mock experiment so calls to `stop` can be counted
+        neptune_logger._experiment = MagicMock()
+
+        trainer = Trainer(
+            default_save_path=tmpdir,
+            max_epochs=1,
+            train_percent_check=0.05,
+            logger=neptune_logger,
+        )
+        trainer.fit(model)
+        return neptune_logger._experiment.stop.call_count
+
+    # default behaviour: the experiment is stopped exactly once after training
+    stops_by_default = _fit_and_count_stops(NeptuneLogger(offline_mode=True))
+    assert stops_by_default == 1
+
+    # opting out via `close_after_fit=False` must leave the experiment running
+    stops_when_left_open = _fit_and_count_stops(NeptuneLogger(offline_mode=True, close_after_fit=False))
+    assert stops_when_left_open == 0
diff --git a/tests/loggers/test_tensorboard.py b/tests/loggers/test_tensorboard.py
index b3f3d19242c8c1..e815384011f982 100644
--- a/tests/loggers/test_tensorboard.py
+++ b/tests/loggers/test_tensorboard.py
@@ -1,4 +1,5 @@
import pickle
+from argparse import Namespace
import pytest
import torch
@@ -108,6 +109,9 @@ def test_tensorboard_log_hyperparams(tmpdir):
"float": 0.3,
"int": 1,
"string": "abc",
- "bool": True
+ "bool": True,
+ "list": [1, 2, 3],
+ "namespace": Namespace(foo=3),
+ "layer": torch.nn.BatchNorm1d
}
logger.log_hyperparams(hparams)
diff --git a/tests/loggers/test_trains.py b/tests/loggers/test_trains.py
new file mode 100644
index 00000000000000..1c8ca4167462a4
--- /dev/null
+++ b/tests/loggers/test_trains.py
@@ -0,0 +1,48 @@
+import pickle
+
+import tests.models.utils as tutils
+from pytorch_lightning import Trainer
+from pytorch_lightning.loggers import TrainsLogger
+from tests.models import LightningTestModel
+
+
+def test_trains_logger(tmpdir):
+    """Check that a short training run completes with the TRAINS logger attached."""
+    tutils.reset_seed()
+
+    model = LightningTestModel(tutils.get_hparams())
+    trains_logger = TrainsLogger(project_name="examples", task_name="pytorch lightning test")
+
+    # keep the run tiny: a single epoch with `train_percent_check` at 0.05
+    trainer = Trainer(
+        default_save_path=tmpdir,
+        max_epochs=1,
+        train_percent_check=0.05,
+        logger=trains_logger,
+    )
+    fit_status = trainer.fit(model)
+
+    print('result finished')
+    # the test treats a return value of 1 from `fit` as success
+    assert fit_status == 1, "Training failed"
+
+
+def test_trains_pickle(tmpdir):
+    """Check that a trainer holding a TRAINS logger survives a pickle round trip."""
+    tutils.reset_seed()
+
+    trains_logger = TrainsLogger(project_name="examples", task_name="pytorch lightning test")
+
+    # no model is needed: the trainer is only serialized, never fitted
+    trainer = Trainer(
+        default_save_path=tmpdir,
+        max_epochs=1,
+        logger=trains_logger,
+    )
+
+    # serialize the trainer and restore it from the resulting bytes
+    pkl_bytes = pickle.dumps(trainer)
+    restored_trainer = pickle.loads(pkl_bytes)
+
+    # the restored logger must still accept metrics
+    restored_trainer.logger.log_metrics({"acc": 1.0})
diff --git a/tests/models/__init__.py b/tests/models/__init__.py
index 4992e70a2ec30f..67206a63d0fe6b 100644
--- a/tests/models/__init__.py
+++ b/tests/models/__init__.py
@@ -24,7 +24,8 @@
LightInfTestDataloader,
LightTestOptimizerWithSchedulingMixin,
LightTestMultipleOptimizersWithSchedulingMixin,
- LightTestOptimizersWithMixedSchedulingMixin
+ LightTestOptimizersWithMixedSchedulingMixin,
+ LightTestReduceLROnPlateauMixin
)
diff --git a/tests/models/mixins.py b/tests/models/mixins.py
index fd3f0ddea1b9f4..0be691726e209c 100644
--- a/tests/models/mixins.py
+++ b/tests/models/mixins.py
@@ -678,6 +678,16 @@ def configure_optimizers(self):
[{'scheduler': lr_scheduler1, 'interval': 'step'}, lr_scheduler2]
+class LightTestReduceLROnPlateauMixin:
+    """Test mixin whose only lr scheduler is a bare `ReduceLROnPlateau` instance."""
+
+    def configure_optimizers(self):
+        # LBFGS when requested through hparams, Adam otherwise
+        optimizer_cls = optim.LBFGS if self.hparams.optimizer_name == 'lbfgs' else optim.Adam
+        optimizer = optimizer_cls(self.parameters(), lr=self.hparams.learning_rate)
+        return [optimizer], [optim.lr_scheduler.ReduceLROnPlateau(optimizer)]
+
+
def _get_output_metric(output, name):
if isinstance(output, dict):
val = output[name]
diff --git a/tests/trainer/test_optimizers.py b/tests/trainer/test_optimizers.py
index bc5dde5f75013f..3ea0e3ff2aab76 100644
--- a/tests/trainer/test_optimizers.py
+++ b/tests/trainer/test_optimizers.py
@@ -10,9 +10,12 @@
from tests.models import (
TestModelBase,
LightTrainDataloader,
+ LightValidationStepMixin,
+ LightValidationMixin,
LightTestOptimizerWithSchedulingMixin,
LightTestMultipleOptimizersWithSchedulingMixin,
- LightTestOptimizersWithMixedSchedulingMixin
+ LightTestOptimizersWithMixedSchedulingMixin,
+ LightTestReduceLROnPlateauMixin
)
@@ -144,3 +147,35 @@ class CurrentTestModel(
# Called every 3 steps, meaning for 1 epoch of 11 batches, it is called 3 times
assert init_lr * 0.1 == adjusted_lr2, \
'lr for optimizer 2 not adjusted correctly'
+
+
+def test_reduce_lr_on_plateau_scheduling(tmpdir):
+    """Verify that a bare `ReduceLROnPlateau` scheduler is converted to the dict config."""
+    tutils.reset_seed()
+
+    class CurrentTestModel(
+            LightTestReduceLROnPlateauMixin,
+            LightTrainDataloader,
+            LightValidationMixin,
+            LightValidationStepMixin,
+            TestModelBase):
+        pass
+
+    model = CurrentTestModel(tutils.get_hparams())
+
+    # short run: a single epoch with reduced train/val percent checks
+    trainer_options = dict(
+        default_save_path=tmpdir,
+        max_epochs=1,
+        val_percent_check=0.1,
+        train_percent_check=0.2
+    )
+
+    # fit model so that the trainer populates `lr_schedulers`
+    trainer = Trainer(**trainer_options)
+    trainer.fit(model)
+
+    # only the scheduler instance itself is taken from the trainer; all other keys are pinned
+    expected = dict(scheduler=trainer.lr_schedulers[0]['scheduler'], monitor='val_loss',
+                    interval='epoch', frequency=1, reduce_on_plateau=True)
+    assert trainer.lr_schedulers[0] == expected, 'lr scheduler was not correctly converted to dict'
diff --git a/tox.ini b/tox.ini
index 3243beb420a459..f181bc64dd14d2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -38,8 +38,8 @@ commands =
pip list
check-manifest
python setup.py check --metadata --strict
- flake8 .
- coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules
+ coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --doctest-modules
+ coverage report
python setup.py sdist
twine check dist/*