Commit

Merge branch 'master' into ckpt-fix

Borda committed Mar 16, 2020
2 parents 7799df8 + 384e124 commit 8b76a28
Showing 30 changed files with 618 additions and 93 deletions.
12 changes: 9 additions & 3 deletions .codecov.yml
@@ -2,9 +2,15 @@
# Validation check:
# $ curl --data-binary @.codecov.yml https://codecov.io/validate


# https://docs.codecov.io/docs/codecovyml-reference
codecov:
bot: "codecov-io"
strict_yaml_branch: "yaml-config"
require_ci_to_pass: yes
notify:
require_ci_to_pass: yes
# after_n_builds: 2
wait_for_ci: yes

coverage:
precision: 0 # 2 = xx.xx%, 0 = xx%
@@ -16,15 +22,15 @@ coverage:
default:
against: auto
target: 99% # specify the target coverage for each commit status
threshold: 20% # allow this little decrease on project
threshold: 30% # allow this little decrease on project
# https://github.com/codecov/support/wiki/Filtering-Branches
# branches: master
if_ci_failed: error
# https://github.com/codecov/support/wiki/Patch-Status
patch:
default:
against: auto
target: 40% # specify the target "X%" coverage to hit
target: 50% # specify the target "X%" coverage to hit
# threshold: 50% # allow this much decrease on patch
changes: false

9 changes: 6 additions & 3 deletions .drone.yml
@@ -7,17 +7,20 @@ name: torch-GPU
steps:
- name: testing
image: nvcr.io/nvidia/pytorch:20.02-py3
environment:
SLURM_LOCALID: 0
CODECOV_TOKEN:
from_secret: codecov_token
commands:
- python --version
- pip install pip -U
- pip --version
- nvidia-smi
#- pip install torch==1.3
- pip install -r requirements.txt --user
- pip install coverage pytest pytest-cov pytest-flake8
- pip install coverage pytest pytest-cov pytest-flake8 codecov
- pip install -r ./tests/requirements.txt --user
- pip list
- export SLURM_LOCALID=0
- python -c "import torch ; print(' & '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) if torch.cuda.is_available() else 'only CPU')"
- coverage run --source pytorch_lightning -m py.test pytorch_lightning tests pl_examples -v --doctest-modules # --flake8
- coverage report
- codecov --token $CODECOV_TOKEN # --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -8,8 +8,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- Added `TrainsLogger` class ([#1122](https://github.com/PyTorchLightning/pytorch-lightning/pull/1122))
- Added type hints to `pytorch_lightning.core` ([#946](https://github.com/PyTorchLightning/pytorch-lightning/pull/946))
- Added support for IterableDataset in validation and testing ([#1104](https://github.com/PyTorchLightning/pytorch-lightning/pull/1104))
- Added support for non-primitive types in hparams for TensorboardLogger ([#1130](https://github.com/PyTorchLightning/pytorch-lightning/pull/1130))


### Changed

@@ -25,6 +28,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Fixed

- Fixed a bug related to type checking of `ReduceLROnPlateau` lr schedulers ([#1114](https://github.com/PyTorchLightning/pytorch-lightning/issues/1114))
- Fixed a bug to ensure lightning checkpoints are backward compatible ([#1132](https://github.com/PyTorchLightning/pytorch-lightning/pull/1132))

## [0.7.1] - 2020-03-07
@@ -65,6 +69,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Changed

- Improved `NeptuneLogger` by adding `close_after_fit` argument to allow logging after training ([#908](https://github.com/PyTorchLightning/pytorch-lightning/pull/1084))
- Changed default TQDM to use `tqdm.auto` for prettier outputs in IPython notebooks ([#752](https://github.com/PyTorchLightning/pytorch-lightning/pull/752))
- Changed `pytorch_lightning.logging` to `pytorch_lightning.loggers` ([#767](https://github.com/PyTorchLightning/pytorch-lightning/pull/767))
- Moved the default `tqdm_dict` definition from Trainer to `LightningModule`, so it can be overridden by the user ([#749](https://github.com/PyTorchLightning/pytorch-lightning/pull/749))
2 changes: 1 addition & 1 deletion README.md
@@ -9,7 +9,7 @@

[![PyPI Status](https://badge.fury.io/py/pytorch-lightning.svg)](https://badge.fury.io/py/pytorch-lightning)
[![PyPI Status](https://pepy.tech/badge/pytorch-lightning)](https://pepy.tech/project/pytorch-lightning)
[![Coverage](docs/source/_static/images/coverage.svg)](https://github.com/PytorchLightning/pytorch-lightning/tree/master/tests#running-coverage)
[![codecov](https://codecov.io/gh/PyTorchLightning/pytorch-lightning/branch/master/graph/badge.svg)](https://codecov.io/gh/PyTorchLightning/pytorch-lightning)
[![CodeFactor](https://www.codefactor.io/repository/github/pytorchlightning/pytorch-lightning/badge)](https://www.codefactor.io/repository/github/pytorchlightning/pytorch-lightning)

[![ReadTheDocs](https://readthedocs.org/projects/pytorch-lightning/badge/?version=0.7.1)](https://pytorch-lightning.readthedocs.io/en/0.7.1/)
21 changes: 0 additions & 21 deletions docs/source/_static/images/coverage.svg

This file was deleted.

3 changes: 2 additions & 1 deletion docs/source/conf.py
@@ -297,7 +297,8 @@ def setup(app):
MOCK_REQUIRE_PACKAGES.append(pkg.rstrip())

# TODO: better parse from package since the import name and package name may differ
MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'test_tube', 'mlflow', 'comet_ml', 'wandb', 'neptune']
MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'test_tube',
'mlflow', 'comet_ml', 'wandb', 'neptune', 'trains']
autodoc_mock_imports = MOCK_REQUIRE_PACKAGES + MOCK_MANUAL_PACKAGES
# for mod_name in MOCK_REQUIRE_PACKAGES:
# sys.modules[mod_name] = mock.Mock()
4 changes: 2 additions & 2 deletions docs/source/early_stopping.rst
@@ -11,7 +11,7 @@ Enable Early Stopping
---------------------
There are two ways to enable early stopping.

.. note:: See: :ref:`trainer`
.. seealso:: :ref:`trainer`

.. code-block:: python
@@ -35,4 +35,4 @@ To disable early stopping pass ``False`` to the `early_stop_callback`.
Note that ``None`` will not disable early stopping but will lead to the
default behaviour.

.. note:: See: :ref:`trainer`
.. seealso:: :ref:`trainer`
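
For reference, a minimal sketch of the two options described in this doc, assuming the 0.7.x `early_stop_callback` Trainer argument and the `EarlyStopping` callback; the monitored metric and patience values are illustrative, not prescribed:

```python
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping

# Option 1: let Lightning build the default EarlyStopping callback
trainer = Trainer(early_stop_callback=True)

# Option 2: pass a configured callback for full control over the monitored metric
early_stop = EarlyStopping(monitor='val_loss', patience=3, mode='min')
trainer = Trainer(early_stop_callback=early_stop)

# Passing False disables early stopping; None falls back to the default behaviour
trainer = Trainer(early_stop_callback=False)
```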
38 changes: 33 additions & 5 deletions docs/source/experiment_logging.rst
@@ -7,7 +7,7 @@ Comet.ml
`Comet.ml <https://www.comet.ml/site/>`_ is a third-party logger.
To use CometLogger as your logger do the following.

.. note:: See: :ref:`comet` docs.
.. seealso:: :ref:`comet` docs.

.. code-block:: python
@@ -38,7 +38,7 @@ Neptune.ai
`Neptune.ai <https://neptune.ai/>`_ is a third-party logger.
To use Neptune.ai as your logger do the following.

.. note:: See: :ref:`neptune` docs.
.. seealso:: :ref:`neptune` docs.

.. code-block:: python
@@ -62,12 +62,40 @@ The Neptune.ai is available anywhere except ``__init__`` in your LightningModule
            some_img = fake_image()
            self.logger.experiment.add_image('generated_images', some_img, 0)

allegro.ai TRAINS
^^^^^^^^^^^^^^^^^

`allegro.ai <https://github.com/allegroai/trains/>`_ is a third-party logger.
To use TRAINS as your logger do the following.

.. seealso:: :ref:`trains` docs.

.. code-block:: python

    from pytorch_lightning.loggers import TrainsLogger
    trains_logger = TrainsLogger(
        project_name="examples",
        task_name="pytorch lightning test"
    )
    trainer = Trainer(logger=trains_logger)

The TrainsLogger is available anywhere in your LightningModule

.. code-block:: python

    class MyModule(pl.LightningModule):
        def __init__(self, ...):
            some_img = fake_image()
            self.logger.log_image('debug', 'generated_image_0', some_img, 0)

Tensorboard
^^^^^^^^^^^

To use `Tensorboard <https://pytorch.org/docs/stable/tensorboard.html>`_ as your logger do the following.

.. note:: See: TensorBoardLogger :ref:`tf-logger`
.. seealso:: TensorBoardLogger :ref:`tf-logger`

.. code-block:: python
@@ -93,7 +121,7 @@ Test Tube
`Test Tube <https://github.com/williamFalcon/test-tube>`_ is a tensorboard logger but with nicer file structure.
To use TestTube as your logger do the following.

.. note:: See: TestTube :ref:`testTube`
.. seealso:: TestTube :ref:`testTube`

.. code-block:: python
@@ -118,7 +146,7 @@ Wandb
`Wandb <https://www.wandb.com/>`_ is a third-party logger.
To use Wandb as your logger do the following.

.. note:: See: :ref:`wandb` docs
.. seealso:: :ref:`wandb` docs

.. code-block:: python
4 changes: 2 additions & 2 deletions docs/source/experiment_reporting.rst
@@ -22,7 +22,7 @@ Control log writing frequency
Writing to a logger can be expensive. In Lightning you can set the interval at which you
want to log using this trainer flag.

.. note:: See: :ref:`trainer`
.. seealso:: :ref:`trainer`

.. code-block:: python
@@ -32,7 +32,7 @@ want to log using this trainer flag.
Log metrics
^^^^^^^^^^^

To plot metrics into whatever logger you passed in (tensorboard, comet, neptune, etc...)
To plot metrics into whatever logger you passed in (tensorboard, comet, neptune, TRAINS, etc...)

1. training_epoch_end, validation_epoch_end, test_epoch_end will all log anything in the "log" key of the return dict.

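As a hedged illustration of the ``log`` key mentioned above, here is a sketch against the epoch-end hook names listed in this doc; the loss aggregation and metric name are illustrative:

```python
import torch
import pytorch_lightning as pl

class MyModule(pl.LightningModule):
    def training_epoch_end(self, outputs):
        # `outputs` collects what training_step returned for each batch
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        # everything under the 'log' key is forwarded to the attached logger
        return {'log': {'train_loss_epoch': avg_loss}}
```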
2 changes: 1 addition & 1 deletion docs/source/fast_training.rst
@@ -16,7 +16,7 @@ Force training for min or max epochs
-------------------------------------
It can be useful to force training for a minimum number of epochs or limit to a max number.

.. note:: See: :ref:`trainer`
.. seealso:: :ref:`trainer`

.. code-block:: python
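A one-line sketch of the flags this section refers to, using the 0.7.x argument names; the epoch counts are illustrative:

```python
from pytorch_lightning import Trainer

# train for at least 1 epoch and at most 100 epochs
trainer = Trainer(min_epochs=1, max_epochs=100)
```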
2 changes: 1 addition & 1 deletion docs/source/introduction_guide.rst
@@ -472,7 +472,7 @@ First, change the runtime to TPU (and reinstall lightning).

Next, install the required xla library (adds support for PyTorch on TPUs)

.. code-block:: python
.. code-block::
import collections
from datetime import datetime, timedelta
2 changes: 1 addition & 1 deletion docs/source/multi_gpu.rst
@@ -38,7 +38,7 @@ This will make your code scale to any arbitrary number of GPUs or TPUs with Lightning
# with lightning
def forward(self, x):
z = torch.Tensor(2, 3)
z = z.type_as(x.type())
z = z.type_as(x)
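
To make the corrected pattern above concrete, here is a small self-contained sketch; the module name and tensor shapes are hypothetical:

```python
import torch
import torch.nn as nn

class NoiseAdder(nn.Module):
    """Hypothetical module illustrating the corrected ``type_as`` pattern."""

    def forward(self, x):
        # a freshly created tensor lives on the CPU with the default dtype
        z = torch.randn(2, 3)
        # casting with the tensor itself (not x.type()) gives z the same dtype
        # and backend as the incoming batch, so the module needs no .cuda() calls
        return z.type_as(x)
```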
Remove samplers
^^^^^^^^^^^^^^^
4 changes: 2 additions & 2 deletions docs/source/training_tricks.rst
@@ -7,7 +7,7 @@ Accumulate gradients
Accumulated gradients runs K small batches of size N before doing a backwards pass.
The effect is a large effective batch size of size KxN.

.. note:: See: :ref:`trainer`
.. seealso:: :ref:`trainer`

.. code-block:: python
@@ -20,7 +20,7 @@ Gradient Clipping
Gradient clipping may be enabled to avoid exploding gradients. Specifically, this will `clip the gradient
norm <https://pytorch.org/docs/stable/nn.html#torch.nn.utils.clip_grad_norm_>`_ computed over all model parameters together.

.. note:: See: :ref:`trainer`
.. seealso:: :ref:`trainer`

.. code-block:: python
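Both tricks above are plain Trainer flags; a combined sketch with the 0.7.x flag names, illustrative values:

```python
from pytorch_lightning import Trainer

# accumulate gradients over 4 batches of size N (effective batch size 4 * N)
# and clip the global gradient norm at 0.5 before each optimizer step
trainer = Trainer(accumulate_grad_batches=4, gradient_clip_val=0.5)
```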
1 change: 1 addition & 0 deletions environment.yml
@@ -32,3 +32,4 @@ dependencies:
- comet_ml>=1.0.56
- wandb>=0.8.21
- neptune-client>=0.4.4
- trains>=0.13.3
2 changes: 1 addition & 1 deletion pytorch_lightning/core/__init__.py
@@ -228,7 +228,7 @@ def training_step(self, batch, batch_idx):
# put the z on the appropriate gpu or tpu core
z = sample_noise()
z = z.type_as(x.type())
z = z.type_as(x)
----------
4 changes: 2 additions & 2 deletions pytorch_lightning/core/lightning.py
@@ -8,8 +8,8 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch
import torch.distributed as dist
from torch import Tensor
from torch.distributed import init_process_group
from torch.nn.parallel import DistributedDataParallel
from torch.optim import Adam
from torch.optim.optimizer import Optimizer
@@ -859,7 +859,7 @@ def init_ddp_connection(self):

root_node = self.trainer.resolve_root_node_address(root_node)
os.environ['MASTER_ADDR'] = root_node
dist.init_process_group('nccl', rank=proc_rank, world_size=world_size)
init_process_group('nccl', rank=proc_rank, world_size=world_size)

def configure_apex(
self,
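For context on the import change above, a stripped-down sketch of what the connection setup amounts to; this is a standalone approximation, not the Trainer's actual method, and the port is an assumed placeholder:

```python
import os
from torch.distributed import init_process_group

def init_ddp_connection_sketch(proc_rank: int, world_size: int,
                               root_node: str = '127.0.0.1') -> None:
    # advertise the master node so every process can rendezvous ...
    os.environ['MASTER_ADDR'] = root_node
    os.environ.setdefault('MASTER_PORT', '12910')  # assumed placeholder port
    # ... then join the NCCL process group with this process's rank
    init_process_group('nccl', rank=proc_rank, world_size=world_size)
```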
6 changes: 6 additions & 0 deletions pytorch_lightning/loggers/__init__.py
@@ -119,3 +119,9 @@ def any_lightning_module_function_or_hook(...):
__all__.append('WandbLogger')
except ImportError:
pass

try:
from .trains import TrainsLogger
__all__.append('TrainsLogger')
except ImportError:
pass
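
Because `TrainsLogger` is only exported when the optional `trains` package is installed, user code might guard on it; a hedged sketch, where the fallback logger and its arguments are illustrative:

```python
from pytorch_lightning import Trainer

try:
    from pytorch_lightning.loggers import TrainsLogger
    logger = TrainsLogger(project_name="examples", task_name="optional logger demo")
except ImportError:
    # fall back to the always-available TensorBoard logger
    from pytorch_lightning.loggers import TensorBoardLogger
    logger = TensorBoardLogger(save_dir="lightning_logs")

trainer = Trainer(logger=logger)
```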
28 changes: 27 additions & 1 deletion pytorch_lightning/loggers/base.py
@@ -4,6 +4,8 @@
from functools import wraps
from typing import Union, Optional, Dict, Iterable, Any, Callable, List

import torch


def rank_zero_only(fn: Callable):
"""Decorate a logger method to run it only on the process with rank 0.
@@ -42,7 +44,8 @@ def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None):
"""
pass

def _convert_params(self, params: Union[Dict[str, Any], Namespace]) -> Dict[str, Any]:
@staticmethod
def _convert_params(params: Union[Dict[str, Any], Namespace]) -> Dict[str, Any]:
# in case converting from namespace
if isinstance(params, Namespace):
params = vars(params)
@@ -52,6 +55,29 @@ def _convert_params(self, params: Union[Dict[str, Any], Namespace]) -> Dict[str, Any]:

return params

@staticmethod
def _sanitize_params(params: Dict[str, Any]) -> Dict[str, Any]:
"""Returns params with non-primitvies converted to strings for logging
>>> params = {"float": 0.3,
... "int": 1,
... "string": "abc",
... "bool": True,
... "list": [1, 2, 3],
... "namespace": Namespace(foo=3),
... "layer": torch.nn.BatchNorm1d}
>>> import pprint
>>> pprint.pprint(LightningLoggerBase._sanitize_params(params)) # doctest: +NORMALIZE_WHITESPACE
{'bool': True,
'float': 0.3,
'int': 1,
'layer': "<class 'torch.nn.modules.batchnorm.BatchNorm1d'>",
'list': '[1, 2, 3]',
'namespace': 'Namespace(foo=3)',
'string': 'abc'}
"""
return {k: v if type(v) in [bool, int, float, str, torch.Tensor] else str(v) for k, v in params.items()}

@abstractmethod
def log_hyperparams(self, params: argparse.Namespace):
"""Record hyperparameters.
Expand Down
(Remaining changed files not shown.)

0 comments on commit 8b76a28