diff --git a/.circleci/config.yml b/.circleci/config.yml index e6eca78ea59b19..28de0a75bdd123 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -64,10 +64,13 @@ references: name: Make Documentation command: | # sudo apt-get install pandoc + sudo apt-get update && sudo apt-get install -y cmake pip install -r requirements.txt --user sudo pip install -r docs/requirements.txt + pip install -r requirements-extra.txt --user # for doctesting loggers etc. # sphinx-apidoc -o ./docs/source ./pytorch_lightning **/test_* --force --follow-links - cd docs; make clean ; make html --debug --jobs 2 SPHINXOPTS="-W" + cd docs; make clean; make html --debug --jobs 2 SPHINXOPTS="-W" + make doctest; make coverage jobs: diff --git a/.drone.yml b/.drone.yml index 407ebd066cf9ba..88e2d76a525032 100644 --- a/.drone.yml +++ b/.drone.yml @@ -35,9 +35,11 @@ steps: - apt-get update && apt-get install -y cmake - pip install -r requirements.txt --user -q - pip install -r ./tests/requirements-devel.txt --user -q + #- pip install -r ./docs/requirements.txt --user -q - pip list - python -c "import torch ; print(' & '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) if torch.cuda.is_available() else 'only CPU')" - coverage run --source pytorch_lightning -m py.test pytorch_lightning tests benchmarks -v --doctest-modules # --flake8 + #- cd docs; make doctest; make coverage - coverage report - codecov --token $CODECOV_TOKEN # --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG - python tests/collect_env_details.py diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index e9343b6b9ce71d..ac24dcee0a1e1b 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -86,6 +86,17 @@ jobs: pip list shell: bash + - name: Reinstall Horovod if necessary + if: runner.os != 'windows' && matrix.python-version != '3.8' + run: | + HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')") + if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then + pip uninstall -y horovod + HOROVOD_BUILD_ARCH_FLAGS="-mfma" pip install --no-cache-dir $(grep "horovod" requirements-extra.txt) + fi + horovodrun --check-build + shell: bash + - name: Cache datasets uses: actions/cache@v1 with: diff --git a/.gitignore b/.gitignore index d5bea7f6d58f43..cb8fd278c5c4f2 100644 --- a/.gitignore +++ b/.gitignore @@ -13,9 +13,7 @@ test_tube_data/ test_tube_exp/ # Documentations -docs/source/pl_examples*.rst -docs/source/pytorch_lightning*.rst -docs/source/tests*.rst +docs/source/api docs/source/*.md # Byte-compiled / optimized / DLL files diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e075c4d10d537..1acc19c813c7ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Reduction when `batch_size < num_gpus` ([#1609](https://github.com/PyTorchLightning/pytorch-lightning/pull/1609)) +- Updated LightningTemplateModel to look more like Colab example ([#1577](https://github.com/PyTorchLightning/pytorch-lightning/pull/1577)) + ### Deprecated ### Removed @@ -34,6 +36,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed wandb logger `global_step` affects other loggers ([#1492](https://github.com/PyTorchLightning/pytorch-lightning/issues/1485)) +- Fixed disabling progress bar on non-zero ranks using Horovod backend ([#1709](https://github.com/PyTorchLightning/pytorch-lightning/pull/1709)) + +- Fixed bugs that prevent lr finder to be used together with early stopping and validation dataloaders ([#1676](https://github.com/PyTorchLightning/pytorch-lightning/pull/1676)) + ## [0.7.5] - 2020-04-27 ### Changed @@ -78,7 +84,6 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Defines shared proc. rank, remove rank from instances (e.g. loggers) ([#1408](https://github.com/PyTorchLightning/pytorch-lightning/pull/1408)) - Updated semantic segmentation example with custom U-Net and logging ([#1371](https://github.com/PyTorchLightning/pytorch-lightning/pull/1371)) - Disabled val and test shuffling ([#1600](https://github.com/PyTorchLightning/pytorch-lightning/pull/1600)) -- Updated LightningTemplateModel to look more like Colab example ([#1546](https://github.com/PyTorchLightning/pytorch-lightning/pull/1577)) ### Deprecated diff --git a/Datasets/MNIST/digits-0-1-2_nb-100/test.pt b/Datasets/MNIST/digits-0-1-2_nb-100/test.pt new file mode 100644 index 00000000000000..08e765d025dbbd Binary files /dev/null and b/Datasets/MNIST/digits-0-1-2_nb-100/test.pt differ diff --git a/Datasets/MNIST/digits-0-1-2_nb-100/training.pt b/Datasets/MNIST/digits-0-1-2_nb-100/training.pt new file mode 100644 index 00000000000000..5e99b2c92696c2 Binary files /dev/null and b/Datasets/MNIST/digits-0-1-2_nb-100/training.pt differ diff --git a/docs/source/apex.rst b/docs/source/apex.rst index e1c7a1b2c83649..f705e040bd38f3 100644 --- a/docs/source/apex.rst +++ b/docs/source/apex.rst @@ -1,3 +1,8 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + + 16-bit training ================= Lightning offers 16-bit training for CPUs, GPUs and TPUs. @@ -38,7 +43,7 @@ Install apex Enable 16-bit ^^^^^^^^^^^^^ -.. code-block:: python +.. testcode:: # turn on 16-bit trainer = Trainer(amp_level='O1', precision=16) @@ -50,7 +55,7 @@ TPU 16-bit ---------- 16-bit on TPus is much simpler. To use 16-bit with TPUs set precision to 16 when using the tpu flag -.. code-block:: python +.. testcode:: # DEFAULT trainer = Trainer(num_tpu_cores=8, precision=32) diff --git a/docs/source/callbacks.rst b/docs/source/callbacks.rst index a2969820b2eebc..744c1f0c5edd67 100644 --- a/docs/source/callbacks.rst +++ b/docs/source/callbacks.rst @@ -1,3 +1,8 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + from pytorch_lightning.callbacks.base import Callback + .. role:: hidden :class: hidden-section @@ -18,21 +23,23 @@ An overall Lightning system should have: Example: -.. doctest:: - - >>> import pytorch_lightning as pl - >>> class MyPrintingCallback(pl.Callback): - ... - ... def on_init_start(self, trainer): - ... print('Starting to init trainer!') - ... - ... def on_init_end(self, trainer): - ... print('trainer is init now') - ... - ... def on_train_end(self, trainer, pl_module): - ... print('do something when training ends') - ... - >>> trainer = pl.Trainer(callbacks=[MyPrintingCallback()]) +.. 
testcode:: + + class MyPrintingCallback(Callback): + + def on_init_start(self, trainer): + print('Starting to init trainer!') + + def on_init_end(self, trainer): + print('trainer is init now') + + def on_train_end(self, trainer, pl_module): + print('do something when training ends') + + trainer = Trainer(callbacks=[MyPrintingCallback()]) + +.. testoutput:: + Starting to init trainer! trainer is init now diff --git a/docs/source/child_modules.rst b/docs/source/child_modules.rst index 49fe6f463c3738..4c2d60cc13246e 100644 --- a/docs/source/child_modules.rst +++ b/docs/source/child_modules.rst @@ -1,3 +1,22 @@ +.. testsetup:: * + + import torch + from pytorch_lightning.trainer.trainer import Trainer + from pytorch_lightning.callbacks.base import Callback + from pytorch_lightning.core.lightning import LightningModule + + class LitMNIST(LightningModule): + + def __init__(self): + super().__init__() + + def train_dataloader(): + pass + + def val_dataloader(): + pass + + Child Modules ------------- Research projects tend to test different approaches to the same dataset. @@ -7,13 +26,18 @@ For example, imagine we now want to train an Autoencoder to use as a feature ext Recall that `LitMNIST` already defines all the dataloading etc... The only things that change in the `Autoencoder` model are the init, forward, training, validation and test step. -.. code-block:: python +.. testcode:: class Encoder(torch.nn.Module): - ... + pass + + class Decoder(torch.nn.Module): + pass class AutoEncoder(LitMNIST): + def __init__(self): + super().__init__() self.encoder = Encoder() self.decoder = Decoder() @@ -30,10 +54,10 @@ that change in the `Autoencoder` model are the init, forward, training, validati return loss def validation_step(self, batch, batch_idx): - return self._shared_eval(batch, batch_idx, 'val'): + return self._shared_eval(batch, batch_idx, 'val') def test_step(self, batch, batch_idx): - return self._shared_eval(batch, batch_idx, 'test'): + return self._shared_eval(batch, batch_idx, 'test') def _shared_eval(self, batch, batch_idx, prefix): x, y = batch @@ -43,6 +67,7 @@ that change in the `Autoencoder` model are the init, forward, training, validati loss = F.nll_loss(logits, y) return {f'{prefix}_loss': loss} + and we can train this using the same trainer .. code-block:: python @@ -58,5 +83,3 @@ In this case, we want to use the `AutoEncoder` to extract image representations some_images = torch.Tensor(32, 1, 28, 28) representations = autoencoder(some_images) - -.. \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 9a9948680b4c2b..f6dad2c3922eaf 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -21,6 +21,7 @@ # import m2r import builtins import pt_lightning_sphinx_theme +from sphinx.ext import apidoc PATH_HERE = os.path.abspath(os.path.dirname(__file__)) PATH_ROOT = os.path.join(PATH_HERE, '..', '..') @@ -127,18 +128,18 @@ # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
exclude_patterns = [ - 'pytorch_lightning.rst', - 'pl_examples.*', - 'modules.rst', + 'api/pytorch_lightning.rst', + 'api/pl_examples.*', + 'api/modules.rst', # deprecated/renamed: - 'pytorch_lightning.loggers.comet_logger.rst', # TODO: remove in v0.8.0 - 'pytorch_lightning.loggers.mlflow_logger.rst', # TODO: remove in v0.8.0 - 'pytorch_lightning.loggers.test_tube_logger.rst', # TODO: remove in v0.8.0 - 'pytorch_lightning.callbacks.pt_callbacks.*', # TODO: remove in v0.8.0 - 'pytorch_lightning.pt_overrides.*', # TODO: remove in v0.8.0 - 'pytorch_lightning.root_module.*', # TODO: remove in v0.8.0 - 'pytorch_lightning.logging.*', # TODO: remove in v0.8.0 + 'api/pytorch_lightning.loggers.comet_logger.rst', # TODO: remove in v0.8.0 + 'api/pytorch_lightning.loggers.mlflow_logger.rst', # TODO: remove in v0.8.0 + 'api/pytorch_lightning.loggers.test_tube_logger.rst', # TODO: remove in v0.8.0 + 'api/pytorch_lightning.callbacks.pt_callbacks.*', # TODO: remove in v0.8.0 + 'api/pytorch_lightning.pt_overrides.*', # TODO: remove in v0.8.0 + 'api/pytorch_lightning.root_module.*', # TODO: remove in v0.8.0 + 'api/pytorch_lightning.logging.*', # TODO: remove in v0.8.0 ] # The name of the Pygments (syntax highlighting) style to use. @@ -263,32 +264,33 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True -# https://github.com/rtfd/readthedocs.org/issues/1139 -# I use sphinx-apidoc to auto-generate API documentation for my project. -# Right now I have to commit these auto-generated files to my repository -# so that RTD can build them into HTML docs. It'd be cool if RTD could run -# sphinx-apidoc for me, since it's easy to forget to regen API docs -# and commit them to my repo after making changes to my code. +# packages for which sphinx-apidoc should generate the docs (.rst files) PACKAGES = [ pytorch_lightning.__name__, 'pl_examples', ] +apidoc_output_folder = os.path.join(PATH_HERE, 'api') + def run_apidoc(_): + sys.path.insert(0, apidoc_output_folder) + + # delete api-doc files before generating them + if os.path.exists(apidoc_output_folder): + shutil.rmtree(apidoc_output_folder) + for pkg in PACKAGES: - argv = ['-e', '-o', PATH_HERE, os.path.join(PATH_HERE, PATH_ROOT, pkg), - '**/test_*', '--force', '--private', '--module-first'] - try: - # Sphinx 1.7+ - from sphinx.ext import apidoc - apidoc.main(argv) - except ImportError: - # Sphinx 1.6 (and earlier) - from sphinx import apidoc - argv.insert(0, apidoc.__file__) - apidoc.main(argv) + argv = ['-e', + '-o', apidoc_output_folder, + os.path.join(PATH_ROOT, pkg), + '**/test_*', + '--force', + '--private', + '--module-first'] + + apidoc.main(argv) def setup(app): @@ -307,7 +309,7 @@ def setup(app): # https://stackoverflow.com/questions/15889621/sphinx-how-to-exclude-imports-in-automodule MOCK_REQUIRE_PACKAGES = [] -with open(os.path.join(PATH_ROOT, 'requirements.txt'), 'r') as fp: +with open(os.path.join(PATH_ROOT, 'requirements-extra.txt'), 'r') as fp: for ln in fp.readlines(): found = [ln.index(ch) for ch in list(',=<>#') if ch in ln] pkg = ln[:min(found)] if found else ln @@ -316,19 +318,10 @@ def setup(app): # TODO: better parse from package since the import name and package name may differ MOCK_MANUAL_PACKAGES = [ - 'torch', 'torchvision', 'PIL', - 'test_tube', - 'mlflow', - 'comet_ml', - 'wandb', - 'neptune', - 'trains', ] autodoc_mock_imports = MOCK_REQUIRE_PACKAGES + MOCK_MANUAL_PACKAGES -# for mod_name in MOCK_REQUIRE_PACKAGES: -# sys.modules[mod_name] = mock.Mock() # Options for the linkcode 
extension @@ -403,3 +396,16 @@ def find_source(): # Useful for avoiding ambiguity when the same section heading appears in different documents. # http://www.sphinx-doc.org/en/master/usage/extensions/autosectionlabel.html autosectionlabel_prefix_document = True + +# only run doctests marked with a ".. doctest::" directive +doctest_test_doctest_blocks = '' +doctest_global_setup = """ + +import importlib +import os +import torch + +TORCHVISION_AVAILABLE = importlib.util.find_spec('torchvision') + +""" +coverage_skip_undoc_in_source = True diff --git a/docs/source/debugging.rst b/docs/source/debugging.rst index 775862d8c1826d..412b6d613ecc6d 100644 --- a/docs/source/debugging.rst +++ b/docs/source/debugging.rst @@ -1,3 +1,7 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + Debugging ========= The following are flags that make debugging much easier. @@ -11,9 +15,9 @@ a full epoch to crash. (See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.fast_dev_run` argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`) -.. code-block:: python +.. testcode:: - trainer = pl.Trainer(fast_dev_run=True) + trainer = Trainer(fast_dev_run=True) Inspect gradient norms ---------------------- @@ -22,10 +26,10 @@ Logs (to a logger), the norm of each weight matrix. (See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.track_grad_norm` argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`) -.. code-block:: python +.. testcode:: # the 2-norm - trainer = pl.Trainer(track_grad_norm=2) + trainer = Trainer(track_grad_norm=2) Log GPU usage ------------- @@ -34,9 +38,9 @@ Logs (to a logger) the GPU usage for each GPU on the master machine. (See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.log_gpu_memory` argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`) -.. code-block:: python +.. testcode:: - trainer = pl.Trainer(log_gpu_memory=True) + trainer = Trainer(log_gpu_memory=True) Make model overfit on subset of data ------------------------------------ @@ -47,9 +51,9 @@ and try to get your model to overfit. If it can't, it's a sign it won't work wit (See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.overfit_pct` argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`) -.. code-block:: python +.. testcode:: - trainer = pl.Trainer(overfit_pct=0.01) + trainer = Trainer(overfit_pct=0.01) Print the parameter count by layer ---------------------------------- @@ -59,9 +63,9 @@ To disable this behavior, turn off this flag: (See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.weights_summary` argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`) -.. code-block:: python +.. testcode:: - trainer = pl.Trainer(weights_summary=None) + trainer = Trainer(weights_summary=None) Set the number of validation sanity steps @@ -72,7 +76,7 @@ This avoids crashing in the validation loop sometime deep into a lengthy trainin (See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.num_sanity_val_steps` argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`) -.. code-block:: python +.. testcode:: # DEFAULT trainer = Trainer(num_sanity_val_steps=5) \ No newline at end of file diff --git a/docs/source/early_stopping.rst b/docs/source/early_stopping.rst index e74a720b30ebf6..a0bfc83ec27d9c 100644 --- a/docs/source/early_stopping.rst +++ b/docs/source/early_stopping.rst @@ -1,3 +1,9 @@ +.. 
testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + from pytorch_lightning.callbacks.early_stopping import EarlyStopping + + Early stopping ============== @@ -17,23 +23,25 @@ Enable Early Stopping using Callbacks on epoch end -------------------------------------------------- There are two ways to enable early stopping using callbacks on epoch end. -.. doctest:: +- Set early_stop_callback to True. Will look for 'val_loss' in validation_epoch_end() return dict. + If it is not found an error is raised. + + .. testcode:: + + trainer = Trainer(early_stop_callback=True) + +- Or configure your own callback - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.callbacks import EarlyStopping + .. testcode:: - # A) Set early_stop_callback to True. Will look for 'val_loss' - # in validation_epoch_end() return dict. If it is not found an error is raised. - >>> trainer = Trainer(early_stop_callback=True) - # B) Or configure your own callback - >>> early_stop_callback = EarlyStopping( - ... monitor='val_loss', - ... min_delta=0.00, - ... patience=3, - ... verbose=False, - ... mode='min' - ... ) - >>> trainer = Trainer(early_stop_callback=early_stop_callback) + early_stop_callback = EarlyStopping( + monitor='val_loss', + min_delta=0.00, + patience=3, + verbose=False, + mode='min' + ) + trainer = Trainer(early_stop_callback=early_stop_callback) In any case, the callback will fall back to the training metrics (returned in :meth:`~pytorch_lightning.core.lightning.LightningModule.training_step`, @@ -43,7 +51,8 @@ looking for a key to monitor if validation is disabled or is not defined. .. seealso:: - :class:`~pytorch_lightning.trainer.trainer.Trainer` + - :class:`~pytorch_lightning.trainer.trainer.Trainer` + - :class:`~pytorch_lightning.callbacks.early_stopping.EarlyStopping` Disable Early Stopping with callbacks on epoch end -------------------------------------------------- @@ -53,4 +62,5 @@ Note that ``None`` will not disable early stopping but will lead to the default behaviour. .. seealso:: - :class:`~pytorch_lightning.trainer.trainer.Trainer` + - :class:`~pytorch_lightning.trainer.trainer.Trainer` + - :class:`~pytorch_lightning.callbacks.early_stopping.EarlyStopping` diff --git a/docs/source/experiment_logging.rst b/docs/source/experiment_logging.rst index e9ddb47239b502..772efcfc13bc53 100644 --- a/docs/source/experiment_logging.rst +++ b/docs/source/experiment_logging.rst @@ -1,3 +1,9 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + from pytorch_lightning.core.lightning import LightningModule + + Experiment Logging ================== @@ -14,31 +20,29 @@ First, install the package: Then configure the logger and pass it to the :class:`~pytorch_lightning.trainer.trainer.Trainer`: -.. doctest:: - - >>> import os - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.loggers import CometLogger - >>> comet_logger = CometLogger( - ... api_key=os.environ.get('COMET_API_KEY'), - ... workspace=os.environ.get('COMET_WORKSPACE'), # Optional - ... save_dir='.', # Optional - ... project_name='default_project', # Optional - ... rest_api_key=os.environ.get('COMET_REST_API_KEY'), # Optional - ... experiment_name='default' # Optional - ... ) - >>> trainer = Trainer(logger=comet_logger) +.. 
testcode:: + + import os + from pytorch_lightning.loggers import CometLogger + comet_logger = CometLogger( + api_key=os.environ.get('COMET_API_KEY'), + workspace=os.environ.get('COMET_WORKSPACE'), # Optional + save_dir='.', # Optional + project_name='default_project', # Optional + rest_api_key=os.environ.get('COMET_REST_API_KEY'), # Optional + experiment_name='default' # Optional + ) + trainer = Trainer(logger=comet_logger) The :class:`~pytorch_lightning.loggers.CometLogger` is available anywhere except ``__init__`` in your :class:`~pytorch_lightning.core.lightning.LightningModule`. -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import LightningModule - >>> class MyModule(LightningModule): - ... def any_lightning_module_function_or_hook(self): - ... some_img = fake_image() - ... self.logger.experiment.add_image('generated_images', some_img, 0) + class MyModule(LightningModule): + def any_lightning_module_function_or_hook(self): + some_img = fake_image() + self.logger.experiment.add_image('generated_images', some_img, 0) .. seealso:: :class:`~pytorch_lightning.loggers.CometLogger` docs. @@ -56,15 +60,14 @@ First, install the package: Then configure the logger and pass it to the :class:`~pytorch_lightning.trainer.trainer.Trainer`: -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.loggers import MLFlowLogger - >>> mlf_logger = MLFlowLogger( - ... experiment_name="default", - ... tracking_uri="file:/." - ... ) - >>> trainer = Trainer(logger=mlf_logger) + from pytorch_lightning.loggers import MLFlowLogger + mlf_logger = MLFlowLogger( + experiment_name="default", + tracking_uri="file:./ml-runs" + ) + trainer = Trainer(logger=mlf_logger) .. seealso:: :class:`~pytorch_lightning.loggers.MLFlowLogger` docs. @@ -82,29 +85,27 @@ First, install the package: Then configure the logger and pass it to the :class:`~pytorch_lightning.trainer.trainer.Trainer`: -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.loggers import NeptuneLogger - >>> neptune_logger = NeptuneLogger( - ... api_key='ANONYMOUS', # replace with your own - ... project_name='shared/pytorch-lightning-integration', - ... experiment_name='default', # Optional, - ... params={'max_epochs': 10}, # Optional, - ... tags=['pytorch-lightning', 'mlp'], # Optional, - ... ) - >>> trainer = Trainer(logger=neptune_logger) + from pytorch_lightning.loggers import NeptuneLogger + neptune_logger = NeptuneLogger( + api_key='ANONYMOUS', # replace with your own + project_name='shared/pytorch-lightning-integration', + experiment_name='default', # Optional, + params={'max_epochs': 10}, # Optional, + tags=['pytorch-lightning', 'mlp'], # Optional, + ) + trainer = Trainer(logger=neptune_logger) The :class:`~pytorch_lightning.loggers.NeptuneLogger` is available anywhere except ``__init__`` in your :class:`~pytorch_lightning.core.lightning.LightningModule`. -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import LightningModule - >>> class MyModule(LightningModule): - ... def any_lightning_module_function_or_hook(self): - ... some_img = fake_image() - ... self.logger.experiment.add_image('generated_images', some_img, 0) + class MyModule(LightningModule): + def any_lightning_module_function_or_hook(self): + some_img = fake_image() + self.logger.experiment.add_image('generated_images', some_img, 0) .. seealso:: :class:`~pytorch_lightning.loggers.NeptuneLogger` docs. 
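The `MyModule` snippets in these logger docs call a `fake_image()` helper that is left undefined; a minimal sketch of such a placeholder, assuming the logger backends accept a plain `(C, H, W)` tensor (the shape and random values here are arbitrary, chosen only to match the MNIST-sized examples used elsewhere in these docs):

.. code-block:: python

    import torch

    def fake_image():
        # hypothetical placeholder: a random single-channel 28x28 image tensor,
        # shaped (C, H, W) as expected by e.g. TensorBoard's add_image
        return torch.rand(1, 28, 28)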
@@ -122,28 +123,31 @@ First, install the package: Then configure the logger and pass it to the :class:`~pytorch_lightning.trainer.trainer.Trainer`: -.. doctest:: +.. testcode:: + + from pytorch_lightning.loggers import TrainsLogger + trains_logger = TrainsLogger( + project_name='examples', + task_name='pytorch lightning test', + ) + trainer = Trainer(logger=trains_logger) + +.. testoutput:: + :options: +ELLIPSIS, +NORMALIZE_WHITESPACE + :hide: - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.loggers import TrainsLogger - >>> trains_logger = TrainsLogger( - ... project_name='examples', - ... task_name='pytorch lightning test', - ... ) # doctest: +ELLIPSIS TRAINS Task: ... TRAINS results page: ... - >>> trainer = Trainer(logger=trains_logger) The :class:`~pytorch_lightning.loggers.TrainsLogger` is available anywhere in your :class:`~pytorch_lightning.core.lightning.LightningModule`. -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import LightningModule - >>> class MyModule(LightningModule): - ... def __init__(self): - ... some_img = fake_image() - ... self.logger.experiment.log_image('debug', 'generated_image_0', some_img, 0) + class MyModule(LightningModule): + def __init__(self): + some_img = fake_image() + self.logger.experiment.log_image('debug', 'generated_image_0', some_img, 0) .. seealso:: :class:`~pytorch_lightning.loggers.TrainsLogger` docs. @@ -153,23 +157,21 @@ Tensorboard To use `TensorBoard `_ as your logger do the following. -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.loggers import TensorBoardLogger - >>> logger = TensorBoardLogger('tb_logs', name='my_model') - >>> trainer = Trainer(logger=logger) + from pytorch_lightning.loggers import TensorBoardLogger + logger = TensorBoardLogger('tb_logs', name='my_model') + trainer = Trainer(logger=logger) The :class:`~pytorch_lightning.loggers.TensorBoardLogger` is available anywhere except ``__init__`` in your :class:`~pytorch_lightning.core.lightning.LightningModule`. -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import LightningModule - >>> class MyModule(LightningModule): - ... def any_lightning_module_function_or_hook(self): - ... some_img = fake_image() - ... self.logger.experiment.add_image('generated_images', some_img, 0) + class MyModule(LightningModule): + def any_lightning_module_function_or_hook(self): + some_img = fake_image() + self.logger.experiment.add_image('generated_images', some_img, 0) .. seealso:: :class:`~pytorch_lightning.loggers.TensorBoardLogger` docs. @@ -188,22 +190,21 @@ First, install the package: Then configure the logger and pass it to the :class:`~pytorch_lightning.trainer.trainer.Trainer`: -.. doctest:: +.. testcode:: - >>> from pytorch_lightning.loggers import TestTubeLogger - >>> logger = TestTubeLogger('tb_logs', name='my_model') - >>> trainer = Trainer(logger=logger) + from pytorch_lightning.loggers import TestTubeLogger + logger = TestTubeLogger('tb_logs', name='my_model') + trainer = Trainer(logger=logger) The :class:`~pytorch_lightning.loggers.TestTubeLogger` is available anywhere except ``__init__`` in your :class:`~pytorch_lightning.core.lightning.LightningModule`. -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import LightningModule - >>> class MyModule(LightningModule): - ... def any_lightning_module_function_or_hook(self): - ... some_img = fake_image() - ... 
self.logger.experiment.add_image('generated_images', some_img, 0) + class MyModule(LightningModule): + def any_lightning_module_function_or_hook(self): + some_img = fake_image() + self.logger.experiment.add_image('generated_images', some_img, 0) .. seealso:: :class:`~pytorch_lightning.loggers.TestTubeLogger` docs. @@ -221,24 +222,23 @@ First, install the package: Then configure the logger and pass it to the :class:`~pytorch_lightning.trainer.trainer.Trainer`: -.. doctest:: +.. testcode:: - >>> from pytorch_lightning.loggers import WandbLogger - >>> wandb_logger = WandbLogger() - >>> trainer = Trainer(logger=wandb_logger) + from pytorch_lightning.loggers import WandbLogger + wandb_logger = WandbLogger() + trainer = Trainer(logger=wandb_logger) The :class:`~pytorch_lightning.loggers.WandbLogger` is available anywhere except ``__init__`` in your :class:`~pytorch_lightning.core.lightning.LightningModule`. -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import LightningModule - >>> class MyModule(LightningModule): - ... def any_lightning_module_function_or_hook(self): - ... some_img = fake_image() - ... self.logger.experiment.log({ - ... "generated_images": [wandb.Image(some_img, caption="...")] - ... }) + class MyModule(LightningModule): + def any_lightning_module_function_or_hook(self): + some_img = fake_image() + self.logger.experiment.log({ + "generated_images": [wandb.Image(some_img, caption="...")] + }) .. seealso:: :class:`~pytorch_lightning.loggers.WandbLogger` docs. @@ -249,23 +249,22 @@ Multiple Loggers Lightning supports the use of multiple loggers, just pass a list to the :class:`~pytorch_lightning.trainer.trainer.Trainer`. -.. doctest:: +.. testcode:: - >>> from pytorch_lightning.loggers import TensorBoardLogger, TestTubeLogger - >>> logger1 = TensorBoardLogger('tb_logs', name='my_model') - >>> logger2 = TestTubeLogger('tb_logs', name='my_model') - >>> trainer = Trainer(logger=[logger1, logger2]) + from pytorch_lightning.loggers import TensorBoardLogger, TestTubeLogger + logger1 = TensorBoardLogger('tb_logs', name='my_model') + logger2 = TestTubeLogger('tb_logs', name='my_model') + trainer = Trainer(logger=[logger1, logger2]) The loggers are available as a list anywhere except ``__init__`` in your :class:`~pytorch_lightning.core.lightning.LightningModule`. -.. doctest:: +.. testcode:: - >>> from pytorch_lightning import LightningModule - >>> class MyModule(LightningModule): - ... def any_lightning_module_function_or_hook(self): - ... some_img = fake_image() - ... # Option 1 - ... self.logger.experiment[0].add_image('generated_images', some_img, 0) - ... # Option 2 - ... self.logger[0].experiment.add_image('generated_images', some_img, 0) + class MyModule(LightningModule): + def any_lightning_module_function_or_hook(self): + some_img = fake_image() + # Option 1 + self.logger.experiment[0].add_image('generated_images', some_img, 0) + # Option 2 + self.logger[0].experiment.add_image('generated_images', some_img, 0) diff --git a/docs/source/experiment_reporting.rst b/docs/source/experiment_reporting.rst index 0353fcd7a9e039..8e534f4cc6d265 100644 --- a/docs/source/experiment_reporting.rst +++ b/docs/source/experiment_reporting.rst @@ -1,3 +1,8 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + + Experiment Reporting ===================== @@ -11,10 +16,10 @@ Control logging frequency It may slow training down to log every single batch. Trainer has an option to log every k batches instead. -.. code-block:: python +.. 
testcode:: - # k = 10 - Trainer(row_log_interval=10) + k = 10 + trainer = Trainer(row_log_interval=k) Control log writing frequency ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -25,10 +30,10 @@ want to log using this trainer flag. .. seealso:: :class:`~pytorch_lightning.trainer.trainer.Trainer` -.. code-block:: python +.. testcode:: - k = 100 - Trainer(log_save_interval=k) + k = 100 + trainer = Trainer(log_save_interval=k) Log metrics ^^^^^^^^^^^ @@ -37,46 +42,47 @@ To plot metrics into whatever logger you passed in (tensorboard, comet, neptune, 1. training_epoch_end, validation_epoch_end, test_epoch_end will all log anything in the "log" key of the return dict. -.. code-block:: python +.. testcode:: - def training_epoch_end(self, outputs): - loss = some_loss() - ... + def training_epoch_end(self, outputs): + loss = some_loss() + ... - logs = {'train_loss': loss} - results = {'log': logs} - return results + logs = {'train_loss': loss} + results = {'log': logs} + return results - def validation_epoch_end(self, outputs): - loss = some_loss() - ... + def validation_epoch_end(self, outputs): + loss = some_loss() + ... - logs = {'val_loss': loss} - results = {'log': logs} - return results + logs = {'val_loss': loss} + results = {'log': logs} + return results - def test_epoch_end(self, outputs): - loss = some_loss() - ... + def test_epoch_end(self, outputs): + loss = some_loss() + ... - logs = {'test_loss': loss} - results = {'log': logs} - return results + logs = {'test_loss': loss} + results = {'log': logs} + return results 2. In addition, you can also use any arbitrary functionality from a particular logger from within your LightningModule. For instance, here we log images using tensorboard. -.. code-block:: python +.. testcode:: + :skipif: not TORCHVISION_AVAILABLE - def training_step(self, batch, batch_idx): - self.generated_imgs = self.decoder.generate() + def training_step(self, batch, batch_idx): + self.generated_imgs = self.decoder.generate() - sample_imgs = self.generated_imgs[:6] - grid = torchvision.utils.make_grid(sample_imgs) - self.logger.experiment.add_image('generated_images', grid, 0) + sample_imgs = self.generated_imgs[:6] + grid = torchvision.utils.make_grid(sample_imgs) + self.logger.experiment.add_image('generated_images', grid, 0) - ... - return results + ... + return results Modify progress bar ^^^^^^^^^^^^^^^^^^^ @@ -86,15 +92,15 @@ a key called "progress_bar". Here we show the validation loss in the progress bar -.. code-block:: python +.. testcode:: - def validation_epoch_end(self, outputs): - loss = some_loss() - ... + def validation_epoch_end(self, outputs): + loss = some_loss() + ... - logs = {'val_loss': loss} - results = {'progress_bar': logs} - return results + logs = {'val_loss': loss} + results = {'progress_bar': logs} + return results Snapshot hyperparameters ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -103,8 +109,8 @@ When Lightning creates a checkpoint, it stores a key "hparams" with the hyperpar .. code-block:: python - lightning_checkpoint = torch.load(filepath, map_location=lambda storage, loc: storage) - hyperparams = lightning_checkpoint['hparams'] + lightning_checkpoint = torch.load(filepath, map_location=lambda storage, loc: storage) + hyperparams = lightning_checkpoint['hparams'] Some loggers also allow logging the hyperparams used in the experiment. For instance, when using the TestTubeLogger or the TensorBoardLogger, all hyperparams will show @@ -115,8 +121,7 @@ Snapshot code Loggers also allow you to snapshot a copy of the code used in this experiment. 
For example, TestTubeLogger does this with a flag: -.. code-block:: python - - from pytorch_lightning.loggers import TestTubeLogger +.. testcode:: - logger = TestTubeLogger(create_git_tag=True) + from pytorch_lightning.loggers import TestTubeLogger + logger = TestTubeLogger('.', create_git_tag=True) diff --git a/docs/source/fast_training.rst b/docs/source/fast_training.rst index 970e9486173e14..208838f58b07c7 100644 --- a/docs/source/fast_training.rst +++ b/docs/source/fast_training.rst @@ -1,3 +1,8 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + + Fast Training ============= There are multiple options to speed up different parts of the training by choosing to train @@ -7,7 +12,7 @@ Check validation every n epochs ------------------------------- If you have a small dataset you might want to check validation every n epochs -.. code-block:: python +.. testcode:: # DEFAULT trainer = Trainer(check_val_every_n_epoch=1) @@ -19,7 +24,7 @@ It can be useful to force training for a minimum number of epochs or limit to a .. seealso:: :class:`~pytorch_lightning.trainer.trainer.Trainer` -.. code-block:: python +.. testcode:: # DEFAULT trainer = Trainer(min_epochs=1, max_epochs=1000) @@ -31,7 +36,7 @@ For large datasets it's often desirable to check validation multiple times withi Pass in a float to check that often within 1 training epoch. Pass in an int k to check every k training batches. Must use an int if using an IterableDataset. -.. code-block:: python +.. testcode:: # DEFAULT trainer = Trainer(val_check_interval=0.95) @@ -46,21 +51,21 @@ Use data subset for training, validation and test ------------------------------------------------- If you don't want to check 100% of the training/validation/test set (for debugging or if it's huge), set these flags. -.. code-block:: python - - # DEFAULT - trainer = Trainer( - train_percent_check=1.0, - val_percent_check=1.0, - test_percent_check=1.0 - ) - - # check 10%, 20%, 30% only, respectively for training, validation and test set - trainer = Trainer( - train_percent_check=0.1, - val_percent_check=0.2, - test_percent_check=0.3 - ) +.. testcode:: + + # DEFAULT + trainer = Trainer( + train_percent_check=1.0, + val_percent_check=1.0, + test_percent_check=1.0 + ) + + # check 10%, 20%, 30% only, respectively for training, validation and test set + trainer = Trainer( + train_percent_check=0.1, + val_percent_check=0.2, + test_percent_check=0.3 + ) .. note:: ``train_percent_check``, ``val_percent_check`` and ``test_percent_check`` will be overwritten by ``overfit_pct`` if ``overfit_pct`` > 0. ``val_percent_check`` will be ignored if ``fast_dev_run=True``. diff --git a/docs/source/hyperparameters.rst b/docs/source/hyperparameters.rst index ff067b82e0ae38..5b2dd343fb6225 100644 --- a/docs/source/hyperparameters.rst +++ b/docs/source/hyperparameters.rst @@ -1,3 +1,13 @@ +.. testsetup:: * + + import torch + from argparse import ArgumentParser, Namespace + from pytorch_lightning.trainer.trainer import Trainer + from pytorch_lightning.core.lightning import LightningModule + import sys + sys.argv = ['foo'] + + Hyperparameters --------------- Lightning has utilities to interact seamlessly with the command line ArgumentParser @@ -7,13 +17,11 @@ ArgumentParser ^^^^^^^^^^^^^^ Lightning is designed to augment a lot of the functionality of the built-in Python ArgumentParser -.. code-block:: python +.. 
testcode:: from argparse import ArgumentParser - parser = ArgumentParser() parser.add_argument('--layer_1_dim', type=int, default=128) - args = parser.parse_args() This allows you to call your program like so: @@ -35,9 +43,9 @@ We can do this as follows. First, in your LightningModule, define the arguments specific to that module. Remember that data splits or data paths may also be specific to a module (ie: if your project has a model that trains on Imagenet and another on CIFAR-10). -.. code-block:: python +.. testcode:: - class LitModel(LightningModule): + class LitModel(LightningModule): @staticmethod def add_model_specific_args(parent_parser): @@ -48,13 +56,12 @@ a module (ie: if your project has a model that trains on Imagenet and another on Now in your main trainer file, add the Trainer args, the program args, and add the model args -.. code-block:: python +.. testcode:: # ---------------- # trainer_main.py # ---------------- from argparse import ArgumentParser - parser = ArgumentParser() # add PROGRAM level args @@ -66,7 +73,7 @@ Now in your main trainer file, add the Trainer args, the program args, and add t # add all the available trainer options to argparse # ie: now --gpus --num_nodes ... --fast_dev_run all work in the cli - parser = pl.Trainer.add_argparse_args(parser) + parser = Trainer.add_argparse_args(parser) hparams = parser.parse_args() @@ -78,9 +85,7 @@ Now you can call run your program like so Finally, make sure to start the training like so: -.. code-block:: bash - - hparams = parser.parse_args() +.. code-block:: python # YES model = LitModel(hparams) @@ -88,59 +93,56 @@ Finally, make sure to start the training like so: # NO # model = LitModel(learning_rate=hparams.learning_rate, ...) - #trainer = Trainer(gpus=hparams.gpus, ...) - + # trainer = Trainer(gpus=hparams.gpus, ...) -LightiningModule hparams -^^^^^^^^^^^^^^^^^^^^^^^^ +LightningModule hparams +^^^^^^^^^^^^^^^^^^^^^^^ Normally, we don't hard-code the values to a model. We usually use the command line to modify the network and read those values in the LightningModule -.. code-block:: python +.. testcode:: - class LitMNIST(pl.LightningModule): - def __init__(self, hparams): - super().__init__() + class LitMNIST(LightningModule): - # do this to save all arguments in any logger (tensorboard) - self.hparams = hparams - - self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim) - self.layer_2 = torch.nn.Linear(hparams.layer_1_dim, hparams.layer_2_dim) - self.layer_3 = torch.nn.Linear(hparams.layer_2_dim, 10) + def __init__(self, hparams): + super().__init__() - def forward(self, x): - ... + # do this to save all arguments in any logger (tensorboard) + self.hparams = hparams - def train_dataloader(self): - ... 
- return DataLoader(mnist_train, batch_size=self.hparams.batch_size) + self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim) + self.layer_2 = torch.nn.Linear(hparams.layer_1_dim, hparams.layer_2_dim) + self.layer_3 = torch.nn.Linear(hparams.layer_2_dim, 10) - def configure_optimizers(self): - return Adam(self.parameters(), lr=self.hparams.learning_rate) + def train_dataloader(self): + return DataLoader(mnist_train, batch_size=self.hparams.batch_size) - @staticmethod - def add_model_specific_args(parent_parser): - parser = ArgumentParser(parents=[parent_parser], add_help=False) + def configure_optimizers(self): + return Adam(self.parameters(), lr=self.hparams.learning_rate) - parser.add_argument('--layer_1_dim', type=int, default=128) - parser.add_argument('--layer_2_dim', type=int, default=256) - parser.add_argument('--batch_size', type=int, default=64) - parser.add_argument('--learning_rate', type=float, default=0.002) - return parser + @staticmethod + def add_model_specific_args(parent_parser): + parser = ArgumentParser(parents=[parent_parser], add_help=False) + parser.add_argument('--layer_1_dim', type=int, default=128) + parser.add_argument('--layer_2_dim', type=int, default=256) + parser.add_argument('--batch_size', type=int, default=64) + parser.add_argument('--learning_rate', type=float, default=0.002) + return parser Now pass in the params when you init your model .. code-block:: python - hparams = parse_args() + parser = ArgumentParser() + parser = LitMNIST.add_model_specific_args(parser) + hparams = parser.parse_args() model = LitMNIST(hparams) The line `self.hparams = hparams` is very special. This line assigns your hparams to the LightningModule. This does two things: -1. It adds them automatically to tensorboard logs under the hparams tab. +1. It adds them automatically to TensorBoard logs under the hparams tab. 2. Lightning will save those hparams to the checkpoint and use them to restore the module correctly. Trainer args @@ -165,9 +167,10 @@ Multiple Lightning Modules We often have multiple Lightning Modules where each one has different arguments. Instead of polluting the main.py file, the LightningModule lets you define arguments for each one. -.. code-block:: python +.. testcode:: + + class LitMNIST(LightningModule): - class LitMNIST(pl.LightningModule): def __init__(self, hparams): super().__init__() self.layer_1 = torch.nn.Linear(28 * 28, hparams.layer_1_dim) @@ -178,7 +181,10 @@ polluting the main.py file, the LightningModule lets you define arguments for ea parser.add_argument('--layer_1_dim', type=int, default=128) return parser - class GoodGAN(pl.LightningModule): +.. testcode:: + + class GoodGAN(LightningModule): + def __init__(self, hparams): super().__init__() self.encoder = Encoder(layers=hparams.encoder_layers) @@ -189,7 +195,8 @@ polluting the main.py file, the LightningModule lets you define arguments for ea parser.add_argument('--encoder_layers', type=int, default=12) return parser -Now we can allow each model to inject the arguments it needs in the main.py + +Now we can allow each model to inject the arguments it needs in the ``main.py`` .. 
code-block:: python @@ -213,7 +220,7 @@ Now we can allow each model to inject the arguments it needs in the main.py parser.add_argument('--model_name', type=str, default='gan', help='gan or mnist') # THIS LINE IS KEY TO PULL THE MODEL NAME - temp_args = parser.parse_known_args() + temp_args, _ = parser.parse_known_args() # let the model add what it wants if temp_args.model_name == 'gan': @@ -226,7 +233,7 @@ Now we can allow each model to inject the arguments it needs in the main.py # train main(args) -and now we can train MNIST or the gan using the command line interface! +and now we can train MNIST or the GAN using the command line interface! .. code-block:: bash diff --git a/docs/source/index.rst b/docs/source/index.rst index 6d1bfa26c28535..b74a9490af4e0e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -104,10 +104,10 @@ Indices and tables .. toctree:: :hidden: - pytorch_lightning.core - pytorch_lightning.callbacks - pytorch_lightning.loggers - pytorch_lightning.overrides - pytorch_lightning.profiler - pytorch_lightning.trainer - pytorch_lightning.utilities \ No newline at end of file + api/pytorch_lightning.core + api/pytorch_lightning.callbacks + api/pytorch_lightning.loggers + api/pytorch_lightning.overrides + api/pytorch_lightning.profiler + api/pytorch_lightning.trainer + api/pytorch_lightning.utilities \ No newline at end of file diff --git a/docs/source/introduction_guide.rst b/docs/source/introduction_guide.rst index a7a406bbcb68d3..5d26278483c398 100644 --- a/docs/source/introduction_guide.rst +++ b/docs/source/introduction_guide.rst @@ -1,3 +1,9 @@ +.. testsetup:: * + + from pytorch_lightning.core.lightning import LightningModule + from pytorch_lightning.trainer.trainer import Trainer + + Introduction Guide ================== PyTorch Lightning provides a very simple template for organizing your PyTorch code. Once @@ -126,14 +132,14 @@ The LightningModule provides the structure on how to organize these 5 ingredient Let's first start with the model. In this case we'll design a 3-layer neural network. -.. code-block:: default +.. testcode:: import torch from torch.nn import functional as F from torch import nn - import pytorch_lightning as pl + from pytorch_lightning.core.lightning import LightningModule - class LitMNIST(pl.LightningModule): + class LitMNIST(LightningModule): def __init__(self): super().__init__() @@ -169,7 +175,7 @@ Notice this is a `LightningModule` instead of a `torch.nn.Module`. A LightningMo equivalent to a PyTorch Module except it has added functionality. However, you can use it EXACTLY the same as you would a PyTorch Module. -.. code-block:: default +.. testcode:: net = LitMNIST() x = torch.Tensor(1, 1, 28, 28) @@ -189,14 +195,14 @@ Data The Lightning Module organizes your dataloaders and data processing as well. Here's the PyTorch code for loading MNIST -.. code-block:: default +.. testcode:: + :skipif: not TORCHVISION_AVAILABLE from torch.utils.data import DataLoader, random_split from torchvision.datasets import MNIST import os from torchvision import datasets, transforms - # transforms # prepare transforms standard to MNIST transform=transforms.Compose([transforms.ToTensor(), @@ -206,24 +212,38 @@ Here's the PyTorch code for loading MNIST mnist_train = MNIST(os.getcwd(), train=True, download=True) mnist_train = DataLoader(mnist_train, batch_size=64) +.. testoutput:: + :hide: + :skipif: os.path.isdir(os.path.join(os.getcwd(), 'MNIST')) or not TORCHVISION_AVAILABLE + + Downloading ... + Extracting ... + Downloading ... + Extracting ... 
+ Downloading ... + Extracting ... + Processing... + Done! + When using PyTorch Lightning, we use the exact same code except we organize it into the LightningModule -.. code-block:: python +.. testcode:: + :skipif: not TORCHVISION_AVAILABLE from torch.utils.data import DataLoader, random_split from torchvision.datasets import MNIST import os from torchvision import datasets, transforms - class LitMNIST(pl.LightningModule): + class LitMNIST(LightningModule): - def train_dataloader(self): - transform=transforms.Compose([transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) - mnist_train = MNIST(os.getcwd(), train=True, download=False, - transform=transform) - return DataLoader(mnist_train, batch_size=64) + def train_dataloader(self): + transform=transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + mnist_train = MNIST(os.getcwd(), train=True, download=False, + transform=transform) + return DataLoader(mnist_train, batch_size=64) Notice the code is exactly the same, except now the training dataloading has been organized by the LightningModule under the `train_dataloader` method. This is great because if you run into a project that uses Lightning and want @@ -232,21 +252,21 @@ to figure out how they prepare their training data you can just look in the `tra Usually though, we want to separate the things that write to disk in data-processing from things like transforms which happen in memory. -.. code-block:: python +.. testcode:: - class LitMNIST(pl.LightningModule): + class LitMNIST(LightningModule): - def prepare_data(self): - # download only - MNIST(os.getcwd(), train=True, download=True) + def prepare_data(self): + # download only + MNIST(os.getcwd(), train=True, download=True) - def train_dataloader(self): - # no download, just transform - transform=transforms.Compose([transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) - mnist_train = MNIST(os.getcwd(), train=True, download=False, - transform=transform) - return DataLoader(mnist_train, batch_size=64) + def train_dataloader(self): + # no download, just transform + transform=transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + mnist_train = MNIST(os.getcwd(), train=True, download=False, + transform=transform) + return DataLoader(mnist_train, batch_size=64) Doing it in the `prepare_data` method ensures that when you have multiple GPUs you won't overwrite the data. This is a contrived example @@ -254,24 +274,24 @@ but it gets more complicated with things like NLP or Imagenet. In general fill these methods with the following: -.. code-block:: python - - class LitMNIST(pl.LightningModule): +.. testcode:: - def prepare_data(self): - # stuff here is done once at the very beginning of training - # before any distributed training starts + class LitMNIST(LightningModule): - # download stuff - # save to disk - # etc... - - def train_dataloader(self): - # data transforms - # dataset creation - # return a DataLoader + def prepare_data(self): + # stuff here is done once at the very beginning of training + # before any distributed training starts + # download stuff + # save to disk + # etc... + ... + def train_dataloader(self): + # data transforms + # dataset creation + # return a DataLoader + ... Optimizer ^^^^^^^^^ @@ -287,20 +307,20 @@ In PyTorch we do it as follows: In Lightning we do the same but organize it under the configure_optimizers method. -.. code-block:: python +.. 
testcode:: - class LitMNIST(pl.LightningModule): + class LitMNIST(LightningModule): - def configure_optimizers(self): - return Adam(self.parameters(), lr=1e-3) + def configure_optimizers(self): + return Adam(self.parameters(), lr=1e-3) .. note:: The LightningModule itself has the parameters, so pass in self.parameters() However, if you have multiple optimizers use the matching parameters -.. code-block:: python +.. testcode:: - class LitMNIST(pl.LightningModule): + class LitMNIST(LightningModule): def configure_optimizers(self): return Adam(self.generator(), lr=1e-3), Adam(self.discriminator(), lr=1e-3) @@ -340,16 +360,16 @@ In the case of MNIST we do the following In Lightning, everything that is in the training step gets organized under the `training_step` function in the LightningModule -.. code-block:: python +.. testcode:: - class LitMNIST(pl.LightningModule): + class LitMNIST(LightningModule): - def training_step(self, batch, batch_idx): - x, y = batch - logits = self(x) - loss = F.nll_loss(logits, y) - return {'loss': loss} - # return loss (also works) + def training_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.nll_loss(logits, y) + return {'loss': loss} + # return loss (also works) Again, this is the same PyTorch code except that it has been organized by the LightningModule. This code is not restricted which means it can be as complicated as a full seq-2-seq, RL loop, GAN, etc... @@ -367,43 +387,43 @@ So far we defined 4 key ingredients in pure PyTorch but organized the code insid For clarity, we'll recall that the full LightningModule now looks like this. -.. code-block:: python +.. testcode:: + + class LitMNIST(LightningModule): + def __init__(self): + super().__init__() + self.layer_1 = torch.nn.Linear(28 * 28, 128) + self.layer_2 = torch.nn.Linear(128, 256) + self.layer_3 = torch.nn.Linear(256, 10) + + def forward(self, x): + batch_size, channels, width, height = x.size() + x = x.view(batch_size, -1) + x = self.layer_1(x) + x = torch.relu(x) + x = self.layer_2(x) + x = torch.relu(x) + x = self.layer_3(x) + x = torch.log_softmax(x, dim=1) + return x + + def train_dataloader(self): + transform=transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + mnist_train = MNIST(os.getcwd(), train=True, download=False, transform=transform) + return DataLoader(mnist_train, batch_size=64) - class LitMNIST(pl.LightningModule): - def __init__(self): - super().__init__() - self.layer_1 = torch.nn.Linear(28 * 28, 128) - self.layer_2 = torch.nn.Linear(128, 256) - self.layer_3 = torch.nn.Linear(256, 10) - - def forward(self, x): - batch_size, channels, width, height = x.size() - x = x.view(batch_size, -1) - x = self.layer_1(x) - x = torch.relu(x) - x = self.layer_2(x) - x = torch.relu(x) - x = self.layer_3(x) - x = torch.log_softmax(x, dim=1) - return x - - def train_dataloader(self): - transform=transforms.Compose([transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) - mnist_train = MNIST(os.getcwd(), train=True, download=False, transform=transform) - return DataLoader(mnist_train, batch_size=64) - - def configure_optimizers(self): - return Adam(self.parameters(), lr=1e-3) + def configure_optimizers(self): + return Adam(self.parameters(), lr=1e-3) - def training_step(self, batch, batch_idx): - x, y = batch - logits = self(x) - loss = F.nll_loss(logits, y) + def training_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.nll_loss(logits, y) - # add logging - logs = {'loss': loss} - return 
{'loss': loss, 'log': logs} + # add logging + logs = {'loss': loss} + return {'loss': loss, 'log': logs} Again, this is the same PyTorch code, except that it's organized by the LightningModule. This organization now lets us train this model @@ -551,33 +571,33 @@ will cause all sorts of issues. To solve this problem, move the download code to the `prepare_data` method in the LightningModule. In this method we do all the preparation we need to do once (instead of on every gpu). -.. code-block:: python +.. testcode:: - class LitMNIST(pl.LightningModule): - def prepare_data(self): - # transform - transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) + class LitMNIST(LightningModule): + def prepare_data(self): + # transform + transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) - # download - mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform) - mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform) + # download + mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform) + mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform) - # train/val split - mnist_train, mnist_val = random_split(mnist_train, [55000, 5000]) + # train/val split + mnist_train, mnist_val = random_split(mnist_train, [55000, 5000]) - # assign to use in dataloaders - self.train_dataset = mnist_train - self.val_dataset = mnist_val - self.test_dataset = mnist_test + # assign to use in dataloaders + self.train_dataset = mnist_train + self.val_dataset = mnist_val + self.test_dataset = mnist_test - def train_dataloader(self): - return DataLoader(self.train_dataset, batch_size=64) + def train_dataloader(self): + return DataLoader(self.train_dataset, batch_size=64) - def val_dataloader(self): - return DataLoader(self.val_dataset, batch_size=64) + def val_dataloader(self): + return DataLoader(self.val_dataset, batch_size=64) - def test_dataloader(self): - return DataLoader(self.test_dataset, batch_size=64) + def test_dataloader(self): + return DataLoader(self.test_dataset, batch_size=64) The `prepare_data` method is also a good place to do any data processing that needs to be done only once (ie: download or tokenize, etc...). @@ -642,28 +662,28 @@ In addition, we define a `val_dataloader` method which tells the trainer what da Notice we split the train split of MNIST into train, validation. We also have to make sure to do the sample split in the `train_dataloader` method. -.. code-block:: python +.. 
testcode:: - class LitMNIST(pl.LightningModule): - def validation_step(self, batch, batch_idx): - x, y = batch - logits = self(x) - loss = F.nll_loss(logits, y) - return {'val_loss': loss} - - def validation_epoch_end(self, outputs): - avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean() - tensorboard_logs = {'val_loss': avg_loss} - return {'val_loss': avg_loss, 'log': tensorboard_logs} - - def val_dataloader(self): - transform=transforms.Compose([transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) - mnist_train = MNIST(os.getcwd(), train=True, download=False, - transform=transform) - _, mnist_val = random_split(mnist_train, [55000, 5000]) - mnist_val = DataLoader(mnist_val, batch_size=64) - return mnist_val + class LitMNIST(LightningModule): + def validation_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.nll_loss(logits, y) + return {'val_loss': loss} + + def validation_epoch_end(self, outputs): + avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean() + tensorboard_logs = {'val_loss': avg_loss} + return {'val_loss': avg_loss, 'log': tensorboard_logs} + + def val_dataloader(self): + transform=transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + mnist_train = MNIST(os.getcwd(), train=True, download=False, + transform=transform) + _, mnist_val = random_split(mnist_train, [55000, 5000]) + mnist_val = DataLoader(mnist_val, batch_size=64) + return mnist_val Again, we've just organized the regular PyTorch code into two steps, the `validation_step` method which operates on a single batch and the `validation_epoch_end` method to compute statistics on all batches. @@ -698,26 +718,26 @@ Just like the validation loop, we define exactly the same steps for testing: - test_epoch_end - test_dataloader -.. code-block:: python +.. testcode:: - class LitMNIST(pl.LightningModule): - def test_step(self, batch, batch_idx): - x, y = batch - logits = self(x) - loss = F.nll_loss(logits, y) - return {'val_loss': loss} - - def test_epoch_end(self, outputs): - avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean() - tensorboard_logs = {'val_loss': avg_loss} - return {'val_loss': avg_loss, 'log': tensorboard_logs} - - def test_dataloader(self): - transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) - mnist_train = MNIST(os.getcwd(), train=False, download=False, transform=transform) - _, mnist_val = random_split(mnist_train, [55000, 5000]) - mnist_val = DataLoader(mnist_val, batch_size=64) - return mnist_val + class LitMNIST(LightningModule): + def test_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.nll_loss(logits, y) + return {'val_loss': loss} + + def test_epoch_end(self, outputs): + avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean() + tensorboard_logs = {'val_loss': avg_loss} + return {'val_loss': avg_loss, 'log': tensorboard_logs} + + def test_dataloader(self): + transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) + mnist_train = MNIST(os.getcwd(), train=False, download=False, transform=transform) + _, mnist_val = random_split(mnist_train, [55000, 5000]) + mnist_val = DataLoader(mnist_val, batch_size=64) + return mnist_val However, to make sure the test set isn't used inadvertently, Lightning has a separate API to run tests. Once you train your model simply call `.test()`. @@ -773,26 +793,26 @@ On the surface, it looks like `forward` and `training_step` are similar. 
General what we want the model to do is what happens in the `forward`. whereas the `training_step` likely calls forward from within it. -.. code-block:: python +.. testcode:: - class MNISTClassifier(pl.LightningModule): + class MNISTClassifier(LightningModule): - def forward(self, x): - batch_size, channels, width, height = x.size() - x = x.view(batch_size, -1) - x = self.layer_1(x) - x = torch.relu(x) - x = self.layer_2(x) - x = torch.relu(x) - x = self.layer_3(x) - x = torch.log_softmax(x, dim=1) - return x + def forward(self, x): + batch_size, channels, width, height = x.size() + x = x.view(batch_size, -1) + x = self.layer_1(x) + x = torch.relu(x) + x = self.layer_2(x) + x = torch.relu(x) + x = self.layer_3(x) + x = torch.log_softmax(x, dim=1) + return x - def training_step(self, batch, batch_idx): - x, y = batch - logits = self(x) - loss = F.nll_loss(logits, y) - return loss + def training_step(self, batch, batch_idx): + x, y = batch + logits = self(x) + loss = F.nll_loss(logits, y) + return loss .. code-block:: python @@ -802,27 +822,27 @@ within it. In this case, we've set this LightningModel to predict logits. But we could also have it predict feature maps: -.. code-block:: python +.. testcode:: - class MNISTRepresentator(pl.LightningModule): + class MNISTRepresentator(LightningModule): - def forward(self, x): - batch_size, channels, width, height = x.size() - x = x.view(batch_size, -1) - x = self.layer_1(x) - x1 = torch.relu(x) - x = self.layer_2(x1) - x2 = torch.relu(x) - x3 = self.layer_3(x2) - return [x, x1, x2, x3] - - def training_step(self, batch, batch_idx): - x, y = batch - out, l1_feats, l2_feats, l3_feats = self(x) - logits = torch.log_softmax(out, dim=1) - ce_loss = F.nll_loss(logits, y) - loss = perceptual_loss(l1_feats, l2_feats, l3_feats) + ce_loss - return loss + def forward(self, x): + batch_size, channels, width, height = x.size() + x = x.view(batch_size, -1) + x = self.layer_1(x) + x1 = torch.relu(x) + x = self.layer_2(x1) + x2 = torch.relu(x) + x3 = self.layer_3(x2) + return [x, x1, x2, x3] + + def training_step(self, batch, batch_idx): + x, y = batch + out, l1_feats, l2_feats, l3_feats = self(x) + logits = torch.log_softmax(out, dim=1) + ce_loss = F.nll_loss(logits, y) + loss = perceptual_loss(l1_feats, l2_feats, l3_feats) + ce_loss + return loss .. code-block:: python @@ -832,21 +852,21 @@ In this case, we've set this LightningModel to predict logits. But we could also Or maybe we have a model that we use to do generation -.. code-block:: python +.. testcode:: - class LitMNISTDreamer(pl.LightningModule): + class LitMNISTDreamer(LightningModule): - def forward(self, z): - imgs = self.decoder(z) - return imgs + def forward(self, z): + imgs = self.decoder(z) + return imgs - def training_step(self, batch, batch_idx): - x, y = batch - representation = self.encoder(x) - imgs = self(representation) + def training_step(self, batch, batch_idx): + x, y = batch + representation = self.encoder(x) + imgs = self(representation) - loss = perceptual_loss(imgs, x) - return loss + loss = perceptual_loss(imgs, x) + return loss .. code-block:: python @@ -871,7 +891,7 @@ Any part of the training, validation and testing loop can be modified. For instance, if you wanted to do your own backward pass, you would override the default implementation -.. code-block:: python +.. testcode:: def backward(self, use_amp, loss, optimizer): if use_amp: @@ -882,9 +902,9 @@ default implementation With your own -.. code-block:: python +.. 
testcode:: - class LitMNIST(pl.LightningModule): + class LitMNIST(LightningModule): def backward(self, use_amp, loss, optimizer): # do a custom way of backward @@ -892,7 +912,7 @@ With your own Or if you wanted to initialize ddp in a different way than the default one -.. code-block:: python +.. testcode:: def configure_ddp(self, model, device_ids): # Lightning DDP simply routes to test_step, val_step, etc... @@ -905,9 +925,9 @@ Or if you wanted to initialize ddp in a different way than the default one you could do your own: -.. code-block:: python +.. testcode:: - class LitMNIST(pl.LightningModule): + class LitMNIST(LightningModule): def configure_ddp(self, model, device_ids): @@ -916,7 +936,7 @@ you could do your own: return model Every single part of training is configurable this way. -For a full list look at `lightningModule `_. +For a full list look at `LightningModule `_. --------- @@ -925,26 +945,32 @@ Callbacks Another way to add arbitrary functionality is to add a custom callback for hooks that you might care about -.. code-block:: python +.. testcode:: - import pytorch_lightning as pl + from pytorch_lightning.callbacks import Callback - class MyPrintingCallback(pl.Callback): + class MyPrintingCallback(Callback): def on_init_start(self, trainer): print('Starting to init trainer!') def on_init_end(self, trainer): - print('trainer is init now') + print('Trainer is init now') def on_train_end(self, trainer, pl_module): print('do something when training ends') And pass the callbacks into the trainer -.. code-block:: python +.. testcode:: + + trainer = Trainer(callbacks=[MyPrintingCallback()]) + +.. testoutput:: + :hide: - Trainer(callbacks=[MyPrintingCallback()]) + Starting to init trainer! + Trainer is init now .. note:: See full list of 12+ hooks in the :ref:`callbacks`. diff --git a/docs/source/lr_finder.rst b/docs/source/lr_finder.rst index aab0c7548c4cf7..3da5456b6de8b0 100755 --- a/docs/source/lr_finder.rst +++ b/docs/source/lr_finder.rst @@ -1,3 +1,8 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + from pytorch_lightning.core.lightning import LightningModule + Learning Rate Finder -------------------- @@ -24,17 +29,18 @@ will automatically be run before any training is done. The ``lr`` that is found and used will be written to the console and logged together with all other hyperparameters of the model. -.. code-block:: python +.. testcode:: # default, no automatic learning rate finder - Trainer(auto_lr_find=True) + trainer = Trainer(auto_lr_find=True) When the ``lr`` or ``learning_rate`` key in hparams exists, this flag sets your learning_rate. In both cases, if the respective fields are not found, an error will be thrown. -.. code-block:: python +.. testcode:: class LitModel(LightningModule): + def __init__(self, hparams): self.hparams = hparams @@ -43,14 +49,14 @@ In both cases, if the respective fields are not found, an error will be thrown. # finds learning rate automatically # sets hparams.lr or hparams.learning_rate to that learning rate - Trainer(auto_lr_find=True) + trainer = Trainer(auto_lr_find=True) To use an arbitrary value set it in the parameter. -.. code-block:: python +.. testcode:: # to set to your own hparams.my_value - Trainer(auto_lr_find='my_value') + trainer = Trainer(auto_lr_find='my_value') Under the hood, when you call fit, this is what happens. @@ -72,7 +78,7 @@ of this would look like .. 
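Before the manual `lr_find` example that follows, here is a sketch of the automatic path described above, showing where the discovered learning rate ends up: `configure_optimizers` reads it back from `self.hparams.lr`. This is not part of the patch; the `Namespace` hparams container and the random dataset are only for illustration.

.. code-block:: python

    from argparse import Namespace

    import torch
    from torch.nn import functional as F
    from torch.utils.data import DataLoader, TensorDataset

    from pytorch_lightning import Trainer
    from pytorch_lightning.core.lightning import LightningModule


    class LitModel(LightningModule):
        def __init__(self, hparams):
            super().__init__()
            self.hparams = hparams
            self.layer = torch.nn.Linear(32, 2)

        def forward(self, x):
            return self.layer(x)

        def training_step(self, batch, batch_idx):
            x, y = batch
            return {'loss': F.cross_entropy(self(x), y)}

        def train_dataloader(self):
            # random toy data, large enough for the default LR sweep
            x, y = torch.randn(6400, 32), torch.randint(0, 2, (6400,))
            return DataLoader(TensorDataset(x, y), batch_size=64)

        def configure_optimizers(self):
            # auto_lr_find rewrites hparams.lr before training starts
            return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)


    model = LitModel(Namespace(lr=1e-3))
    trainer = Trainer(auto_lr_find=True, max_epochs=1)
    trainer.fit(model)
    print(model.hparams.lr)   # the suggested learning rate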
code-block:: python model = MyModelClass(hparams) - trainer = pl.Trainer() + trainer = Trainer() # Run learning rate finder lr_finder = trainer.lr_find(model) diff --git a/docs/source/modules.rst b/docs/source/modules.rst deleted file mode 100644 index e4c5121858c280..00000000000000 --- a/docs/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -pl_examples -=========== - -.. toctree:: - :maxdepth: 4 - - pl_examples diff --git a/docs/source/multi_gpu.rst b/docs/source/multi_gpu.rst index 55d9fdb5faac26..8688cd338bc1b4 100644 --- a/docs/source/multi_gpu.rst +++ b/docs/source/multi_gpu.rst @@ -1,3 +1,9 @@ +.. testsetup:: * + + import torch + from pytorch_lightning.trainer.trainer import Trainer + from pytorch_lightning.core.lightning import LightningModule + .. _multi-gpu-training: Multi-GPU training @@ -13,7 +19,7 @@ Delete .cuda() or .to() calls Delete any calls to .cuda() or .to(device). -.. code-block:: python +.. testcode:: # before lightning def forward(self, x): @@ -30,7 +36,7 @@ Init using type_as When you need to create a new tensor, use `type_as`. This will make your code scale to any arbitrary number of GPUs or TPUs with Lightning -.. code-block:: python +.. testcode:: # before lightning def forward(self, x): @@ -47,7 +53,7 @@ Remove samplers For multi-node or TPU training, in PyTorch we must use `torch.nn.DistributedSampler`. The sampler makes sure each GPU sees the appropriate part of your data. -.. code-block:: python +.. testcode:: # without lightning def train_dataloader(self): @@ -62,7 +68,7 @@ sampler makes sure each GPU sees the appropriate part of your data. With Lightning, you don't need to do this because it takes care of adding the correct samplers when needed. -.. code-block:: python +.. testcode:: # with lightning def train_dataloader(self): @@ -131,10 +137,11 @@ each GPU will process 16 samples, after which the root node will aggregate the r .. warning:: DP use is discouraged by PyTorch and Lightning. Use ddp which is more stable and at least 3x faster -.. code-block:: python +.. testcode:: + :skipif: torch.cuda.device_count() < 2 - # train on 1 GPU (using dp mode) - trainer = pl.Trainer(gpus=2, distributed_backend='dp') + # train on 2 GPUs (using dp mode) + trainer = Trainer(gpus=2, distributed_backend='dp') Distributed Data Parallel ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -157,10 +164,10 @@ Distributed Data Parallel .. code-block:: python # train on 8 GPUs (same machine (ie: node)) - trainer = pl.Trainer(gpus=8, distributed_backend='ddp') + trainer = Trainer(gpus=8, distributed_backend='ddp') # train on 32 GPUs (4 nodes) - trainer = pl.Trainer(gpus=8, distributed_backend='ddp', num_nodes=4) + trainer = Trainer(gpus=8, distributed_backend='ddp', num_nodes=4) Distributed Data Parallel 2 ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -182,7 +189,7 @@ In this case, we can use ddp2 which behaves like dp in a machine and ddp across .. code-block:: python # train on 32 GPUs (4 nodes) - trainer = pl.Trainer(gpus=8, distributed_backend='ddp2', num_nodes=4) + trainer = Trainer(gpus=8, distributed_backend='ddp2', num_nodes=4) Horovod ^^^^^^^ @@ -202,15 +209,15 @@ Horovod can be configured in the training script to run with any number of GPUs .. 
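Before the Horovod launch snippets that follow, a tiny self-contained illustration of the `type_as` idiom recommended in the multi-GPU section above (a sketch, not part of the patch):

.. code-block:: python

    import torch

    x = torch.randn(2, 3)       # wherever Lightning placed the batch
    new_x = torch.zeros(2, 3)   # created on the CPU by default

    # type_as casts to x's dtype and device class, so the same code
    # runs unchanged on CPU, GPU or TPU
    new_x = new_x.type_as(x)

    assert new_x.dtype == x.dtype
    assert new_x.device.type == x.device.type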
code-block:: python # train Horovod on GPU (number of GPUs / machines provided on command-line) - trainer = pl.Trainer(distributed_backend='horovod', gpus=1) + trainer = Trainer(distributed_backend='horovod', gpus=1) # train Horovod on CPU (number of processes / machines provided on command-line) - trainer = pl.Trainer(distributed_backend='horovod') + trainer = Trainer(distributed_backend='horovod') When starting the training job, the driver application will then be used to specify the total number of worker processes: -.. code-block:: +.. code-block:: bash # run training with 4 GPUs on a single machine horovodrun -np 4 python train.py @@ -226,7 +233,7 @@ DP/DDP2 caveats In DP and DDP2 each GPU within a machine sees a portion of a batch. DP and ddp2 roughly do the following: -.. code-block:: python +.. testcode:: def distributed_forward(batch, model): batch = torch.Tensor(32, 8) @@ -245,7 +252,7 @@ DP and ddp2 roughly do the following: So, when Lightning calls any of the `training_step`, `validation_step`, `test_step` you will only be operating on one of those pieces. -.. code-block:: python +.. testcode:: # the batch here is a portion of the FULL batch def training_step(self, batch, batch_idx): @@ -255,7 +262,7 @@ For most metrics, this doesn't really matter. However, if you want to add something to your computational graph (like softmax) using all batch parts you can use the `training_step_end` step. -.. code-block:: python +.. testcode:: def training_step_end(self, outputs): # only use when on dp @@ -288,7 +295,7 @@ In pseudocode, the full sequence is: to illustrate why this is needed, let's look at dataparallel -.. code-block:: python +.. testcode:: def training_step(self, batch, batch_idx): x, y = batch @@ -313,13 +320,13 @@ it will behave the same no matter the backend. Validation and test step also have the same option when using dp -.. code-block:: python +.. testcode:: - def validation_step_end(self, batch_parts_outputs): - ... + def validation_step_end(self, batch_parts_outputs): + ... - def test_step_end(self, batch_parts_outputs): - ... + def test_step_end(self, batch_parts_outputs): + ... Implement Your Own Distributed (DDP) training ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -335,7 +342,7 @@ batch size. Let's say you have a batch size of 7 in your dataloader. -.. code-block:: +.. testcode:: class LitModel(LightningModule): @@ -344,7 +351,7 @@ Let's say you have a batch size of 7 in your dataloader. In (DDP, Horovod) your effective batch size will be 7 * gpus * num_nodes. -.. code-block:: +.. code-block:: python # effective batch size = 7 * 8 Trainer(gpus=8, distributed_backend='ddp|horovod') @@ -356,7 +363,7 @@ In (DDP, Horovod) your effective batch size will be 7 * gpus * num_nodes. In DDP2, your effective batch size will be 7 * num_nodes. The reason is that the full batch is visible to all GPUs on the node when using DDP2. -.. code-block:: +.. code-block:: python # effective batch size = 7 Trainer(gpus=8, distributed_backend='ddp2') diff --git a/docs/source/multiple_loaders.rst b/docs/source/multiple_loaders.rst index e88b7b1cbe0787..dca339f9b99ad2 100644 --- a/docs/source/multiple_loaders.rst +++ b/docs/source/multiple_loaders.rst @@ -1,3 +1,7 @@ +.. testsetup:: * + + from pytorch_lightning.core.lightning import LightningModule + Multiple Datasets ================= Lightning supports multiple dataloaders in a few ways. @@ -14,7 +18,7 @@ dataloaders). (`reference `_) -.. code-block:: python +.. 
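Stepping back to the effective-batch-size discussion a few hunks up, the arithmetic is easy to sanity-check in plain Python; the numbers below mirror the example in the text (a sketch, not part of the patch; the multiple-dataset example continues right after).

.. code-block:: python

    per_process_batch_size = 7
    gpus_per_node = 8
    num_nodes = 4

    # DDP / Horovod: every process loads its own batch of 7
    effective_ddp = per_process_batch_size * gpus_per_node * num_nodes   # 224

    # DDP2: one process per node sees the full batch of 7
    effective_ddp2 = per_process_batch_size * num_nodes                  # 28

    print(effective_ddp, effective_ddp2)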
testcode:: class ConcatDataset(torch.utils.data.Dataset): def __init__(self, *datasets): @@ -27,6 +31,7 @@ dataloaders). return min(len(d) for d in self.datasets) class LitModel(LightningModule): + def train_dataloader(self): concat_dataset = ConcatDataset( datasets.ImageFolder(traindir_A), @@ -44,9 +49,11 @@ dataloaders). def val_dataloader(self): # SAME + ... def test_dataloader(self): # SAME + ... Test/Val dataloaders -------------------- @@ -58,7 +65,7 @@ See the following for more details: - :meth:`~pytorch_lightning.core.LightningModule.val_dataloader` - :meth:`~pytorch_lightning.core.LightningModule.test_dataloader` -.. code-block:: python +.. testcode:: def val_dataloader(self): loader_1 = Dataloader() diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index e3f3a892d983fa..24b11412e5c7d9 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -1,3 +1,10 @@ +.. testsetup:: * + + from pytorch_lightning.core.lightning import LightningModule + from pytorch_lightning.trainer.trainer import Trainer + + + Quick Start =========== @@ -13,7 +20,8 @@ To illustrate, here's the typical PyTorch project structure organized in a Light Step 1: Define a LightningModule --------------------------------- -.. code-block:: python +.. testcode:: + :skipif: not TORCHVISION_AVAILABLE import os @@ -22,10 +30,9 @@ Step 1: Define a LightningModule from torch.utils.data import DataLoader from torchvision.datasets import MNIST from torchvision import transforms + from pytorch_lightning.core.lightning import LightningModule - import pytorch_lightning as pl - - class LitModel(pl.LightningModule): + class LitModel(LightningModule): def __init__(self): super().__init__() @@ -53,7 +60,8 @@ Step 1: Define a LightningModule Step 2: Fit with a Trainer -------------------------- -.. code-block:: python +.. testcode:: + :skipif: torch.cuda.device_count() < 8 from pytorch_lightning import Trainer @@ -68,13 +76,13 @@ Under the hood, lightning does (in high-level pseudocode): .. code-block:: python model = LitModel() - train_dataloader = model.train_dataloader + train_dataloader = model.train_dataloader() optimizer = model.configure_optimizers() for epoch in epochs: train_outs = [] for batch in train_dataloader: - loss = model.training_step() + loss = model.training_step(batch) loss.backward() train_outs.append(loss.detach()) @@ -88,9 +96,9 @@ Validation loop --------------- To also add a validation loop add the following functions -.. code-block:: python +.. testcode:: - class LitModel(pl.LightningModule): + class LitModel(LightningModule): def validation_step(self, batch, batch_idx): x, y = batch @@ -118,7 +126,11 @@ And now the trainer will call the validation loop automatically Under the hood in pseudocode, lightning does the following: -.. code-block:: python +.. testsetup:: * + + train_dataloader = [] + +.. testcode:: # ... for batch in train_dataloader: @@ -145,9 +157,9 @@ Test loop --------- You might also need a test loop -.. code-block:: python +.. testcode:: - class LitModel(pl.LightningModule): + class LitModel(LightningModule): def test_step(self, batch, batch_idx): x, y = batch diff --git a/docs/source/optimizers.rst b/docs/source/optimizers.rst index 0b02f9c56a7297..8f8715a09e7b3a 100644 --- a/docs/source/optimizers.rst +++ b/docs/source/optimizers.rst @@ -5,7 +5,7 @@ Learning rate scheduling ------------------------------------- Every optimizer you use can be paired with any `LearningRateScheduler `_. -.. code-block:: python +.. 
testcode:: # no LR scheduler def configure_optimizers(self): @@ -44,7 +44,7 @@ Use multiple optimizers (like GANs) ------------------------------------- To use multiple optimizers return > 1 optimizers from :meth:`pytorch_lightning.core.LightningModule.configure_optimizers` -.. code-block:: python +.. testcode:: # one optimizer def configure_optimizers(self): @@ -79,7 +79,7 @@ override the :meth:`optimizer_step` function. For example, here step optimizer A every 2 batches and optimizer B every 4 batches -.. code-block:: python +.. testcode:: def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i, second_order_closure=None): optimizer.step() @@ -104,7 +104,7 @@ For example, here step optimizer A every 2 batches and optimizer B every 4 batch Here we add a learning-rate warm up -.. code-block:: python +.. testcode:: # learning rate warm-up def optimizer_step(self, current_epoch, batch_nb, optimizer, optimizer_i, second_order_closure=None): diff --git a/docs/source/pl_examples.basic_examples.cpu_template.rst b/docs/source/pl_examples.basic_examples.cpu_template.rst new file mode 100644 index 00000000000000..05bf1fadf060be --- /dev/null +++ b/docs/source/pl_examples.basic_examples.cpu_template.rst @@ -0,0 +1,7 @@ +pl\_examples.basic\_examples.cpu\_template module +================================================= + +.. automodule:: pl_examples.basic_examples.cpu_template + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.basic_examples.gpu_template.rst b/docs/source/pl_examples.basic_examples.gpu_template.rst new file mode 100644 index 00000000000000..ab6b9cdcb0867f --- /dev/null +++ b/docs/source/pl_examples.basic_examples.gpu_template.rst @@ -0,0 +1,7 @@ +pl\_examples.basic\_examples.gpu\_template module +================================================= + +.. automodule:: pl_examples.basic_examples.gpu_template + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.basic_examples.multi_node_ddp2_demo.rst b/docs/source/pl_examples.basic_examples.multi_node_ddp2_demo.rst new file mode 100644 index 00000000000000..eb337beb83cedb --- /dev/null +++ b/docs/source/pl_examples.basic_examples.multi_node_ddp2_demo.rst @@ -0,0 +1,7 @@ +pl\_examples.basic\_examples.multi\_node\_ddp2\_demo module +=========================================================== + +.. automodule:: pl_examples.basic_examples.multi_node_ddp2_demo + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.basic_examples.multi_node_ddp_demo.rst b/docs/source/pl_examples.basic_examples.multi_node_ddp_demo.rst new file mode 100644 index 00000000000000..bbcbfc1804e33a --- /dev/null +++ b/docs/source/pl_examples.basic_examples.multi_node_ddp_demo.rst @@ -0,0 +1,7 @@ +pl\_examples.basic\_examples.multi\_node\_ddp\_demo module +========================================================== + +.. automodule:: pl_examples.basic_examples.multi_node_ddp_demo + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.basic_examples.rst b/docs/source/pl_examples.basic_examples.rst new file mode 100644 index 00000000000000..16dd2d7f241fe5 --- /dev/null +++ b/docs/source/pl_examples.basic_examples.rst @@ -0,0 +1,17 @@ +pl\_examples.basic\_examples package +==================================== + +.. automodule:: pl_examples.basic_examples + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. 
toctree:: + + pl_examples.basic_examples.cpu_template + pl_examples.basic_examples.gpu_template + pl_examples.basic_examples.multi_node_ddp2_demo + pl_examples.basic_examples.multi_node_ddp_demo diff --git a/docs/source/pl_examples.domain_templates.generative_adversarial_net.rst b/docs/source/pl_examples.domain_templates.generative_adversarial_net.rst new file mode 100644 index 00000000000000..af927e779fe466 --- /dev/null +++ b/docs/source/pl_examples.domain_templates.generative_adversarial_net.rst @@ -0,0 +1,7 @@ +pl\_examples.domain\_templates.generative\_adversarial\_net module +================================================================== + +.. automodule:: pl_examples.domain_templates.generative_adversarial_net + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.domain_templates.imagenet.rst b/docs/source/pl_examples.domain_templates.imagenet.rst new file mode 100644 index 00000000000000..b995d989c02893 --- /dev/null +++ b/docs/source/pl_examples.domain_templates.imagenet.rst @@ -0,0 +1,7 @@ +pl\_examples.domain\_templates.imagenet module +============================================== + +.. automodule:: pl_examples.domain_templates.imagenet + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.domain_templates.reinforce_learn_Qnet.rst b/docs/source/pl_examples.domain_templates.reinforce_learn_Qnet.rst new file mode 100644 index 00000000000000..e59b6344f2476b --- /dev/null +++ b/docs/source/pl_examples.domain_templates.reinforce_learn_Qnet.rst @@ -0,0 +1,7 @@ +pl\_examples.domain\_templates.reinforce\_learn\_Qnet module +============================================================ + +.. automodule:: pl_examples.domain_templates.reinforce_learn_Qnet + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.domain_templates.rst b/docs/source/pl_examples.domain_templates.rst new file mode 100644 index 00000000000000..432c9adc383858 --- /dev/null +++ b/docs/source/pl_examples.domain_templates.rst @@ -0,0 +1,17 @@ +pl\_examples.domain\_templates package +====================================== + +.. automodule:: pl_examples.domain_templates + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pl_examples.domain_templates.generative_adversarial_net + pl_examples.domain_templates.imagenet + pl_examples.domain_templates.reinforce_learn_Qnet + pl_examples.domain_templates.semantic_segmentation diff --git a/docs/source/pl_examples.domain_templates.semantic_segmentation.rst b/docs/source/pl_examples.domain_templates.semantic_segmentation.rst new file mode 100644 index 00000000000000..a501658d1bfa31 --- /dev/null +++ b/docs/source/pl_examples.domain_templates.semantic_segmentation.rst @@ -0,0 +1,7 @@ +pl\_examples.domain\_templates.semantic\_segmentation module +============================================================ + +.. automodule:: pl_examples.domain_templates.semantic_segmentation + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.models.lightning_template.rst b/docs/source/pl_examples.models.lightning_template.rst new file mode 100644 index 00000000000000..992cd9e99bbe24 --- /dev/null +++ b/docs/source/pl_examples.models.lightning_template.rst @@ -0,0 +1,7 @@ +pl\_examples.models.lightning\_template module +============================================== + +.. 
automodule:: pl_examples.models.lightning_template + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.models.rst b/docs/source/pl_examples.models.rst new file mode 100644 index 00000000000000..6b4d63c50ae13b --- /dev/null +++ b/docs/source/pl_examples.models.rst @@ -0,0 +1,15 @@ +pl\_examples.models package +=========================== + +.. automodule:: pl_examples.models + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pl_examples.models.lightning_template + pl_examples.models.unet diff --git a/docs/source/pl_examples.models.unet.rst b/docs/source/pl_examples.models.unet.rst new file mode 100644 index 00000000000000..4c73f72d7fedfc --- /dev/null +++ b/docs/source/pl_examples.models.unet.rst @@ -0,0 +1,7 @@ +pl\_examples.models.unet module +=============================== + +.. automodule:: pl_examples.models.unet + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pl_examples.rst b/docs/source/pl_examples.rst new file mode 100644 index 00000000000000..67c13475897b2b --- /dev/null +++ b/docs/source/pl_examples.rst @@ -0,0 +1,16 @@ +pl\_examples package +==================== + +.. automodule:: pl_examples + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + + pl_examples.basic_examples + pl_examples.domain_templates + pl_examples.models diff --git a/docs/source/pytorch_lightning.callbacks.base.rst b/docs/source/pytorch_lightning.callbacks.base.rst new file mode 100644 index 00000000000000..a2542d09ec929d --- /dev/null +++ b/docs/source/pytorch_lightning.callbacks.base.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.callbacks.base module +======================================== + +.. automodule:: pytorch_lightning.callbacks.base + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.callbacks.early_stopping.rst b/docs/source/pytorch_lightning.callbacks.early_stopping.rst new file mode 100644 index 00000000000000..65ab47b8fe2e3f --- /dev/null +++ b/docs/source/pytorch_lightning.callbacks.early_stopping.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.callbacks.early\_stopping module +=================================================== + +.. automodule:: pytorch_lightning.callbacks.early_stopping + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.callbacks.gradient_accumulation_scheduler.rst b/docs/source/pytorch_lightning.callbacks.gradient_accumulation_scheduler.rst new file mode 100644 index 00000000000000..88791cbd8ddb9e --- /dev/null +++ b/docs/source/pytorch_lightning.callbacks.gradient_accumulation_scheduler.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.callbacks.gradient\_accumulation\_scheduler module +===================================================================== + +.. automodule:: pytorch_lightning.callbacks.gradient_accumulation_scheduler + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.callbacks.lr_logger.rst b/docs/source/pytorch_lightning.callbacks.lr_logger.rst new file mode 100644 index 00000000000000..94772d6a81975e --- /dev/null +++ b/docs/source/pytorch_lightning.callbacks.lr_logger.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.callbacks.lr\_logger module +============================================== + +.. 
automodule:: pytorch_lightning.callbacks.lr_logger + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.callbacks.model_checkpoint.rst b/docs/source/pytorch_lightning.callbacks.model_checkpoint.rst new file mode 100644 index 00000000000000..19874ce2ee82f3 --- /dev/null +++ b/docs/source/pytorch_lightning.callbacks.model_checkpoint.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.callbacks.model\_checkpoint module +===================================================== + +.. automodule:: pytorch_lightning.callbacks.model_checkpoint + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.callbacks.progress.rst b/docs/source/pytorch_lightning.callbacks.progress.rst new file mode 100644 index 00000000000000..9453148e6cb0a7 --- /dev/null +++ b/docs/source/pytorch_lightning.callbacks.progress.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.callbacks.progress module +============================================ + +.. automodule:: pytorch_lightning.callbacks.progress + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.callbacks.rst b/docs/source/pytorch_lightning.callbacks.rst new file mode 100644 index 00000000000000..99b9f9c9bf409e --- /dev/null +++ b/docs/source/pytorch_lightning.callbacks.rst @@ -0,0 +1,19 @@ +pytorch\_lightning.callbacks package +==================================== + +.. automodule:: pytorch_lightning.callbacks + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pytorch_lightning.callbacks.base + pytorch_lightning.callbacks.early_stopping + pytorch_lightning.callbacks.gradient_accumulation_scheduler + pytorch_lightning.callbacks.lr_logger + pytorch_lightning.callbacks.model_checkpoint + pytorch_lightning.callbacks.progress diff --git a/docs/source/pytorch_lightning.core.decorators.rst b/docs/source/pytorch_lightning.core.decorators.rst new file mode 100644 index 00000000000000..ace7b01404694b --- /dev/null +++ b/docs/source/pytorch_lightning.core.decorators.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.core.decorators module +========================================= + +.. automodule:: pytorch_lightning.core.decorators + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.core.grads.rst b/docs/source/pytorch_lightning.core.grads.rst new file mode 100644 index 00000000000000..24d2dc4aca46b5 --- /dev/null +++ b/docs/source/pytorch_lightning.core.grads.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.core.grads module +==================================== + +.. automodule:: pytorch_lightning.core.grads + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.core.hooks.rst b/docs/source/pytorch_lightning.core.hooks.rst new file mode 100644 index 00000000000000..2dc675a1bf5aa6 --- /dev/null +++ b/docs/source/pytorch_lightning.core.hooks.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.core.hooks module +==================================== + +.. automodule:: pytorch_lightning.core.hooks + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.core.lightning.rst b/docs/source/pytorch_lightning.core.lightning.rst new file mode 100644 index 00000000000000..0abad946ab5f4b --- /dev/null +++ b/docs/source/pytorch_lightning.core.lightning.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.core.lightning module +======================================== + +.. 
automodule:: pytorch_lightning.core.lightning + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.core.memory.rst b/docs/source/pytorch_lightning.core.memory.rst new file mode 100644 index 00000000000000..bba731d6e6f086 --- /dev/null +++ b/docs/source/pytorch_lightning.core.memory.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.core.memory module +===================================== + +.. automodule:: pytorch_lightning.core.memory + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.core.model_saving.rst b/docs/source/pytorch_lightning.core.model_saving.rst new file mode 100644 index 00000000000000..f7ff39e4fb0222 --- /dev/null +++ b/docs/source/pytorch_lightning.core.model_saving.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.core.model\_saving module +============================================ + +.. automodule:: pytorch_lightning.core.model_saving + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.core.root_module.rst b/docs/source/pytorch_lightning.core.root_module.rst new file mode 100644 index 00000000000000..accb51fdcdfe67 --- /dev/null +++ b/docs/source/pytorch_lightning.core.root_module.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.core.root\_module module +=========================================== + +.. automodule:: pytorch_lightning.core.root_module + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.core.rst b/docs/source/pytorch_lightning.core.rst new file mode 100644 index 00000000000000..f433027dc16ffc --- /dev/null +++ b/docs/source/pytorch_lightning.core.rst @@ -0,0 +1,21 @@ +pytorch\_lightning.core package +=============================== + +.. automodule:: pytorch_lightning.core + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pytorch_lightning.core.decorators + pytorch_lightning.core.grads + pytorch_lightning.core.hooks + pytorch_lightning.core.lightning + pytorch_lightning.core.memory + pytorch_lightning.core.model_saving + pytorch_lightning.core.root_module + pytorch_lightning.core.saving diff --git a/docs/source/pytorch_lightning.core.saving.rst b/docs/source/pytorch_lightning.core.saving.rst new file mode 100644 index 00000000000000..b49e325d36500b --- /dev/null +++ b/docs/source/pytorch_lightning.core.saving.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.core.saving module +===================================== + +.. automodule:: pytorch_lightning.core.saving + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.loggers.base.rst b/docs/source/pytorch_lightning.loggers.base.rst new file mode 100644 index 00000000000000..d2a70130bef2f6 --- /dev/null +++ b/docs/source/pytorch_lightning.loggers.base.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.loggers.base module +====================================== + +.. automodule:: pytorch_lightning.loggers.base + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.loggers.comet.rst b/docs/source/pytorch_lightning.loggers.comet.rst new file mode 100644 index 00000000000000..1840b446b0c79a --- /dev/null +++ b/docs/source/pytorch_lightning.loggers.comet.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.loggers.comet module +======================================= + +.. 
automodule:: pytorch_lightning.loggers.comet + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.loggers.mlflow.rst b/docs/source/pytorch_lightning.loggers.mlflow.rst new file mode 100644 index 00000000000000..0ee58265c25292 --- /dev/null +++ b/docs/source/pytorch_lightning.loggers.mlflow.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.loggers.mlflow module +======================================== + +.. automodule:: pytorch_lightning.loggers.mlflow + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.loggers.neptune.rst b/docs/source/pytorch_lightning.loggers.neptune.rst new file mode 100644 index 00000000000000..c756980751ae0c --- /dev/null +++ b/docs/source/pytorch_lightning.loggers.neptune.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.loggers.neptune module +========================================= + +.. automodule:: pytorch_lightning.loggers.neptune + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.loggers.rst b/docs/source/pytorch_lightning.loggers.rst new file mode 100644 index 00000000000000..e7937f43ef1086 --- /dev/null +++ b/docs/source/pytorch_lightning.loggers.rst @@ -0,0 +1,21 @@ +pytorch\_lightning.loggers package +================================== + +.. automodule:: pytorch_lightning.loggers + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pytorch_lightning.loggers.base + pytorch_lightning.loggers.comet + pytorch_lightning.loggers.mlflow + pytorch_lightning.loggers.neptune + pytorch_lightning.loggers.tensorboard + pytorch_lightning.loggers.test_tube + pytorch_lightning.loggers.trains + pytorch_lightning.loggers.wandb diff --git a/docs/source/pytorch_lightning.loggers.tensorboard.rst b/docs/source/pytorch_lightning.loggers.tensorboard.rst new file mode 100644 index 00000000000000..14e310e08b5e04 --- /dev/null +++ b/docs/source/pytorch_lightning.loggers.tensorboard.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.loggers.tensorboard module +============================================= + +.. automodule:: pytorch_lightning.loggers.tensorboard + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.loggers.test_tube.rst b/docs/source/pytorch_lightning.loggers.test_tube.rst new file mode 100644 index 00000000000000..2efe4804629fcc --- /dev/null +++ b/docs/source/pytorch_lightning.loggers.test_tube.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.loggers.test\_tube module +============================================ + +.. automodule:: pytorch_lightning.loggers.test_tube + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.loggers.trains.rst b/docs/source/pytorch_lightning.loggers.trains.rst new file mode 100644 index 00000000000000..8f2574b6fead06 --- /dev/null +++ b/docs/source/pytorch_lightning.loggers.trains.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.loggers.trains module +======================================== + +.. automodule:: pytorch_lightning.loggers.trains + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.loggers.wandb.rst b/docs/source/pytorch_lightning.loggers.wandb.rst new file mode 100644 index 00000000000000..21458e1b48daf4 --- /dev/null +++ b/docs/source/pytorch_lightning.loggers.wandb.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.loggers.wandb module +======================================= + +.. 
automodule:: pytorch_lightning.loggers.wandb + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.logging.comet.rst b/docs/source/pytorch_lightning.logging.comet.rst new file mode 100644 index 00000000000000..3658828fd64122 --- /dev/null +++ b/docs/source/pytorch_lightning.logging.comet.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.logging.comet module +======================================= + +.. automodule:: pytorch_lightning.logging.comet + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.logging.comet_logger.rst b/docs/source/pytorch_lightning.logging.comet_logger.rst new file mode 100644 index 00000000000000..5e01e579d5ce88 --- /dev/null +++ b/docs/source/pytorch_lightning.logging.comet_logger.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.logging.comet\_logger module +=============================================== + +.. automodule:: pytorch_lightning.logging.comet_logger + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.logging.mlflow.rst b/docs/source/pytorch_lightning.logging.mlflow.rst new file mode 100644 index 00000000000000..343a65437b9fbd --- /dev/null +++ b/docs/source/pytorch_lightning.logging.mlflow.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.logging.mlflow module +======================================== + +.. automodule:: pytorch_lightning.logging.mlflow + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.logging.mlflow_logger.rst b/docs/source/pytorch_lightning.logging.mlflow_logger.rst new file mode 100644 index 00000000000000..cd13c07e57b895 --- /dev/null +++ b/docs/source/pytorch_lightning.logging.mlflow_logger.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.logging.mlflow\_logger module +================================================ + +.. automodule:: pytorch_lightning.logging.mlflow_logger + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.logging.neptune.rst b/docs/source/pytorch_lightning.logging.neptune.rst new file mode 100644 index 00000000000000..65109357bad07d --- /dev/null +++ b/docs/source/pytorch_lightning.logging.neptune.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.logging.neptune module +========================================= + +.. automodule:: pytorch_lightning.logging.neptune + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.logging.rst b/docs/source/pytorch_lightning.logging.rst new file mode 100644 index 00000000000000..1cd4249acabbbe --- /dev/null +++ b/docs/source/pytorch_lightning.logging.rst @@ -0,0 +1,21 @@ +pytorch\_lightning.logging package +================================== + +.. automodule:: pytorch_lightning.logging + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pytorch_lightning.logging.comet + pytorch_lightning.logging.comet_logger + pytorch_lightning.logging.mlflow + pytorch_lightning.logging.mlflow_logger + pytorch_lightning.logging.neptune + pytorch_lightning.logging.test_tube + pytorch_lightning.logging.test_tube_logger + pytorch_lightning.logging.wandb diff --git a/docs/source/pytorch_lightning.logging.test_tube.rst b/docs/source/pytorch_lightning.logging.test_tube.rst new file mode 100644 index 00000000000000..7b193b768b722c --- /dev/null +++ b/docs/source/pytorch_lightning.logging.test_tube.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.logging.test\_tube module +============================================ + +.. 
automodule:: pytorch_lightning.logging.test_tube + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.logging.test_tube_logger.rst b/docs/source/pytorch_lightning.logging.test_tube_logger.rst new file mode 100644 index 00000000000000..1b801791b38c09 --- /dev/null +++ b/docs/source/pytorch_lightning.logging.test_tube_logger.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.logging.test\_tube\_logger module +==================================================== + +.. automodule:: pytorch_lightning.logging.test_tube_logger + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.logging.wandb.rst b/docs/source/pytorch_lightning.logging.wandb.rst new file mode 100644 index 00000000000000..a3b327a181bc7a --- /dev/null +++ b/docs/source/pytorch_lightning.logging.wandb.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.logging.wandb module +======================================= + +.. automodule:: pytorch_lightning.logging.wandb + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.overrides.data_parallel.rst b/docs/source/pytorch_lightning.overrides.data_parallel.rst new file mode 100644 index 00000000000000..0dfc51c47b8d31 --- /dev/null +++ b/docs/source/pytorch_lightning.overrides.data_parallel.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.overrides.data\_parallel module +================================================== + +.. automodule:: pytorch_lightning.overrides.data_parallel + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.overrides.override_data_parallel.rst b/docs/source/pytorch_lightning.overrides.override_data_parallel.rst new file mode 100644 index 00000000000000..90b4e667ee8ff3 --- /dev/null +++ b/docs/source/pytorch_lightning.overrides.override_data_parallel.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.overrides.override\_data\_parallel module +============================================================ + +.. automodule:: pytorch_lightning.overrides.override_data_parallel + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.overrides.rst b/docs/source/pytorch_lightning.overrides.rst new file mode 100644 index 00000000000000..4cc93b03993b2b --- /dev/null +++ b/docs/source/pytorch_lightning.overrides.rst @@ -0,0 +1,15 @@ +pytorch\_lightning.overrides package +==================================== + +.. automodule:: pytorch_lightning.overrides + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pytorch_lightning.overrides.data_parallel + pytorch_lightning.overrides.override_data_parallel diff --git a/docs/source/pytorch_lightning.profiler.profilers.rst b/docs/source/pytorch_lightning.profiler.profilers.rst new file mode 100644 index 00000000000000..003b43bb7f5c5f --- /dev/null +++ b/docs/source/pytorch_lightning.profiler.profilers.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.profiler.profilers module +============================================ + +.. automodule:: pytorch_lightning.profiler.profilers + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.profiler.rst b/docs/source/pytorch_lightning.profiler.rst new file mode 100644 index 00000000000000..1bf01413a98b10 --- /dev/null +++ b/docs/source/pytorch_lightning.profiler.rst @@ -0,0 +1,14 @@ +pytorch\_lightning.profiler package +=================================== + +.. 
automodule:: pytorch_lightning.profiler + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pytorch_lightning.profiler.profilers diff --git a/docs/source/pytorch_lightning.pt_overrides.override_data_parallel.rst b/docs/source/pytorch_lightning.pt_overrides.override_data_parallel.rst new file mode 100644 index 00000000000000..f17dd1fe1369b7 --- /dev/null +++ b/docs/source/pytorch_lightning.pt_overrides.override_data_parallel.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.pt\_overrides.override\_data\_parallel module +================================================================ + +.. automodule:: pytorch_lightning.pt_overrides.override_data_parallel + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.pt_overrides.rst b/docs/source/pytorch_lightning.pt_overrides.rst new file mode 100644 index 00000000000000..40e39428df32ea --- /dev/null +++ b/docs/source/pytorch_lightning.pt_overrides.rst @@ -0,0 +1,14 @@ +pytorch\_lightning.pt\_overrides package +======================================== + +.. automodule:: pytorch_lightning.pt_overrides + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pytorch_lightning.pt_overrides.override_data_parallel diff --git a/docs/source/pytorch_lightning.root_module.decorators.rst b/docs/source/pytorch_lightning.root_module.decorators.rst new file mode 100644 index 00000000000000..55934a72c7c710 --- /dev/null +++ b/docs/source/pytorch_lightning.root_module.decorators.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.root\_module.decorators module +================================================= + +.. automodule:: pytorch_lightning.root_module.decorators + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.root_module.grads.rst b/docs/source/pytorch_lightning.root_module.grads.rst new file mode 100644 index 00000000000000..fa5a79e097362c --- /dev/null +++ b/docs/source/pytorch_lightning.root_module.grads.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.root\_module.grads module +============================================ + +.. automodule:: pytorch_lightning.root_module.grads + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.root_module.hooks.rst b/docs/source/pytorch_lightning.root_module.hooks.rst new file mode 100644 index 00000000000000..e2816979a9e4f9 --- /dev/null +++ b/docs/source/pytorch_lightning.root_module.hooks.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.root\_module.hooks module +============================================ + +.. automodule:: pytorch_lightning.root_module.hooks + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.root_module.memory.rst b/docs/source/pytorch_lightning.root_module.memory.rst new file mode 100644 index 00000000000000..f5b56eacdce3f1 --- /dev/null +++ b/docs/source/pytorch_lightning.root_module.memory.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.root\_module.memory module +============================================= + +.. 
automodule:: pytorch_lightning.root_module.memory + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.root_module.model_saving.rst b/docs/source/pytorch_lightning.root_module.model_saving.rst new file mode 100644 index 00000000000000..2e74e8e7f57b3e --- /dev/null +++ b/docs/source/pytorch_lightning.root_module.model_saving.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.root\_module.model\_saving module +==================================================== + +.. automodule:: pytorch_lightning.root_module.model_saving + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.root_module.root_module.rst b/docs/source/pytorch_lightning.root_module.root_module.rst new file mode 100644 index 00000000000000..66f7c82f1b6685 --- /dev/null +++ b/docs/source/pytorch_lightning.root_module.root_module.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.root\_module.root\_module module +=================================================== + +.. automodule:: pytorch_lightning.root_module.root_module + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.root_module.rst b/docs/source/pytorch_lightning.root_module.rst new file mode 100644 index 00000000000000..62d4b48bbd2f09 --- /dev/null +++ b/docs/source/pytorch_lightning.root_module.rst @@ -0,0 +1,19 @@ +pytorch\_lightning.root\_module package +======================================= + +.. automodule:: pytorch_lightning.root_module + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pytorch_lightning.root_module.decorators + pytorch_lightning.root_module.grads + pytorch_lightning.root_module.hooks + pytorch_lightning.root_module.memory + pytorch_lightning.root_module.model_saving + pytorch_lightning.root_module.root_module diff --git a/docs/source/pytorch_lightning.rst b/docs/source/pytorch_lightning.rst new file mode 100644 index 00000000000000..177ec80b905dc7 --- /dev/null +++ b/docs/source/pytorch_lightning.rst @@ -0,0 +1,23 @@ +pytorch\_lightning package +========================== + +.. automodule:: pytorch_lightning + :members: + :undoc-members: + :show-inheritance: + +Subpackages +----------- + +.. toctree:: + + pytorch_lightning.callbacks + pytorch_lightning.core + pytorch_lightning.loggers + pytorch_lightning.logging + pytorch_lightning.overrides + pytorch_lightning.profiler + pytorch_lightning.pt_overrides + pytorch_lightning.root_module + pytorch_lightning.trainer + pytorch_lightning.utilities diff --git a/docs/source/pytorch_lightning.trainer.auto_mix_precision.rst b/docs/source/pytorch_lightning.trainer.auto_mix_precision.rst new file mode 100644 index 00000000000000..3b7ecfd3e032c9 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.auto_mix_precision.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.auto\_mix\_precision module +====================================================== + +.. automodule:: pytorch_lightning.trainer.auto_mix_precision + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.callback_config.rst b/docs/source/pytorch_lightning.trainer.callback_config.rst new file mode 100644 index 00000000000000..8425d369161f0e --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.callback_config.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.callback\_config module +================================================== + +.. 
automodule:: pytorch_lightning.trainer.callback_config + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.callback_hook.rst b/docs/source/pytorch_lightning.trainer.callback_hook.rst new file mode 100644 index 00000000000000..573e802dc5c7a8 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.callback_hook.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.callback\_hook module +================================================ + +.. automodule:: pytorch_lightning.trainer.callback_hook + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.cross_validation.rst b/docs/source/pytorch_lightning.trainer.cross_validation.rst new file mode 100644 index 00000000000000..5010d051b6d96b --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.cross_validation.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.cross\_validation module +=================================================== + +.. automodule:: pytorch_lightning.trainer.cross_validation + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.data_loading.rst b/docs/source/pytorch_lightning.trainer.data_loading.rst new file mode 100644 index 00000000000000..343d2b81fca8d3 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.data_loading.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.data\_loading module +=============================================== + +.. automodule:: pytorch_lightning.trainer.data_loading + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.deprecated_api.rst b/docs/source/pytorch_lightning.trainer.deprecated_api.rst new file mode 100644 index 00000000000000..7da5f753a62ace --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.deprecated_api.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.deprecated\_api module +================================================= + +.. automodule:: pytorch_lightning.trainer.deprecated_api + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.distrib_data_parallel.rst b/docs/source/pytorch_lightning.trainer.distrib_data_parallel.rst new file mode 100644 index 00000000000000..a83c935891db02 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.distrib_data_parallel.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.distrib\_data\_parallel module +========================================================= + +.. automodule:: pytorch_lightning.trainer.distrib_data_parallel + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.distrib_parts.rst b/docs/source/pytorch_lightning.trainer.distrib_parts.rst new file mode 100644 index 00000000000000..4c49e454641769 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.distrib_parts.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.distrib\_parts module +================================================ + +.. automodule:: pytorch_lightning.trainer.distrib_parts + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.evaluation_loop.rst b/docs/source/pytorch_lightning.trainer.evaluation_loop.rst new file mode 100644 index 00000000000000..722f94e2f0e895 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.evaluation_loop.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.evaluation\_loop module +================================================== + +.. 
automodule:: pytorch_lightning.trainer.evaluation_loop + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.ignored_warnings.rst b/docs/source/pytorch_lightning.trainer.ignored_warnings.rst new file mode 100644 index 00000000000000..9ab77abf0fbda5 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.ignored_warnings.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.ignored\_warnings module +=================================================== + +.. automodule:: pytorch_lightning.trainer.ignored_warnings + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.logging.rst b/docs/source/pytorch_lightning.trainer.logging.rst new file mode 100644 index 00000000000000..7602c2804c26a4 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.logging.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.logging module +========================================= + +.. automodule:: pytorch_lightning.trainer.logging + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.lr_finder.rst b/docs/source/pytorch_lightning.trainer.lr_finder.rst new file mode 100644 index 00000000000000..57a6793cb35396 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.lr_finder.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.lr\_finder module +============================================ + +.. automodule:: pytorch_lightning.trainer.lr_finder + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.model_hooks.rst b/docs/source/pytorch_lightning.trainer.model_hooks.rst new file mode 100644 index 00000000000000..0be1e1a3aecd35 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.model_hooks.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.model\_hooks module +============================================== + +.. automodule:: pytorch_lightning.trainer.model_hooks + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.optimizers.rst b/docs/source/pytorch_lightning.trainer.optimizers.rst new file mode 100644 index 00000000000000..fa2ed3184e9fc0 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.optimizers.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.optimizers module +============================================ + +.. automodule:: pytorch_lightning.trainer.optimizers + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.rst b/docs/source/pytorch_lightning.trainer.rst new file mode 100644 index 00000000000000..3716bf7a1db851 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.rst @@ -0,0 +1,31 @@ +pytorch\_lightning.trainer package +================================== + +.. automodule:: pytorch_lightning.trainer + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. 
toctree:: + + pytorch_lightning.trainer.auto_mix_precision + pytorch_lightning.trainer.callback_config + pytorch_lightning.trainer.callback_hook + pytorch_lightning.trainer.data_loading + pytorch_lightning.trainer.deprecated_api + pytorch_lightning.trainer.distrib_data_parallel + pytorch_lightning.trainer.distrib_parts + pytorch_lightning.trainer.evaluation_loop + pytorch_lightning.trainer.ignored_warnings + pytorch_lightning.trainer.logging + pytorch_lightning.trainer.lr_finder + pytorch_lightning.trainer.model_hooks + pytorch_lightning.trainer.optimizers + pytorch_lightning.trainer.supporters + pytorch_lightning.trainer.trainer + pytorch_lightning.trainer.training_io + pytorch_lightning.trainer.training_loop + pytorch_lightning.trainer.training_tricks diff --git a/docs/source/pytorch_lightning.trainer.supporters.rst b/docs/source/pytorch_lightning.trainer.supporters.rst new file mode 100644 index 00000000000000..97183b99164c11 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.supporters.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.supporters module +============================================ + +.. automodule:: pytorch_lightning.trainer.supporters + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.trainer.rst b/docs/source/pytorch_lightning.trainer.trainer.rst new file mode 100644 index 00000000000000..168d29e0fcd46a --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.trainer.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.trainer module +========================================= + +.. automodule:: pytorch_lightning.trainer.trainer + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.training_io.rst b/docs/source/pytorch_lightning.trainer.training_io.rst new file mode 100644 index 00000000000000..ca74e0433cf344 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.training_io.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.training\_io module +============================================== + +.. automodule:: pytorch_lightning.trainer.training_io + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.training_loop.rst b/docs/source/pytorch_lightning.trainer.training_loop.rst new file mode 100644 index 00000000000000..9165697dfd1b42 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.training_loop.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.training\_loop module +================================================ + +.. automodule:: pytorch_lightning.trainer.training_loop + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.trainer.training_tricks.rst b/docs/source/pytorch_lightning.trainer.training_tricks.rst new file mode 100644 index 00000000000000..ebd4070eb024e8 --- /dev/null +++ b/docs/source/pytorch_lightning.trainer.training_tricks.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.trainer.training\_tricks module +================================================== + +.. automodule:: pytorch_lightning.trainer.training_tricks + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.utilities.distributed.rst b/docs/source/pytorch_lightning.utilities.distributed.rst new file mode 100644 index 00000000000000..15a704a5d822c5 --- /dev/null +++ b/docs/source/pytorch_lightning.utilities.distributed.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.utilities.distributed module +=============================================== + +.. 
automodule:: pytorch_lightning.utilities.distributed + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.utilities.exceptions.rst b/docs/source/pytorch_lightning.utilities.exceptions.rst new file mode 100644 index 00000000000000..ab7db9998cd47b --- /dev/null +++ b/docs/source/pytorch_lightning.utilities.exceptions.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.utilities.exceptions module +============================================== + +.. automodule:: pytorch_lightning.utilities.exceptions + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.utilities.memory.rst b/docs/source/pytorch_lightning.utilities.memory.rst new file mode 100644 index 00000000000000..93fc61402f146a --- /dev/null +++ b/docs/source/pytorch_lightning.utilities.memory.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.utilities.memory module +========================================== + +.. automodule:: pytorch_lightning.utilities.memory + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.utilities.parsing.rst b/docs/source/pytorch_lightning.utilities.parsing.rst new file mode 100644 index 00000000000000..879c0540ec0710 --- /dev/null +++ b/docs/source/pytorch_lightning.utilities.parsing.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.utilities.parsing module +=========================================== + +.. automodule:: pytorch_lightning.utilities.parsing + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pytorch_lightning.utilities.rst b/docs/source/pytorch_lightning.utilities.rst new file mode 100644 index 00000000000000..0a27f4cfd258d7 --- /dev/null +++ b/docs/source/pytorch_lightning.utilities.rst @@ -0,0 +1,17 @@ +pytorch\_lightning.utilities package +==================================== + +.. automodule:: pytorch_lightning.utilities + :members: + :undoc-members: + :show-inheritance: + +Submodules +---------- + +.. toctree:: + + pytorch_lightning.utilities.distributed + pytorch_lightning.utilities.exceptions + pytorch_lightning.utilities.memory + pytorch_lightning.utilities.parsing diff --git a/docs/source/pytorch_lightning.utilities.warnings.rst b/docs/source/pytorch_lightning.utilities.warnings.rst new file mode 100644 index 00000000000000..1e55c5e71a55a4 --- /dev/null +++ b/docs/source/pytorch_lightning.utilities.warnings.rst @@ -0,0 +1,7 @@ +pytorch\_lightning.utilities.warnings module +============================================ + +.. automodule:: pytorch_lightning.utilities.warnings + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/sequences.rst b/docs/source/sequences.rst index 63da7e7147af1b..857fd08198de85 100644 --- a/docs/source/sequences.rst +++ b/docs/source/sequences.rst @@ -1,3 +1,8 @@ +.. testsetup:: * + + from torch.utils.data import IterableDataset + from pytorch_lightning.trainer.trainer import Trainer + Sequential Data ================ Lightning has built in support for dealing with sequential data. @@ -10,9 +15,9 @@ When using PackedSequence, do 2 things: 1. return either a padded tensor in dataset or a list of variable length tensors in the dataloader collate_fn (example above shows the list implementation). 2. Pack the sequence in forward or training and validation steps depending on use case. -.. code-block:: python +.. 
testcode:: - # For use in dataloader + # For use in dataloader def collate_fn(batch): x = [item[0] for item in batch] y = [item[1] for item in batch] @@ -30,7 +35,7 @@ For example, it may save memory to use Truncated Backpropagation Through Time wh Lightning can handle TBTT automatically via this flag. -.. code-block:: python +.. testcode:: # DEFAULT (single backwards pass per batch) trainer = Trainer(truncated_bptt_steps=None) @@ -54,7 +59,7 @@ option when using sequential data. This is due to the fact that the IterableDataset does not have a __len__ and Lightning requires this to calculate the validation interval when val_check_interval is less than one. -.. code-block:: python +.. testcode:: # IterableDataset class CustomDataset(IterableDataset): @@ -73,5 +78,7 @@ option when using sequential data. dataloader = DataLoader(dataset=iterable_dataset, batch_size=5) return dataloader +.. testcode:: + # Set val_check_interval - trainer = pl.Trainer() + trainer = Trainer(val_check_interval=100) diff --git a/docs/source/single_gpu.rst b/docs/source/single_gpu.rst index 73908489a720aa..c6fa1b9af9bbc7 100644 --- a/docs/source/single_gpu.rst +++ b/docs/source/single_gpu.rst @@ -1,9 +1,14 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + Single GPU Training ==================== Make sure you are running on a machine that has at least one GPU. Lightning handles all the NVIDIA flags for you, there's no need to set them yourself. -.. code-block:: python +.. testcode:: + :skipif: torch.cuda.device_count() < 1 # train on 1 GPU (using dp mode) - trainer = pl.Trainer(gpus=1) \ No newline at end of file + trainer = Trainer(gpus=1) \ No newline at end of file diff --git a/docs/source/slurm.rst b/docs/source/slurm.rst index 2bac01b6f0418d..ed09e7509b5712 100644 --- a/docs/source/slurm.rst +++ b/docs/source/slurm.rst @@ -1,103 +1,107 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + Computing cluster (SLURM) -========================== +========================= Lightning automates job the details behind training on a SLURM powered cluster. .. _multi-node: Multi-node training --------------------- +------------------- To train a model using multiple-nodes do the following: -1. Design your LightningModule. +1. Design your LightningModule. -2. Enable ddp in the trainer +2. Enable ddp in the trainer -.. code-block:: python + .. code-block:: python - # train on 32 GPUs across 4 nodes - trainer = Trainer(gpus=8, num_nodes=4, distributed_backend='ddp') + # train on 32 GPUs across 4 nodes + trainer = Trainer(gpus=8, num_nodes=4, distributed_backend='ddp') -3. It's a good idea to structure your train.py file like this: +3. It's a good idea to structure your train.py file like this: -.. code-block:: python + .. 
testcode:: - # train.py - def main(hparams): - model = LightningTemplateModel(hparams) + # train.py + def main(hparams): + model = LightningTemplateModel(hparams) - trainer = pl.Trainer( - gpus=8, - num_nodes=4, - distributed_backend='ddp' - ) + trainer = pl.Trainer( + gpus=8, + num_nodes=4, + distributed_backend='ddp' + ) - trainer.fit(model) + trainer.fit(model) - if __name__ == '__main__': - root_dir = os.path.dirname(os.path.realpath(__file__)) - parent_parser = ArgumentParser(add_help=False) - hyperparams = parser.parse_args() + if __name__ == '__main__': + root_dir = os.path.dirname(os.path.realpath(__file__)) + parent_parser = ArgumentParser(add_help=False) + hyperparams = parser.parse_args() - # TRAIN - main(hyperparams) + # TRAIN + main(hyperparams) -4. Create the appropriate SLURM job +4. Create the appropriate SLURM job -.. code-block:: bash + .. code-block:: bash - # (submit.sh) - #!/bin/bash -l + # (submit.sh) + #!/bin/bash -l - # SLURM SUBMIT SCRIPT - #SBATCH --nodes=4 - #SBATCH --gres=gpu:8 - #SBATCH --ntasks-per-node=8 - #SBATCH --mem=0 - #SBATCH --time=0-02:00:00 + # SLURM SUBMIT SCRIPT + #SBATCH --nodes=4 + #SBATCH --gres=gpu:8 + #SBATCH --ntasks-per-node=8 + #SBATCH --mem=0 + #SBATCH --time=0-02:00:00 - # activate conda env - source activate $1 + # activate conda env + source activate $1 - # ------------------------- - # debugging flags (optional) - export NCCL_DEBUG=INFO - export PYTHONFAULTHANDLER=1 + # ------------------------- + # debugging flags (optional) + export NCCL_DEBUG=INFO + export PYTHONFAULTHANDLER=1 - # on your cluster you might need these: - # set the network interface - # export NCCL_SOCKET_IFNAME=^docker0,lo + # on your cluster you might need these: + # set the network interface + # export NCCL_SOCKET_IFNAME=^docker0,lo - # might need the latest cuda - # module load NCCL/2.4.7-1-cuda.10.0 - # ------------------------- + # might need the latest cuda + # module load NCCL/2.4.7-1-cuda.10.0 + # ------------------------- - # run script from above - srun python3 train.py + # run script from above + srun python3 train.py -5. If you want auto-resubmit (read below), add this line to the submit.sh script +5. If you want auto-resubmit (read below), add this line to the submit.sh script -.. code-block:: bash + .. code-block:: bash - #SBATCH --signal=SIGUSR1@90 + #SBATCH --signal=SIGUSR1@90 -6. Submit the SLURM job +6. Submit the SLURM job -.. code-block:: bash + .. code-block:: bash - sbatch submit.sh + sbatch submit.sh .. note:: using :class:`~torch.utils.data.distributed.DistributedSampler` is already handled by Lightning. Walltime auto-resubmit ------------------------------------ +---------------------- When you use Lightning in a SLURM cluster, lightning automatically detects when it is about to run into the walltime, and it does the following: - 1. Saves a temporary checkpoint. - 2. Requeues the job. - 3. When the job starts, it loads the temporary checkpoint. +1. Saves a temporary checkpoint. +2. Requeues the job. +3. When the job starts, it loads the temporary checkpoint. 
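For intuition only, the requeue flow amounts to roughly the sketch below. This is a simplified, hypothetical illustration (the handler name, the checkpoint path, and the in-scope `trainer` object are assumptions), not Lightning's actual internal handler:

.. code-block:: python

    import os
    import signal

    def handle_walltime_signal(signum, frame):
        # 1. save a temporary checkpoint before SLURM stops the job
        trainer.save_checkpoint('walltime_tmp.ckpt')
        # 2. ask SLURM to put this same job back into the queue
        os.system('scontrol requeue ' + os.environ['SLURM_JOB_ID'])

    # SLURM delivers SIGUSR1 ahead of the walltime when the job is
    # submitted with `#SBATCH --signal=SIGUSR1@90` (see below)
    signal.signal(signal.SIGUSR1, handle_walltime_signal)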
To get this behavior make sure to add the correct signal to your SLURM script diff --git a/docs/source/test_set.rst b/docs/source/test_set.rst index 60a9f9a253cd05..7dfe40ddaa2daf 100644 --- a/docs/source/test_set.rst +++ b/docs/source/test_set.rst @@ -1,10 +1,10 @@ Test set -========== +======== Lightning forces the user to run the test set separately to make sure it isn't evaluated by mistake Test after fit ----------------- +-------------- To run the test set after training completes, use this method .. code-block:: python @@ -15,10 +15,9 @@ To run the test set after training completes, use this method # run test set trainer.test() - Test pre-trained model ---------------------- -To run the test set on a pretrained model, use this method. +To run the test set on a pre-trained model, use this method. .. code-block:: python @@ -36,4 +35,4 @@ To run the test set on a pretrained model, use this method. trainer.test(model) In this case, the options you pass to trainer will be used when -running the test set (ie: 16-bit, dp, ddp, etc... \ No newline at end of file +running the test set (ie: 16-bit, dp, ddp, etc...) \ No newline at end of file diff --git a/docs/source/training_tricks.rst b/docs/source/training_tricks.rst index 4e8079d378dcaa..65a300adf74fc3 100644 --- a/docs/source/training_tricks.rst +++ b/docs/source/training_tricks.rst @@ -1,3 +1,8 @@ +.. testsetup:: * + + from pytorch_lightning.trainer.trainer import Trainer + + Training Tricks ================ Lightning implements various tricks to help during training @@ -9,7 +14,7 @@ The effect is a large effective batch size of size KxN. .. seealso:: :class:`~pytorch_lightning.trainer.trainer.Trainer` -.. code-block:: python +.. testcode:: # DEFAULT (ie: no accumulated grads) trainer = Trainer(accumulate_grad_batches=1) @@ -22,7 +27,7 @@ norm `_ .. seealso:: :class:`~pytorch_lightning.trainer.trainer.Trainer` -.. code-block:: python +.. testcode:: # DEFAULT (ie: don't clip) trainer = Trainer(gradient_clip_val=0) @@ -31,9 +36,9 @@ norm `_ trainer = Trainer(gradient_clip_val=0.5) Auto scaling of batch size -------------------------------------- +-------------------------- Auto scaling of batch size may be enabled to find the largest batch size that fits into -memory. Larger batch size often give better estimates of gradients, but may also give +memory. Larger batch size often yields better estimates of gradients, but may also result in longer training time. .. seealso:: :class:`~pytorch_lightning.trainer.trainer.Trainer` @@ -47,17 +52,17 @@ longer training time. trainer = Trainer(auto_scale_batch_size=True|'power'|'binsearch') Setting the feature to `True` enables `'power'` scaling, that starting from a -batch size of 1 keeps double the batch size until an out-of-memory (OMM) error is -encountered. Setting the argument to `'binsearch'` continue to finetune the batch -size by duing a binary search. +batch size of 1 keeps doubling the batch size until an out-of-memory (OOM) error is +encountered. Setting the argument to `'binsearch'` continues to finetune the batch +size by performing a binary search. .. note:: - This feature expects that a `batch_size` field exist in the `hparams` of your model i.e. + This feature expects that a `batch_size` field in the `hparams` of your model, i.e., `model.hparams.batch_size` should exist and will be overridden by the results of this algorithm. 
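As an illustration of that expectation, a model could carry `batch_size` in its `hparams` roughly as in the sketch below (a minimal, hypothetical example; `my_dataset` is a placeholder for whatever `Dataset` you train on):

.. code-block:: python

    import torch
    from torch.utils.data import DataLoader
    from pytorch_lightning.core.lightning import LightningModule

    class LitModel(LightningModule):

        def __init__(self, hparams):
            super().__init__()
            # the batch size finder reads and overwrites hparams.batch_size
            self.hparams = hparams
            self.layer = torch.nn.Linear(28 * 28, 10)

        def train_dataloader(self):
            # each trial run of the finder uses the current value of batch_size
            return DataLoader(my_dataset, batch_size=self.hparams.batch_size)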
-The scaling algorithm has a number of parameters, that the user can control by +The scaling algorithm has a number of parameters that the user can control by invoking the trainer method `.scale_batch_size` themself. .. code-block:: python diff --git a/docs/source/transfer_learning.rst b/docs/source/transfer_learning.rst index d5a9509f4a0145..35b7d661f07c42 100644 --- a/docs/source/transfer_learning.rst +++ b/docs/source/transfer_learning.rst @@ -1,3 +1,7 @@ +.. testsetup:: * + + from pytorch_lightning.core.lightning import LightningModule + Transfer Learning ----------------- @@ -7,22 +11,22 @@ Using Pretrained Models Sometimes we want to use a LightningModule as a pretrained model. This is fine because a LightningModule is just a `torch.nn.Module`! -.. note:: Remember that a pl.LightningModule is EXACTLY a torch.nn.Module but with more capabilities. +.. note:: Remember that a LightningModule is EXACTLY a torch.nn.Module but with more capabilities. Let's use the `AutoEncoder` as a feature extractor in a separate model. -.. code-block:: python +.. testcode:: class Encoder(torch.nn.Module): ... - class AutoEncoder(pl.LightningModule): + class AutoEncoder(LightningModule): def __init__(self): self.encoder = Encoder() self.decoder = Decoder() - class CIFAR10Classifier(pl.LightingModule): + class CIFAR10Classifier(LightningModule): def __init__(self): # init the pretrained LightningModule self.feature_extractor = AutoEncoder.load_from_checkpoint(PATH) @@ -41,15 +45,16 @@ We used our pretrained Autoencoder (a LightningModule) for transfer learning! Example: Imagenet (computer Vision) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. code-block:: python +.. testcode:: + :skipif: not TORCHVISION_AVAILABLE import torchvision.models as models - class ImagenetTranferLearning(pl.LightingModule): + class ImagenetTransferLearning(LightningModule): def __init__(self): # init a pretrained resnet num_target_classes = 10 - self.feature_extractor = model.resnet50( + self.feature_extractor = models.resnet50( pretrained=True, num_classes=num_target_classes) self.feature_extractor.eval() @@ -66,7 +71,7 @@ Finetune .. code-block:: python - model = ImagenetTranferLearning() + model = ImagenetTransferLearning() trainer = Trainer() trainer.fit(model) @@ -74,7 +79,7 @@ And use it to predict your data of interest .. code-block:: python - model = ImagenetTranferLearning.load_from_checkpoint(PATH) + model = ImagenetTransferLearning.load_from_checkpoint(PATH) model.freeze() x = some_images_from_cifar10() @@ -90,26 +95,24 @@ as it is a `torch.nn.Module` subclass. Here's a model that uses `Huggingface transformers `_. -.. code-block:: python - - from transformers import BertModel +.. 
testcode:: - class BertMNLIFinetuner(pl.LightningModule): + class BertMNLIFinetuner(LightningModule): - def __init__(self): - super().__init__() + def __init__(self): + super().__init__() - self.bert = BertModel.from_pretrained('bert-base-cased', output_attentions=True) - self.W = nn.Linear(bert.config.hidden_size, 3) - self.num_classes = 3 + self.bert = BertModel.from_pretrained('bert-base-cased', output_attentions=True) + self.W = nn.Linear(bert.config.hidden_size, 3) + self.num_classes = 3 - def forward(self, input_ids, attention_mask, token_type_ids): + def forward(self, input_ids, attention_mask, token_type_ids): - h, _, attn = self.bert(input_ids=input_ids, - attention_mask=attention_mask, - token_type_ids=token_type_ids) + h, _, attn = self.bert(input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids) - h_cls = h[:, 0] - logits = self.W(h_cls) - return logits, attn \ No newline at end of file + h_cls = h[:, 0] + logits = self.W(h_cls) + return logits, attn \ No newline at end of file diff --git a/docs/source/weights_loading.rst b/docs/source/weights_loading.rst index 5f3e4389dd6d04..64a6950738ef1b 100644 --- a/docs/source/weights_loading.rst +++ b/docs/source/weights_loading.rst @@ -1,3 +1,10 @@ +.. testsetup:: * + + import os + from pytorch_lightning.trainer.trainer import Trainer + from pytorch_lightning.core.lightning import LightningModule + + Saving and loading weights ========================== @@ -22,13 +29,13 @@ Automatic saving Checkpointing is enabled by default to the current working directory. To change the checkpoint path pass in: -.. code-block:: python +.. testcode:: - Trainer(default_save_path='/your/path/to/save/checkpoints') + trainer = Trainer(default_save_path='/your/path/to/save/checkpoints') To modify the behavior of checkpointing pass in your own callback. -.. code-block:: python +.. testcode:: from pytorch_lightning.callbacks import ModelCheckpoint @@ -47,17 +54,16 @@ To modify the behavior of checkpointing pass in your own callback. Or disable it by passing -.. code-block:: python +.. testcode:: - trainer = Trainer(checkpoint_callback=False) + trainer = Trainer(checkpoint_callback=False) The Lightning checkpoint also saves the hparams (hyperparams) passed into the LightningModule init. .. note:: hparams is a `Namespace `_. -.. code-block:: python - :emphasize-lines: 8 +.. testcode:: from argparse import Namespace @@ -67,9 +73,9 @@ The Lightning checkpoint also saves the hparams (hyperparams) passed into the Li # define you module to have hparams as the first arg # this means your checkpoint will have everything that went into making # this model (in this case, learning rate) - class MyLightningModule(pl.LightningModule): + class MyLightningModule(LightningModule): - def __init__(self, hparams, ...): + def __init__(self, hparams, *args, **kwargs): self.hparams = hparams Manual saving @@ -78,7 +84,7 @@ You can manually save checkpoints and restore your model from the checkpointed s .. code-block:: python - model = MyModel(hparams) + model = MyLightningModule(hparams) trainer.fit(model) trainer.save_checkpoint("example.ckpt") new_model = MyModel.load_from_checkpoint(checkpoint_path="example.ckpt") @@ -96,9 +102,9 @@ To load a model along with its weights, biases and hyperparameters use following The above only works if you used `hparams` in your model definition -.. code-block:: python +.. 
testcode:: - class MyModel(pl.LightningModule): + class LitModel(LightningModule): def __init__(self, hparams): self.hparams = hparams @@ -106,9 +112,9 @@ The above only works if you used `hparams` in your model definition But if you don't and instead pass individual parameters -.. code-block:: python +.. testcode:: - class MyModel(pl.LightningModule): + class LitModel(LightningModule): def __init__(self, in_dim, out_dim): self.l1 = nn.Linear(in_dim, out_dim) @@ -117,7 +123,7 @@ you can restore the model like this .. code-block:: python - model = MyModel.load_from_checkpoint(PATH, in_dim=128, out_dim=10) + model = LitModel.load_from_checkpoint(PATH, in_dim=128, out_dim=10) Restoring Training State diff --git a/pytorch_lightning/callbacks/early_stopping.py b/pytorch_lightning/callbacks/early_stopping.py index d383a2fb42d7de..100c317172044e 100644 --- a/pytorch_lightning/callbacks/early_stopping.py +++ b/pytorch_lightning/callbacks/early_stopping.py @@ -45,11 +45,14 @@ class EarlyStopping(Callback): >>> early_stopping = EarlyStopping('val_loss') >>> trainer = Trainer(early_stop_callback=early_stopping) """ + mode_dict = { + 'min': torch.lt, + 'max': torch.gt, + } def __init__(self, monitor: str = 'val_loss', min_delta: float = 0.0, patience: int = 3, verbose: bool = False, mode: str = 'auto', strict: bool = True): super().__init__() - self.monitor = monitor self.patience = patience self.verbose = verbose @@ -59,17 +62,19 @@ def __init__(self, monitor: str = 'val_loss', min_delta: float = 0.0, patience: self.stopped_epoch = 0 self.mode = mode - mode_dict = { - 'min': torch.lt, - 'max': torch.gt, - 'auto': torch.gt if 'acc' in self.monitor else torch.lt - } - - if mode not in mode_dict: + if mode not in self.mode_dict: if self.verbose > 0: log.info(f'EarlyStopping mode {mode} is unknown, fallback to auto mode.') self.mode = 'auto' + if self.mode == 'auto': + if self.monitor == 'acc': + self.mode = 'max' + else: + self.mode = 'min' + if self.verbose > 0: + log.info(f'EarlyStopping mode set to {self.mode} for monitoring {self.monitor}.') + self.min_delta *= 1 if self.monitor_op == torch.gt else -1 def _validate_condition_metric(self, logs): @@ -96,12 +101,7 @@ def _validate_condition_metric(self, logs): @property def monitor_op(self): - mode_dict = { - 'min': torch.lt, - 'max': torch.gt, - 'auto': torch.gt if 'acc' in self.monitor else torch.lt - } - return mode_dict[self.mode] + return self.mode_dict[self.mode] def on_train_start(self, trainer, pl_module): # Allow instances to be re-used diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 2f1de6412f0f05..a534929434a8b7 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1535,8 +1535,8 @@ def _load_model_state(cls, checkpoint: Dict[str, Any], *args, **kwargs) -> 'Ligh hparams = Namespace(**ckpt_hparams) if is_namespace else ckpt_hparams else: rank_zero_warn( - f"Checkpoint does not contain hyperparameters but {cls.__name__}'s __init__ " - f"contains argument 'hparams'. Will pass in an empty Namespace instead." + f"Checkpoint does not contain hyperparameters but {cls.__name__}'s __init__" + " contains argument 'hparams'. Will pass in an empty Namespace instead." " Did you forget to store your model hyperparameters in self.hparams?" 
) hparams = Namespace() diff --git a/pytorch_lightning/metrics/functional/classification2.py b/pytorch_lightning/metrics/functional/classification2.py new file mode 100755 index 00000000000000..98a3e92f000aac --- /dev/null +++ b/pytorch_lightning/metrics/functional/classification2.py @@ -0,0 +1,118 @@ +from collections import Sequence +from typing import Optional, Tuple, Callable + +import torch +import torch.nn.functional as F + +def to_onehot(tensor: torch.Tensor, n_classes: Optional[int] = None) -> torch.Tensor: + if n_classes is None: + n_classes = int(tensor.max().detach().item() + 1) + return F.one_hot(tensor, n_classes) + +def to_categorical(tensor: torch.Tensor, argmax_dim: int = 1) -> torch.Tensor: + return torch.argmax(tensor, dim=argmax_dim) + +def get_num_classes(pred: torch.Tensor, target: torch.Tensor, + num_classes: Optional[int]) -> int: + if num_classes is None: + if pred.ndim > target.ndim: + num_classes = pred.size(1) + else: + num_classes = int(target.max().detach().item() + 1) + return num_classes + +def stat_scores(pred: torch.Tensor, target: torch.Tensor, + class_index: int, argmax_dim: int = 1 + ) -> Tuple[torch.Tensor, torch.Tensor, + torch.Tensor, torch.Tensor]: + if pred.ndim == target.ndim + 1: + pred = to_categorical(pred, argmax_dim=argmax_dim) + + tp = ((pred == class_index) * (target == class_index)).to(torch.long).sum() + fp = ((pred == class_index) * (target != class_index)).to(torch.long).sum() + tn = ((pred != class_index) * (target != class_index)).to(torch.long).sum() + fn = ((pred != class_index) * (target == class_index)).to(torch.long).sum() + sup = (target == class_index).to(torch.long).sum() + return tp, fp, tn, fn, sup + +def stat_scores_multiple_classes(pred: torch.Tensor, target: torch.Tensor, + num_classes: Optional[int] = None, + argmax_dim: int = 1 + ) -> Tuple[torch.Tensor, torch.Tensor, + torch.Tensor, torch.Tensor]: + num_classes = get_num_classes(pred=pred, target=target, + num_classes=num_classes) + + if pred.ndim == target.ndim + 1: + pred = to_categorical(pred, argmax_dim=argmax_dim) + + tps = torch.zeros((num_classes,), device=pred.device) + fps = torch.zeros((num_classes,), device=pred.device) + tns = torch.zeros((num_classes,), device=pred.device) + fns = torch.zeros((num_classes,), device=pred.device) + sup = torch.zeros((num_classes,), device=pred.device) + + for c in range(num_classes): + tps[c], fps[c], tns[c], fns[c], sup[c] = stat_scores(pred=pred, + target=target, + class_index=c) + + return tps.float(), fps.float(), tns.float(), fns.float(), sup.float() + +def reduce(num, denom, weights, reduction='micro'): + if reduction=='micro': + return sum(num) / sum(denom) + elif reduction=='macro': + return torch.mean(num / denom) + elif reduction=='weighted': + return torch.sum((num / denom) * (weights / sum(weights))) + elif reduction=='none': + return num / denom + else: + raise ValueError('Reduction parameter unknown.') + +def accuracy(pred: torch.Tensor, target: torch.Tensor, + num_classes: Optional[int] = None, + reduction='micro') -> torch.Tensor: + + tps, fps, tns, fns, sup = stat_scores_multiple_classes(pred=pred, + target=target, + num_classes=num_classes) + if not (target > 0).any() and num_classes is None: + raise RuntimeError("cannot infer num_classes when target is all zero") + return reduce(tps, sup, sup, reduction=reduction) + +def precision(pred: torch.Tensor, target: torch.Tensor, + num_classes: Optional[int] = None, + reduction: str = 'micro') -> torch.Tensor: + + tps, fps, tns, fns, sup = 
stat_scores_multiple_classes(pred=pred, + target=target, + num_classes=num_classes) + if not (target > 0).any() and num_classes is None: + raise RuntimeError("cannot infer num_classes when target is all zero") + return reduce(tps, tps + fps, sup, reduction=reduction) + +def recall(pred: torch.Tensor, target: torch.Tensor, + num_classes: Optional[int] = None, + reduction: str = 'elementwise_mean') -> torch.Tensor: + tps, fps, tns, fns, sup = stat_scores_multiple_classes(pred=pred, + target=target, + num_classes=num_classes) + if not (target > 0).any() and num_classes is None: + raise RuntimeError("cannot infer num_classes when target is all zero") + return reduce(tps, tps + fns, sup, reduction=reduction) + +pred = torch.randint(4, (100,)) +target = torch.randint(4, (100,)) + +from sklearn.metrics import accuracy_score, precision_score, recall_score + +print(accuracy(pred, target, reduction='none'), accuracy_score(target, pred)) +print(precision(pred, target, reduction='none'), precision_score(target, pred, average=None)) +print(recall(pred, target, reduction='none'), recall_score(target, pred, average=None)) + + + + + diff --git a/pytorch_lightning/metrics/functional/regression.py b/pytorch_lightning/metrics/functional/regression.py new file mode 100755 index 00000000000000..c1691d3c9a95da --- /dev/null +++ b/pytorch_lightning/metrics/functional/regression.py @@ -0,0 +1,36 @@ +from collections import Sequence +from typing import Optional, Tuple, Callable + +import torch + +from pytorch_lightning.metrics.functional.reduction import reduce + +def mean_square_error(pred: torch.Tensor, + target: torch.Tensor, + reduction: str = 'elementwise_mean'): + """Mean squared error.""" + mse = torch.pow(torch.abs(pred - target), 2.0) + return reduce(mse, reduction=reduction) + +def root_mean_squared_error(pred: torch.Tensor, + target: torch.Tensor, + reduction: str = 'elementwise_mean'): + """Root mean squared error.""" + mse = torch.pow(pred - target, 2.0) + return torch.sqrt(reduce(mse, reduction=reduction)) + + + +def mean_absolute_error(pred: torch.Tensor, + target: torch.Tensor, + reduction: str = 'elementwise_mean'): + """Mean absolute error.""" + abs_error = torch.abs(pred - target) + return reduce(abs_error, reduction=reduction) + +def max_error(pred: torch.Tensor, + target: torch.Tensor, + reduction: str = 'elementwise_mean'): + """Maximum absolute error.""" + max_error = torch.max(torch.abs(pred - target), dim=-1)[0] + return reduce(max_error, reduction=reduction) \ No newline at end of file diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index cce58c3cdc6ebd..c5d5beb99eed9a 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -142,16 +142,16 @@ def forward(self, x): ..
code-block:: python - # defeault use by the Trainer (no scaling of batch size) + # default used by the Trainer (no scaling of batch size) trainer = Trainer(auto_scale_batch_size=False) Example:: - # run batch size scaling, result override hparams.batch_size + # run batch size scaling, result overrides hparams.batch_size trainer = Trainer(auto_scale_batch_size=True) - # run batch size scaling, result override hparams.my_batch_size_arg - trainer = Trainer(auto_scale_batch_size='my_batch_size_arg') + # run batch size scaling, using binary search strategy + trainer = Trainer(auto_scale_batch_size='binsearch') auto_lr_find ^^^^^^^^^^^^ diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py index b3e15024c45de7..52e53acd5a2b49 100644 --- a/pytorch_lightning/trainer/data_loading.py +++ b/pytorch_lightning/trainer/data_loading.py @@ -46,8 +46,8 @@ def _has_len(dataloader: DataLoader) -> bool: try: # try getting the length if len(dataloader) == 0: - raise ValueError('Dataloader returned 0 length. Please make sure' - ' that your Dataloader atleast returns 1 batch') + raise ValueError('`Dataloader` returned 0 length.' + ' Please make sure that your Dataloader at least returns 1 batch') return True except TypeError: return False @@ -186,10 +186,10 @@ def reset_train_dataloader(self, model: LightningModule) -> None: self.val_check_batch = float('inf') else: raise MisconfigurationException( - 'When using an infinite DataLoader (e.g. with an IterableDataset or when ' - 'DataLoader does not implement `__len__`) for `train_dataloader`, ' - '`Trainer(val_check_interval)` must be `1.0` or an int. An int k specifies ' - 'checking validation every k training batches.') + 'When using an infinite DataLoader (e.g. with an IterableDataset' + ' or when DataLoader does not implement `__len__`) for `train_dataloader`,' + ' `Trainer(val_check_interval)` must be `1.0` or an int. An int k specifies' + ' checking validation every k training batches.') else: self._percent_range_check('val_check_interval') @@ -219,7 +219,7 @@ def _reset_eval_dataloader(self, model: LightningModule, mode: str) -> Tuple[int ' this off for validation and test dataloaders.') # add samplers - dataloaders = [self.auto_add_sampler(dl, train=False) for dl in dataloaders if dl] + dataloaders = [self.auto_add_sampler(dl, train=False) for dl in dataloaders if dl is not None] num_batches = 0 @@ -240,9 +240,9 @@ def _reset_eval_dataloader(self, model: LightningModule, mode: str) -> Tuple[int num_batches = int(num_batches * percent_check) elif percent_check not in (0.0, 1.0): raise MisconfigurationException( - 'When using an infinite DataLoader (e.g. with an IterableDataset or when ' - f'DataLoader does not implement `__len__`) for `{mode}_dataloader`, ' - f'`Trainer({mode}_percent_check)` must be `0.0` or `1.0`.') + 'When using an infinite DataLoader (e.g. 
with an IterableDataset' + f' or when DataLoader does not implement `__len__`) for `{mode}_dataloader`,' + f' `Trainer({mode}_percent_check)` must be `0.0` or `1.0`.') return num_batches, dataloaders def reset_val_dataloader(self, model: LightningModule) -> None: @@ -252,7 +252,7 @@ def reset_val_dataloader(self, model: LightningModule) -> None: model: The current `LightningModule` """ if self.is_overriden('validation_step'): - self.num_val_batches, self.val_dataloaders =\ + self.num_val_batches, self.val_dataloaders = \ self._reset_eval_dataloader(model, 'val') def reset_test_dataloader(self, model) -> None: diff --git a/pytorch_lightning/trainer/distrib_parts.py b/pytorch_lightning/trainer/distrib_parts.py index a9f4b6114522e1..bcd0c0724ee7c2 100644 --- a/pytorch_lightning/trainer/distrib_parts.py +++ b/pytorch_lightning/trainer/distrib_parts.py @@ -576,8 +576,9 @@ def horovod_train(self, model): torch.cuda.set_device(self.root_gpu) model.cuda(self.root_gpu) - # Only show progress bar from the first worker - self.progress_bar_refresh_rate = self.progress_bar_refresh_rate if hvd.rank() == 0 else 0 + # avoid duplicating progress bar + if hvd.rank() != 0 and self.progress_bar_callback is not None: + self.progress_bar_callback.disable() # CHOOSE OPTIMIZER # allow for lr schedulers as well diff --git a/pytorch_lightning/trainer/lr_finder.py b/pytorch_lightning/trainer/lr_finder.py index b0491c818dd5b4..e664fd3cc47d04 100755 --- a/pytorch_lightning/trainer/lr_finder.py +++ b/pytorch_lightning/trainer/lr_finder.py @@ -53,6 +53,7 @@ def _run_lr_finder_internally(self, model: LightningModule): def lr_find(self, model: LightningModule, train_dataloader: Optional[DataLoader] = None, + val_dataloaders: Optional[DataLoader] = None, min_lr: float = 1e-8, max_lr: float = 1, num_training: int = 100, @@ -105,7 +106,7 @@ def lr_find(self, """ save_path = os.path.join(self.default_root_dir, 'lr_find_temp.ckpt') - self._dump_params(model) + self._lr_finder_dump_params(model) # Prevent going into infinite loop self.auto_lr_find = False @@ -129,8 +130,10 @@ def lr_find(self, # Accumulation of gradients self.accumulate_grad_batches = num_accumulation_steps - # Disable standard checkpoint + # Disable standard checkpoint & early stopping self.checkpoint_callback = False + self.early_stop_callback = None + self.enable_early_stop = False # Required for saving the model self.optimizers, self.schedulers = [], [], @@ -150,7 +153,9 @@ def lr_find(self, model.configure_optimizers = lr_finder._get_new_optimizer(optimizers[0]) # Fit, lr & loss logged in callback - self.fit(model, train_dataloader=train_dataloader) + self.fit(model, + train_dataloader=train_dataloader, + val_dataloaders=val_dataloaders) # Prompt if we stopped early if self.global_step != num_training: @@ -165,13 +170,13 @@ def lr_find(self, os.remove(save_path) # Finish by resetting variables so trainer is ready to fit model - self._restore_params(model) + self._lr_finder_restore_params(model) if self.progress_bar_callback: self.progress_bar_callback.enable() return lr_finder - def _dump_params(self, model): + def _lr_finder_dump_params(self, model): # Prevent going into infinite loop self._params = { 'auto_lr_find': self.auto_lr_find, @@ -181,11 +186,13 @@ def _dump_params(self, model): 'progress_bar_refresh_rate': self.progress_bar_refresh_rate, 'accumulate_grad_batches': self.accumulate_grad_batches, 'checkpoint_callback': self.checkpoint_callback, + 'early_stop_callback': self.early_stop_callback, + 'enable_early_stop': self.enable_early_stop, 
'progress_bar_callback': self.progress_bar_callback, 'configure_optimizers': model.configure_optimizers, } - def _restore_params(self, model): + def _lr_finder_restore_params(self, model): self.auto_lr_find = self._params['auto_lr_find'] self.logger = self._params['logger'] self.callbacks = self._params['callbacks'] @@ -193,6 +200,8 @@ def _restore_params(self, model): self.progress_bar_refresh_rate = self._params['progress_bar_refresh_rate'] self.accumulate_grad_batches = self._params['accumulate_grad_batches'] self.checkpoint_callback = self._params['checkpoint_callback'] + self.early_stop_callback = self._params['early_stop_callback'] + self.enable_early_stop = self._params['enable_early_stop'] self.progress_bar_callback = self._params['progress_bar_callback'] model.configure_optimizers = self._params['configure_optimizers'] diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 17c0e209488787..0d83234025f4c8 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -51,6 +51,13 @@ else: XLA_AVAILABLE = True +try: + import horovod.torch as hvd +except ImportError: + HOROVOD_AVAILABLE = False +else: + HOROVOD_AVAILABLE = True + class Trainer( TrainerIOMixin, @@ -290,9 +297,9 @@ def __init__( auto_scale_batch_size: If set to True, will `initially` run a batch size finder trying to find the largest batch size that fits into memory. - The results will be stored in self.hparams.batch_size in the lightning module. + The result will be stored in self.hparams.batch_size in the LightningModule. Additionally, can be set to either `power` (same as `True`) that - estimates the batch size through a power search or `binseach` that + estimates the batch size through a power search or `binsearch` that estimates the batch size through a binary search. """ @@ -742,7 +749,7 @@ def fit( if self.auto_scale_batch_size: if self.auto_scale_batch_size is True: self.auto_scale_batch_size = 'power' - _ = self.scale_batch_size(model, mode = self.auto_scale_batch_size) + _ = self.scale_batch_size(model, mode=self.auto_scale_batch_size) # Run learning rate finder: if self.auto_lr_find: @@ -868,6 +875,10 @@ def run_pretrain_routine(self, model: LightningModule): # wait for all processes to catch up torch_xla.core.xla_model.rendezvous("pl.Trainer.run_pretrain_routine") + elif self.use_horovod: + # wait for all processes to catch up + hvd.join() + # register auto-resubmit when on SLURM self.register_slurm_signal_handlers() diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 78d24fad0a18f2..4f474b761e94fb 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -112,6 +112,13 @@ else: XLA_AVAILABLE = True +try: + import horovod.torch as hvd +except ImportError: + HOROVOD_AVAILABLE = False +else: + HOROVOD_AVAILABLE = True + class TrainerIOMixin(ABC): @@ -123,6 +130,7 @@ class TrainerIOMixin(ABC): resume_from_checkpoint: ... use_ddp: bool use_ddp2: bool + use_horovod: bool checkpoint_callback: ... 
proc_rank: int weights_save_path: str @@ -175,6 +183,10 @@ def restore_weights(self, model: LightningModule): # wait for all processes to catch up torch_xla.core.xla_model.rendezvous("pl.TrainerIOMixin.restore_weights") + elif self.use_horovod: + # wait for all processes to catch up + hvd.join() + # clear cache after restore if self.on_gpu: torch.cuda.empty_cache() diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 37bac3d99727f3..b2ce8599bc9a06 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -325,7 +325,7 @@ def train(self): if self.reload_dataloaders_every_epoch: self.reset_train_dataloader(model) # set seed for distributed sampler (enables shuffling for each epoch) - if self.use_ddp or self.use_horovod \ + if (self.use_ddp or self.use_horovod) \ and hasattr(self.train_dataloader.sampler, 'set_epoch'): self.train_dataloader.sampler.set_epoch(epoch) diff --git a/pytorch_lightning/trainer/training_tricks.py b/pytorch_lightning/trainer/training_tricks.py index 1542790e1d5e00..199a30887e64ca 100644 --- a/pytorch_lightning/trainer/training_tricks.py +++ b/pytorch_lightning/trainer/training_tricks.py @@ -95,7 +95,7 @@ def scale_batch_size(self, n_max_try: int = 25): r""" Will iteratively try to find the largest batch size for a given model - that does not not give an out of memory (OOM) error + that does not give an out of memory (OOM) error. Args: model: Model to fit. @@ -104,22 +104,21 @@ def scale_batch_size(self, If mode is `power` we keep multiplying the batch size by 2, until we get an OOM error. If mode is 'binsearch', we will initially also keep multiplying by 2 and after encountering an OOM error - do a binary search between the last succeded batch size and the + do a binary search between the last successful batch size and the batch size that failed. n_step_per_try: number of steps to run with a given batch size. 
Idealy 1 should be enough to test if a OOM error occurs, - however in practise a few is needed + however in practise a few are needed - init_val: initial batch size to do the search from + init_val: initial batch size to start the search with n_max_try: max number of increase in batch size done before algorithm is terminated """ if mode not in ['power', 'binsearch']: - raise ValueError('mode in method `scale_batch_size`' - ' can only be `power` or `binsearch') + raise ValueError('mode in method `scale_batch_size` can only be `power` or `binsearch') # Arguments we adjust during the batch size finder, save for restoring max_steps = self.max_steps diff --git a/tests/Datasets/MNIST/digits-0-1-2_nb-100/test.pt b/tests/Datasets/MNIST/digits-0-1-2_nb-100/test.pt new file mode 100644 index 00000000000000..9edb5bddfabded Binary files /dev/null and b/tests/Datasets/MNIST/digits-0-1-2_nb-100/test.pt differ diff --git a/tests/Datasets/MNIST/digits-0-1-2_nb-100/training.pt b/tests/Datasets/MNIST/digits-0-1-2_nb-100/training.pt new file mode 100644 index 00000000000000..424a07a610e8d6 Binary files /dev/null and b/tests/Datasets/MNIST/digits-0-1-2_nb-100/training.pt differ diff --git a/tests/base/eval_model_optimizers.py b/tests/base/eval_model_optimizers.py index bcce319d4a5659..2fd9b104a06d9a 100644 --- a/tests/base/eval_model_optimizers.py +++ b/tests/base/eval_model_optimizers.py @@ -12,7 +12,7 @@ def configure_optimizers(self): optimizer = optim.Adam(self.parameters(), lr=self.hparams.learning_rate) return optimizer - def configure_optimizers_empty(self): + def configure_optimizers__empty(self): return None def configure_optimizers__lbfgs(self): diff --git a/tests/base/eval_model_template.py b/tests/base/eval_model_template.py index 37f4dfbd04144f..d97e8a925fc6dc 100644 --- a/tests/base/eval_model_template.py +++ b/tests/base/eval_model_template.py @@ -1,3 +1,5 @@ +from argparse import Namespace + import torch import torch.nn as nn import torch.nn.functional as F @@ -37,7 +39,7 @@ def __init__(self, hparams: object) -> object: """Pass in parsed HyperOptArgumentParser to the model.""" # init superclass super().__init__() - self.hparams = hparams + self.hparams = Namespace(**hparams) if isinstance(hparams, dict) else hparams # if you specify an example input, the summary will show input/output for each layer self.example_input_array = torch.rand(5, 28 * 28) diff --git a/tests/base/eval_model_test_dataloaders.py b/tests/base/eval_model_test_dataloaders.py index ecbfe19142edaf..fdab56994ab9e5 100644 --- a/tests/base/eval_model_test_dataloaders.py +++ b/tests/base/eval_model_test_dataloaders.py @@ -1,5 +1,7 @@ from abc import ABC, abstractmethod +from tests.base.eval_model_utils import CustomInfDataloader + class TestDataloaderVariations(ABC): @@ -9,3 +11,12 @@ def dataloader(self, train: bool): def test_dataloader(self): return self.dataloader(train=False) + + def test_dataloader__infinite(self): + return CustomInfDataloader(self.dataloader(train=False)) + + def test_dataloader__empty(self): + return None + + def test_dataloader__multiple(self): + return [self.dataloader(train=False), self.dataloader(train=False)] diff --git a/tests/base/eval_model_test_epoch_ends.py b/tests/base/eval_model_test_epoch_ends.py index 5279e6a9fcab9e..fa3c3f7f4a90e0 100644 --- a/tests/base/eval_model_test_epoch_ends.py +++ b/tests/base/eval_model_test_epoch_ends.py @@ -37,3 +37,39 @@ def test_epoch_end(self, outputs): metrics_dict = {'test_loss': test_loss_mean.item(), 'test_acc': test_acc_mean.item()} result = 
{'progress_bar': metrics_dict, 'log': metrics_dict} return result + + def test_epoch_end__multiple_dataloaders(self, outputs): + """ + Called at the end of validation to aggregate outputs + :param outputs: list of individual outputs of each validation step + :return: + """ + # if returned a scalar from test_step, outputs is a list of tensor scalars + # we return just the average in this case (if we want) + # return torch.stack(outputs).mean() + test_loss_mean = 0 + test_acc_mean = 0 + i = 0 + for dl_output in outputs: + for output in dl_output: + test_loss = output['test_loss'] + + # reduce manually when using dp + if self.trainer.use_dp: + test_loss = torch.mean(test_loss) + test_loss_mean += test_loss + + # reduce manually when using dp + test_acc = output['test_acc'] + if self.trainer.use_dp: + test_acc = torch.mean(test_acc) + + test_acc_mean += test_acc + i += 1 + + test_loss_mean /= i + test_acc_mean /= i + + tqdm_dict = {'test_loss': test_loss_mean.item(), 'test_acc': test_acc_mean.item()} + result = {'progress_bar': tqdm_dict} + return result diff --git a/tests/base/eval_model_test_steps.py b/tests/base/eval_model_test_steps.py index b4c80cff06421f..bf57c2815bc89c 100644 --- a/tests/base/eval_model_test_steps.py +++ b/tests/base/eval_model_test_steps.py @@ -8,6 +8,7 @@ class TestStepVariations(ABC): """ Houses all variations of test steps """ + def test_step(self, batch, batch_idx, *args, **kwargs): """ Default, baseline test_step @@ -87,3 +88,6 @@ def test_step__multiple_dataloaders(self, batch, batch_idx, dataloader_idx, **kw f'test_acc_{dataloader_idx}': test_acc, }) return output + + def test_step__empty(self, batch, batch_idx, *args, **kwargs): + return {} diff --git a/tests/base/eval_model_train_dataloaders.py b/tests/base/eval_model_train_dataloaders.py index 3d547a83639b38..ded46de3d6e41b 100644 --- a/tests/base/eval_model_train_dataloaders.py +++ b/tests/base/eval_model_train_dataloaders.py @@ -1,5 +1,7 @@ from abc import ABC, abstractmethod +from tests.base.eval_model_utils import CustomInfDataloader + class TrainDataloaderVariations(ABC): @@ -9,3 +11,12 @@ def dataloader(self, train: bool): def train_dataloader(self): return self.dataloader(train=True) + + def train_dataloader__infinite(self): + return CustomInfDataloader(self.dataloader(train=True)) + + def train_dataloader__zero_length(self): + dataloader = self.dataloader(train=True) + dataloader.dataset.data = dataloader.dataset.data[:0] + dataloader.dataset.targets = dataloader.dataset.targets[:0] + return dataloader diff --git a/tests/base/eval_model_train_steps.py b/tests/base/eval_model_train_steps.py index f9d0663de64ec7..8a4307555dccb3 100644 --- a/tests/base/eval_model_train_steps.py +++ b/tests/base/eval_model_train_steps.py @@ -1,11 +1,16 @@ +import math from abc import ABC from collections import OrderedDict +import torch + class TrainingStepVariations(ABC): """ Houses all variations of training steps """ + test_step_inf_loss = float('inf') + def training_step(self, batch, batch_idx, optimizer_idx=None): """Lightning calls this inside the training loop""" # forward pass @@ -28,3 +33,12 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): if self.trainer.batch_idx % 2 == 0: return loss_val + + def training_step__inf_loss(self, batch, batch_idx, optimizer_idx=None): + output = self.training_step(batch, batch_idx, optimizer_idx) + if batch_idx == self.test_step_inf_loss: + if isinstance(output, dict): + output['loss'] *= torch.tensor(math.inf) # make loss infinite + else: + output /= 0 + return 
output diff --git a/tests/base/eval_model_utils.py b/tests/base/eval_model_utils.py index e1a40f95b804f3..d3eed3cb8dc5be 100644 --- a/tests/base/eval_model_utils.py +++ b/tests/base/eval_model_utils.py @@ -26,3 +26,25 @@ def get_output_metric(self, output, name): else: # if it is 2level deep -> per dataloader and per batch val = sum(out[name] for out in output) / len(output) return val + + +class CustomInfDataloader: + + def __init__(self, dataloader): + self.dataloader = dataloader + self.iter = iter(dataloader) + self.count = 0 + + def __iter__(self): + self.count = 0 + return self + + def __next__(self): + if self.count >= 50: + raise StopIteration + self.count = self.count + 1 + try: + return next(self.iter) + except StopIteration: + self.iter = iter(self.dataloader) + return next(self.iter) diff --git a/tests/base/eval_model_valid_dataloaders.py b/tests/base/eval_model_valid_dataloaders.py index 2d6f2bf2af5d96..2b760e13086fd4 100644 --- a/tests/base/eval_model_valid_dataloaders.py +++ b/tests/base/eval_model_valid_dataloaders.py @@ -1,5 +1,7 @@ from abc import ABC, abstractmethod +from tests.base.eval_model_utils import CustomInfDataloader + class ValDataloaderVariations(ABC): @@ -9,3 +11,10 @@ def dataloader(self, train: bool): def val_dataloader(self): return self.dataloader(train=False) + + def val_dataloader__multiple(self): + return [self.dataloader(train=False), + self.dataloader(train=False)] + + def val_dataloader__infinite(self): + return CustomInfDataloader(self.dataloader(train=False)) diff --git a/tests/base/eval_model_valid_epoch_ends.py b/tests/base/eval_model_valid_epoch_ends.py index ab14ed10ef5ab8..73866451023f59 100644 --- a/tests/base/eval_model_valid_epoch_ends.py +++ b/tests/base/eval_model_valid_epoch_ends.py @@ -16,9 +16,13 @@ def validation_epoch_end(self, outputs): """ # if returned a scalar from validation_step, outputs is a list of tensor scalars # we return just the average in this case (if we want) + def _mean(res, key): + # recursive mean for multilevel dicts + return torch.stack([x[key] if isinstance(x, dict) else _mean(x, key) for x in res]).mean() + # return torch.stack(outputs).mean() - val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() - val_acc_mean = torch.stack([x['val_acc'] for x in outputs]).mean() + val_loss_mean = _mean(outputs, 'val_loss') + val_acc_mean = _mean(outputs, 'val_acc') for output in outputs: val_loss = self.get_output_metric(output, 'val_loss') diff --git a/tests/base/models.py b/tests/base/models.py index ebc6d755761c85..4d39c5150b0352 100644 --- a/tests/base/models.py +++ b/tests/base/models.py @@ -8,6 +8,7 @@ from torch import optim from torch.utils.data import DataLoader +from tests.base import EvalModelTemplate from tests.base.datasets import TrialMNIST try: diff --git a/tests/callbacks/test_callbacks.py b/tests/callbacks/test_callbacks.py index 2bbcfaea1f191a..8a50cb667c4bf3 100644 --- a/tests/callbacks/test_callbacks.py +++ b/tests/callbacks/test_callbacks.py @@ -3,28 +3,14 @@ from pytorch_lightning import Callback from pytorch_lightning import Trainer, LightningModule from pytorch_lightning.callbacks import EarlyStopping, LearningRateLogger, ModelCheckpoint -from tests.base import ( - LightTrainDataloader, - LightTestMixin, - LightValidationMixin, - LightTestOptimizersWithMixedSchedulingMixin, - TestModelBase -) +from tests.base import EvalModelTemplate def test_trainer_callback_system(tmpdir): """Test the callback system.""" - class CurrentTestModel( - LightTrainDataloader, - LightTestMixin, - 
LightValidationMixin, - TestModelBase, - ): - pass - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(hparams) def _check_args(trainer, pl_module): assert isinstance(trainer, Trainer) @@ -214,18 +200,18 @@ def on_test_end(self, trainer, pl_module): def test_early_stopping_no_val_step(tmpdir): """Test that early stopping callback falls back to training metrics when no validation defined.""" - class ModelWithoutValStep(LightTrainDataloader, TestModelBase): + class CurrentModel(EvalModelTemplate): def training_step(self, *args, **kwargs): output = super().training_step(*args, **kwargs) - loss = output['loss'] # could be anything else - output.update({'my_train_metric': loss}) + output.update({'my_train_metric': output['loss']}) # could be anything else return output - model = ModelWithoutValStep(tutils.get_default_hparams()) + model = CurrentModel(tutils.get_default_hparams()) + model.validation_step = None + model.val_dataloader = None stopping = EarlyStopping(monitor='my_train_metric', min_delta=0.1) - trainer = Trainer( default_root_dir=tmpdir, early_stop_callback=stopping, @@ -243,21 +229,21 @@ def test_pickling(tmpdir): early_stopping = EarlyStopping() ckpt = ModelCheckpoint(tmpdir) - pickle.dumps(ckpt) - pickle.dumps(early_stopping) + early_stopping_pickled = pickle.dumps(early_stopping) + ckpt_pickled = pickle.dumps(ckpt) + + early_stopping_loaded = pickle.loads(early_stopping_pickled) + ckpt_loaded = pickle.loads(ckpt_pickled) + + assert vars(early_stopping) == vars(early_stopping_loaded) + assert vars(ckpt) == vars(ckpt_loaded) @pytest.mark.parametrize('save_top_k', [-1, 0, 1, 2]) def test_model_checkpoint_with_non_string_input(tmpdir, save_top_k): - """ Test that None in checkpoint callback is valid and that chkp_path is - set correctly """ + """ Test that None in checkpoint callback is valid and that chkp_path is set correctly """ tutils.reset_seed() - - class CurrentTestModel(LightTrainDataloader, TestModelBase): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) checkpoint = ModelCheckpoint(filepath=None, save_top_k=save_top_k) @@ -276,11 +262,8 @@ def test_lr_logger_single_lr(tmpdir): """ Test that learning rates are extracted and logged for single lr scheduler""" tutils.reset_seed() - class CurrentTestModel(LightTrainDataloader, TestModelBase): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) + model.configure_optimizers = model.configure_optimizers__single_scheduler lr_logger = LearningRateLogger() trainer = Trainer( @@ -292,6 +275,7 @@ class CurrentTestModel(LightTrainDataloader, TestModelBase): ) results = trainer.fit(model) + assert results == 1 assert lr_logger.lrs, 'No learning rates logged' assert len(lr_logger.lrs) == len(trainer.lr_schedulers), \ 'Number of learning rates logged does not match number of lr schedulers' @@ -303,13 +287,8 @@ def test_lr_logger_multi_lrs(tmpdir): """ Test that learning rates are extracted and logged for multi lr schedulers """ tutils.reset_seed() - class CurrentTestModel(LightTestOptimizersWithMixedSchedulingMixin, - LightTrainDataloader, - TestModelBase): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) + model.configure_optimizers = model.configure_optimizers__multiple_schedulers lr_logger = 
LearningRateLogger() trainer = Trainer( @@ -321,6 +300,7 @@ class CurrentTestModel(LightTestOptimizersWithMixedSchedulingMixin, ) results = trainer.fit(model) + assert results == 1 assert lr_logger.lrs, 'No learning rates logged' assert len(lr_logger.lrs) == len(trainer.lr_schedulers), \ 'Number of learning rates logged does not match number of lr schedulers' diff --git a/tests/callbacks/test_progress_bar.py b/tests/callbacks/test_progress_bar.py index 7cd5d5435adefc..ebd35fedfa13d6 100644 --- a/tests/callbacks/test_progress_bar.py +++ b/tests/callbacks/test_progress_bar.py @@ -4,12 +4,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ProgressBarBase, ProgressBar, ModelCheckpoint from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import ( - LightTrainDataloader, - LightTestMixin, - LightValidationMixin, - TestModelBase -) +from tests.base import EvalModelTemplate @pytest.mark.parametrize('callbacks,refresh_rate', [ @@ -63,16 +58,7 @@ def test_progress_bar_misconfiguration(): def test_progress_bar_totals(): """Test that the progress finishes with the correct total steps processed.""" - class CurrentTestModel( - LightTrainDataloader, - LightTestMixin, - LightValidationMixin, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) trainer = Trainer( progress_bar_refresh_rate=1, @@ -121,16 +107,7 @@ class CurrentTestModel( def test_progress_bar_fast_dev_run(): - class CurrentTestModel( - LightTrainDataloader, - LightTestMixin, - LightValidationMixin, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) trainer = Trainer( fast_dev_run=True, @@ -163,16 +140,7 @@ class CurrentTestModel( def test_progress_bar_progress_refresh(refresh_rate): """Test that the three progress bars get correctly updated when using different refresh rates.""" - class CurrentTestModel( - LightTrainDataloader, - LightTestMixin, - LightValidationMixin, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) class CurrentProgressBar(ProgressBar): diff --git a/tests/models/test_amp.py b/tests/models/test_amp.py index f4f1d9c20a6e9e..52fb90f135bae0 100644 --- a/tests/models/test_amp.py +++ b/tests/models/test_amp.py @@ -6,7 +6,7 @@ import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import LightningTestModel, EvalModelTemplate +from tests.base import EvalModelTemplate @pytest.mark.spawn @@ -15,7 +15,6 @@ def test_amp_single_gpu(tmpdir, backend): """Make sure DP/DDP + AMP work.""" tutils.reset_seed() - trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, @@ -63,8 +62,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir): tutils.set_random_master_port() os.environ['SLURM_LOCALID'] = str(0) - hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # exp file to get meta logger = tutils.get_default_logger(tmpdir) diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index 46d1ba6e44aaf3..13120c01756c10 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -7,16 +7,8 @@ import tests.base.utils as tutils from 
pytorch_lightning import Trainer -from pytorch_lightning.callbacks import ( - EarlyStopping, -) -from tests.base import ( - TestModelBase, - LightTrainDataloader, - LightningTestModel, - LightTestMixin, - EvalModelTemplate, -) +from pytorch_lightning.callbacks import EarlyStopping +from tests.base import EvalModelTemplate def test_early_stopping_cpu_model(tmpdir): @@ -106,8 +98,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir): def test_running_test_after_fitting(tmpdir): """Verify test() on fitted model.""" - hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -138,11 +129,7 @@ def test_running_test_after_fitting(tmpdir): def test_running_test_no_val(tmpdir): """Verify `test()` works on a model with no `val_loader`.""" - class CurrentTestModel(LightTrainDataloader, LightTestMixin, TestModelBase): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -220,8 +207,7 @@ def test_single_gpu_batch_parse(): def test_simple_cpu(tmpdir): """Verify continue training session on CPU.""" - hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # fit model trainer = Trainer( @@ -285,7 +271,7 @@ def __getitem__(self, i): def __len__(self): return 1 - class BpttTestModel(LightTrainDataloader, TestModelBase): + class BpttTestModel(EvalModelTemplate): def __init__(self, hparams): super().__init__(hparams) self.test_hidden = None diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index dbaf4db8f8ed2f..5bdb603e145188 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -9,7 +9,7 @@ from pytorch_lightning.core import memory from pytorch_lightning.trainer.distrib_parts import parse_gpu_ids, determine_root_gpu_device from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import LightningTestModel, EvalModelTemplate +from tests.base import EvalModelTemplate PRETEND_N_OF_GPUS = 16 @@ -65,7 +65,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir): def test_cpu_slurm_save_load(tmpdir): """Verify model save/load/checkpoint on CPU.""" hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(hparams) # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -112,7 +112,7 @@ def test_cpu_slurm_save_load(tmpdir): logger=logger, checkpoint_callback=ModelCheckpoint(tmpdir), ) - model = LightningTestModel(hparams) + model = EvalModelTemplate(hparams) # set the epoch start hook so we can predict before the model does the full training def assert_pred_same(): diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 1d0e55df409e01..00147ef2bc089e 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -2,29 +2,19 @@ import tests.base.utils as tutils from pytorch_lightning import Trainer -from tests.base import ( - LightTrainDataloader, - LightValidationMixin, - TestModelBase, - LightTestMixin) +from tests.base import EvalModelTemplate @pytest.mark.parametrize('max_steps', [1, 2, 3]) def test_on_before_zero_grad_called(max_steps): - class CurrentTestModel( - LightTrainDataloader, - LightValidationMixin, - LightTestMixin, - TestModelBase, - ): + class 
CurrentTestModel(EvalModelTemplate): on_before_zero_grad_called = 0 def on_before_zero_grad(self, optimizer): self.on_before_zero_grad_called += 1 - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = CurrentTestModel(tutils.get_default_hparams()) trainer = Trainer( max_steps=max_steps, diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index 0f41dee6e4fb07..14644aee6649d3 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -11,7 +11,7 @@ from pytorch_lightning import Trainer import tests.base.utils as tutils -from tests.base import LightningTestModel +from tests.base import EvalModelTemplate from tests.base.models import TestGAN try: @@ -107,7 +107,8 @@ def test_horovod_multi_gpu(tmpdir): @pytest.mark.skipif(not _nccl_available(), reason="test requires Horovod with NCCL support") @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine") def test_horovod_transfer_batch_to_gpu(tmpdir): - class TestTrainingStepModel(LightningTestModel): + + class TestTrainingStepModel(EvalModelTemplate): def training_step(self, batch, *args, **kwargs): x, y = batch assert str(x.device) != 'cpu' diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index af0165d498ab0e..0a927a3a94e0ad 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -9,11 +9,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import ( - LightningTestModel, - LightningTestModelWithoutHyperparametersArg, - LightningTestModelWithUnusedHyperparametersArg -) +from tests.base import EvalModelTemplate @pytest.mark.spawn @@ -23,8 +19,7 @@ def test_running_test_pretrained_model_distrib(tmpdir, backend): """Verify `test()` on pretrained model.""" tutils.set_random_master_port() - hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # exp file to get meta logger = tutils.get_default_logger(tmpdir) @@ -53,7 +48,7 @@ def test_running_test_pretrained_model_distrib(tmpdir, backend): assert result == 1, 'training failed to complete' pretrained_model = tutils.load_model(logger, trainer.checkpoint_callback.dirpath, - module_class=LightningTestModel) + module_class=EvalModelTemplate) # run test set new_trainer = Trainer(**trainer_options) @@ -72,8 +67,7 @@ def test_running_test_pretrained_model_distrib(tmpdir, backend): def test_running_test_pretrained_model_cpu(tmpdir): """Verify test() on pretrained model.""" - hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -97,7 +91,7 @@ def test_running_test_pretrained_model_cpu(tmpdir): # correct result and ok accuracy assert result == 1, 'training failed to complete' pretrained_model = tutils.load_model( - logger, trainer.checkpoint_callback.dirpath, module_class=LightningTestModel + logger, trainer.checkpoint_callback.dirpath, module_class=EvalModelTemplate ) new_trainer = Trainer(**trainer_options) @@ -110,7 +104,7 @@ def test_running_test_pretrained_model_cpu(tmpdir): def test_load_model_from_checkpoint(tmpdir): """Verify test() on pretrained model.""" hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(hparams) trainer_options = 
dict( progress_bar_refresh_rate=0, @@ -131,7 +125,7 @@ def test_load_model_from_checkpoint(tmpdir): # load last checkpoint last_checkpoint = sorted(glob.glob(os.path.join(trainer.checkpoint_callback.dirpath, "*.ckpt")))[-1] - pretrained_model = LightningTestModel.load_from_checkpoint(last_checkpoint) + pretrained_model = EvalModelTemplate.load_from_checkpoint(last_checkpoint) # test that hparams loaded correctly for k, v in vars(hparams).items(): @@ -152,7 +146,13 @@ def test_load_model_from_checkpoint(tmpdir): def test_dp_resume(tmpdir): """Make sure DP continues training correctly.""" hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(hparams) + + trainer_options = dict( + max_epochs=1, + gpus=2, + distributed_backend='dp', + ) # get logger logger = tutils.get_default_logger(tmpdir) @@ -161,13 +161,9 @@ def test_dp_resume(tmpdir): # logger file to get weights checkpoint = tutils.init_checkpoint_callback(logger) - trainer_options = dict( - max_epochs=1, - gpus=2, - distributed_backend='dp', - logger=logger, - checkpoint_callback=checkpoint, - ) + # add these to the trainer options + trainer_options['logger'] = logger + trainer_options['checkpoint_callback'] = checkpoint # fit model trainer = Trainer(**trainer_options) @@ -188,13 +184,11 @@ def test_dp_resume(tmpdir): # init new trainer new_logger = tutils.get_default_logger(tmpdir, version=logger.version) - trainer_options.update( - logger=new_logger, - checkpoint_callback=ModelCheckpoint(tmpdir), - train_percent_check=0.5, - val_percent_check=0.2, - max_epochs=1, - ) + trainer_options['logger'] = new_logger + trainer_options['checkpoint_callback'] = ModelCheckpoint(tmpdir) + trainer_options['train_percent_check'] = 0.5 + trainer_options['val_percent_check'] = 0.2 + trainer_options['max_epochs'] = 1 new_trainer = Trainer(**trainer_options) # set the epoch start hook so we can predict before the model does the full training @@ -210,7 +204,7 @@ def assert_good_acc(): tutils.run_prediction(dataloader, dp_model, dp=True) # new model - model = LightningTestModel(hparams) + model = EvalModelTemplate(hparams) model.on_train_start = assert_good_acc # fit new model which should load hpc weights @@ -223,18 +217,19 @@ def assert_good_acc(): def test_model_saving_loading(tmpdir): """Tests use case where trainer saves the model, and user loads it from tags independently.""" - hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # logger file to get meta logger = tutils.get_default_logger(tmpdir) - # fit model - trainer = Trainer( + trainer_options = dict( max_epochs=1, logger=logger, checkpoint_callback=ModelCheckpoint(tmpdir) ) + + # fit model + trainer = Trainer(**trainer_options) result = trainer.fit(model) # traning complete @@ -263,7 +258,7 @@ def test_model_saving_loading(tmpdir): # load new model tags_path = tutils.get_data_path(logger, path_dir=tmpdir) tags_path = os.path.join(tags_path, 'meta_tags.csv') - model_2 = LightningTestModel.load_from_checkpoint( + model_2 = EvalModelTemplate.load_from_checkpoint( checkpoint_path=new_weights_path, tags_csv=tags_path ) @@ -276,8 +271,7 @@ def test_model_saving_loading(tmpdir): def test_load_model_with_missing_hparams(tmpdir): - # fit model - trainer = Trainer( + trainer_options = dict( progress_bar_refresh_rate=0, max_epochs=1, checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1), @@ -285,22 +279,35 @@ def test_load_model_with_missing_hparams(tmpdir): 
default_root_dir=tmpdir, ) - model = LightningTestModelWithoutHyperparametersArg() + # fit model + trainer = Trainer(**trainer_options) + + class CurrentModelWithoutHparams(EvalModelTemplate): + def __init__(self): + hparams = tutils.get_default_hparams() + super().__init__(hparams) + + class CurrentModelUnusedHparams(EvalModelTemplate): + def __init__(self, hparams): + hparams = tutils.get_default_hparams() + super().__init__(hparams) + + model = CurrentModelWithoutHparams() trainer.fit(model) last_checkpoint = sorted(glob.glob(os.path.join(trainer.checkpoint_callback.dirpath, "*.ckpt")))[-1] # try to load a checkpoint that has hparams but model is missing hparams arg with pytest.raises(MisconfigurationException, match=r".*__init__ is missing the argument 'hparams'.*"): - LightningTestModelWithoutHyperparametersArg.load_from_checkpoint(last_checkpoint) + CurrentModelWithoutHparams.load_from_checkpoint(last_checkpoint) # create a checkpoint without hyperparameters # if the model does not take a hparams argument, it should not throw an error ckpt = torch.load(last_checkpoint) del(ckpt['hparams']) torch.save(ckpt, last_checkpoint) - LightningTestModelWithoutHyperparametersArg.load_from_checkpoint(last_checkpoint) + CurrentModelWithoutHparams.load_from_checkpoint(last_checkpoint) # load checkpoint without hparams again # warn if user's model has hparams argument with pytest.warns(UserWarning, match=r".*Will pass in an empty Namespace instead."): - LightningTestModelWithUnusedHyperparametersArg.load_from_checkpoint(last_checkpoint) + CurrentModelUnusedHparams.load_from_checkpoint(last_checkpoint) diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index d847b6c8730c26..92704a9040a9ec 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -8,22 +8,7 @@ import tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import ( - TestModelBase, - LightningTestModel, - LightEmptyTestStep, - LightValidationMultipleDataloadersMixin, - LightTestMultipleDataloadersMixin, - LightTestFitSingleTestDataloadersMixin, - LightTestFitMultipleTestDataloadersMixin, - LightValStepFitMultipleDataloadersMixin, - LightValStepFitSingleDataloaderMixin, - LightTrainDataloader, - LightInfTrainDataloader, - LightInfValDataloader, - LightInfTestDataloader, - LightZeroLenDataloader -) +from tests.base import EvalModelTemplate @pytest.mark.parametrize("dataloader_options", [ @@ -33,16 +18,8 @@ dict(val_check_interval=10000), ]) def test_dataloader_config_errors(tmpdir, dataloader_options): - tutils.reset_seed() - class CurrentTestModel( - LightTrainDataloader, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # fit model trainer = Trainer( @@ -58,15 +35,9 @@ class CurrentTestModel( def test_multiple_val_dataloader(tmpdir): """Verify multiple val_dataloader.""" - class CurrentTestModel( - LightTrainDataloader, - LightValidationMultipleDataloadersMixin, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) + model.val_dataloader = model.val_dataloader__multiple + model.validation_step = model.validation_step__multiple_dataloaders # fit model trainer = Trainer( @@ -92,16 +63,9 @@ class CurrentTestModel( def 
test_multiple_test_dataloader(tmpdir): """Verify multiple test_dataloader.""" - class CurrentTestModel( - LightTrainDataloader, - LightTestMultipleDataloadersMixin, - LightEmptyTestStep, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) + model.test_dataloader = model.test_dataloader__multiple + model.test_step = model.test_step__multiple_dataloaders # fit model trainer = Trainer( @@ -128,20 +92,16 @@ class CurrentTestModel( def test_train_dataloader_passed_to_fit(tmpdir): """Verify that train dataloader can be passed to fit """ - class CurrentTestModel(LightTrainDataloader, TestModelBase): - pass - - hparams = tutils.get_default_hparams() - # only train passed to fit - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, val_percent_check=0.1, train_percent_check=0.2 ) - result = trainer.fit(model, train_dataloader=model._dataloader(train=True)) + fit_options = dict(train_dataloader=model.dataloader(train=True)) + result = trainer.fit(model, **fit_options) assert result == 1 @@ -149,26 +109,18 @@ class CurrentTestModel(LightTrainDataloader, TestModelBase): def test_train_val_dataloaders_passed_to_fit(tmpdir): """ Verify that train & val dataloader can be passed to fit """ - class CurrentTestModel( - LightTrainDataloader, - LightValStepFitSingleDataloaderMixin, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - # train, val passed to fit - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, val_percent_check=0.1, train_percent_check=0.2 ) - result = trainer.fit(model, - train_dataloader=model._dataloader(train=True), - val_dataloaders=model._dataloader(train=False)) + fit_options = dict(train_dataloader=model.dataloader(train=True), + val_dataloaders=model.dataloader(train=False)) + + result = trainer.fit(model, **fit_options) assert result == 1 assert len(trainer.val_dataloaders) == 1, \ f'`val_dataloaders` not initiated properly, got {trainer.val_dataloaders}' @@ -177,31 +129,21 @@ class CurrentTestModel( def test_all_dataloaders_passed_to_fit(tmpdir): """Verify train, val & test dataloader(s) can be passed to fit and test method""" - class CurrentTestModel( - LightTrainDataloader, - LightValStepFitSingleDataloaderMixin, - LightTestFitSingleTestDataloadersMixin, - LightEmptyTestStep, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() + model = EvalModelTemplate(tutils.get_default_hparams()) # train, val and test passed to fit - model = CurrentTestModel(hparams) trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, val_percent_check=0.1, train_percent_check=0.2 ) + fit_options = dict(train_dataloader=model.dataloader(train=True), + val_dataloaders=model.dataloader(train=False)) + test_options = dict(test_dataloaders=model.dataloader(train=False)) - result = trainer.fit(model, - train_dataloader=model._dataloader(train=True), - val_dataloaders=model._dataloader(train=False)) - - trainer.test(test_dataloaders=model._dataloader(train=False)) + result = trainer.fit(model, **fit_options) + trainer.test(**test_options) assert result == 1 assert len(trainer.val_dataloaders) == 1, \ @@ -213,32 +155,25 @@ class CurrentTestModel( def test_multiple_dataloaders_passed_to_fit(tmpdir): """Verify that multiple val & test dataloaders can be passed to 
fit.""" - class CurrentTestModel( - LightningTestModel, - LightValStepFitMultipleDataloadersMixin, - LightTestFitMultipleTestDataloadersMixin, - ): - pass - - hparams = tutils.get_default_hparams() + model = EvalModelTemplate(tutils.get_default_hparams()) + model.validation_step = model.validation_step__multiple_dataloaders + model.test_step = model.test_step__multiple_dataloaders # train, multiple val and multiple test passed to fit - model = CurrentTestModel(hparams) trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, val_percent_check=0.1, train_percent_check=0.2 ) + fit_options = dict(train_dataloader=model.dataloader(train=True), + val_dataloaders=[model.dataloader(train=False), + model.dataloader(train=False)]) + test_options = dict(test_dataloaders=[model.dataloader(train=False), + model.dataloader(train=False)]) - results = trainer.fit( - model, - train_dataloader=model._dataloader(train=True), - val_dataloaders=[model._dataloader(train=False), model._dataloader(train=False)], - ) - assert results - - trainer.test(test_dataloaders=[model._dataloader(train=False), model._dataloader(train=False)]) + trainer.fit(model, **fit_options) + trainer.test(**test_options) assert len(trainer.val_dataloaders) == 2, \ f'Multiple `val_dataloaders` not initiated properly, got {trainer.val_dataloaders}' @@ -249,16 +184,7 @@ class CurrentTestModel( def test_mixing_of_dataloader_options(tmpdir): """Verify that dataloaders can be passed to fit""" - class CurrentTestModel( - LightTrainDataloader, - LightValStepFitSingleDataloaderMixin, - LightTestFitSingleTestDataloadersMixin, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) trainer_options = dict( default_root_dir=tmpdir, @@ -269,17 +195,14 @@ class CurrentTestModel( # fit model trainer = Trainer(**trainer_options) - fit_options = dict(val_dataloaders=model._dataloader(train=False)) - results = trainer.fit(model, **fit_options) + results = trainer.fit(model, val_dataloaders=model.dataloader(train=False)) assert results # fit model trainer = Trainer(**trainer_options) - fit_options = dict(val_dataloaders=model._dataloader(train=False)) - test_options = dict(test_dataloaders=model._dataloader(train=False)) - - _ = trainer.fit(model, **fit_options) - trainer.test(**test_options) + results = trainer.fit(model, val_dataloaders=model.dataloader(train=False)) + assert results + trainer.test(test_dataloaders=model.dataloader(train=False)) assert len(trainer.val_dataloaders) == 1, \ f'`val_dataloaders` not initiated properly, got {trainer.val_dataloaders}' @@ -287,72 +210,68 @@ class CurrentTestModel( f'`test_dataloaders` not initiated properly, got {trainer.test_dataloaders}' -def test_inf_train_dataloader(tmpdir): +def test_train_inf_dataloader_error(tmpdir): """Test inf train data loader (e.g. 
IterableDataset)""" + model = EvalModelTemplate(tutils.get_default_hparams()) + model.train_dataloader = model.train_dataloader__infinite - class CurrentTestModel( - LightInfTrainDataloader, - LightningTestModel - ): - pass + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, val_check_interval=0.5) - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + with pytest.raises(MisconfigurationException, match='infinite DataLoader'): + trainer.fit(model) - # fit model - with pytest.raises(MisconfigurationException): - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - val_check_interval=0.5 - ) + +def test_val_inf_dataloader_error(tmpdir): + """Test inf train data loader (e.g. IterableDataset)""" + model = EvalModelTemplate(tutils.get_default_hparams()) + model.val_dataloader = model.val_dataloader__infinite + + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, val_percent_check=0.5) + + with pytest.raises(MisconfigurationException, match='infinite DataLoader'): trainer.fit(model) - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - val_check_interval=50 - ) - result = trainer.fit(model) - # verify training completed - assert result == 1 +def test_test_inf_dataloader_error(tmpdir): + """Test inf train data loader (e.g. IterableDataset)""" + model = EvalModelTemplate(tutils.get_default_hparams()) + model.test_dataloader = model.test_dataloader__infinite + + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, test_percent_check=0.5) + + with pytest.raises(MisconfigurationException, match='infinite DataLoader'): + trainer.test(model) + + +@pytest.mark.parametrize('check_interval', [50, 1.0]) +def test_inf_train_dataloader(tmpdir, check_interval): + """Test inf train data loader (e.g. IterableDataset)""" + + model = EvalModelTemplate(tutils.get_default_hparams()) + model.train_dataloader = model.train_dataloader__infinite trainer = Trainer( default_root_dir=tmpdir, - max_epochs=1 + max_epochs=1, + train_check_interval=check_interval, ) result = trainer.fit(model) - # verify training completed assert result == 1 -def test_inf_val_dataloader(tmpdir): +@pytest.mark.parametrize('check_interval', [1.0]) +def test_inf_val_dataloader(tmpdir, check_interval): """Test inf val data loader (e.g. IterableDataset)""" - class CurrentTestModel( - LightInfValDataloader, - LightningTestModel - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) - - # fit model - with pytest.raises(MisconfigurationException): - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - val_percent_check=0.5 - ) - trainer.fit(model) + model = EvalModelTemplate(tutils.get_default_hparams()) + model.val_dataloader = model.val_dataloader__infinite # logger file to get meta trainer = Trainer( default_root_dir=tmpdir, - max_epochs=1 + max_epochs=1, + val_check_interval=check_interval, ) result = trainer.fit(model) @@ -360,35 +279,20 @@ class CurrentTestModel( assert result == 1 -def test_inf_test_dataloader(tmpdir): +@pytest.mark.parametrize('check_interval', [50, 1.0]) +def test_inf_test_dataloader(tmpdir, check_interval): """Test inf test data loader (e.g. 
IterableDataset)""" - class CurrentTestModel( - LightInfTestDataloader, - LightningTestModel, - LightTestFitSingleTestDataloadersMixin - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) - - # fit model - with pytest.raises(MisconfigurationException): - trainer = Trainer( - default_root_dir=tmpdir, - max_epochs=1, - test_percent_check=0.5 - ) - trainer.test(model) + model = EvalModelTemplate(tutils.get_default_hparams()) + model.test_dataloader = model.test_dataloader__infinite # logger file to get meta trainer = Trainer( default_root_dir=tmpdir, - max_epochs=1 + max_epochs=1, + test_check_interval=check_interval, ) result = trainer.fit(model) - trainer.test(model) # verify training completed assert result == 1 @@ -397,14 +301,8 @@ class CurrentTestModel( def test_error_on_zero_len_dataloader(tmpdir): """ Test that error is raised if a zero-length dataloader is defined """ - class CurrentTestModel( - LightZeroLenDataloader, - LightningTestModel - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) + model.train_dataloader = model.train_dataloader__zero_length # fit model with pytest.raises(ValueError): @@ -420,29 +318,22 @@ class CurrentTestModel( def test_warning_with_few_workers(tmpdir): """ Test that error is raised if dataloader with only a few workers is used """ - class CurrentTestModel( - LightTrainDataloader, - LightValStepFitSingleDataloaderMixin, - LightTestFitSingleTestDataloadersMixin, - LightEmptyTestStep, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) - - fit_options = dict(train_dataloader=model._dataloader(train=True), - val_dataloaders=model._dataloader(train=False)) - test_options = dict(test_dataloaders=model._dataloader(train=False)) + model = EvalModelTemplate(tutils.get_default_hparams()) - trainer = Trainer( + # logger file to get meta + trainer_options = dict( default_root_dir=tmpdir, max_epochs=1, val_percent_check=0.1, train_percent_check=0.2 ) + fit_options = dict(train_dataloader=model.dataloader(train=True), + val_dataloaders=model.dataloader(train=False)) + test_options = dict(test_dataloaders=model.dataloader(train=False)) + + trainer = Trainer(**trainer_options) + # fit model with pytest.warns(UserWarning, match='train'): trainer.fit(model, **fit_options) @@ -492,15 +383,21 @@ def test_batch_size_smaller_than_num_gpus(): num_gpus = 3 batch_size = 3 - class CurrentTestModel( - LightTrainDataloader, - TestModelBase, - ): + class CurrentTestModel(EvalModelTemplate): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + # batch norm doesn't work with batch size 1, we replace it self.c_d1_bn = torch.nn.ReLU() + def training_step(self, *args, **kwargs): + output = super().training_step(*args, **kwargs) + loss = output['loss'] + # we make sure to add some metrics to the output dict, + # this is essential for this test + output['progress_bar'] = {'train_loss': loss} + return output + def train_dataloader(self): dataloader = super().train_dataloader() # construct a dataset with a size that is not divisible by num_gpus @@ -520,6 +417,7 @@ def train_dataloader(self): trainer = Trainer( max_epochs=1, + val_percent_check=0, gpus=num_gpus, ) diff --git a/tests/trainer/test_lr_finder.py b/tests/trainer/test_lr_finder.py index ba6e9c336b130c..ce9d3d3b1b0f3d 100755 --- a/tests/trainer/test_lr_finder.py +++ b/tests/trainer/test_lr_finder.py @@ -4,25 +4,14 @@ import 
tests.base.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import ( - LightTrainDataloader, - TestModelBase, - LightTestMultipleOptimizersWithSchedulingMixin, -) +from tests.base import EvalModelTemplate def test_error_on_more_than_1_optimizer(tmpdir): """ Check that error is thrown when more than 1 optimizer is passed """ - class CurrentTestModel( - LightTestMultipleOptimizersWithSchedulingMixin, - LightTrainDataloader, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) + model.configure_optimizers = model.configure_optimizers__multiple_schedulers # logger file to get meta trainer = Trainer( @@ -37,14 +26,7 @@ class CurrentTestModel( def test_model_reset_correctly(tmpdir): """ Check that model weights are correctly reset after lr_find() """ - class CurrentTestModel( - LightTrainDataloader, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # logger file to get meta trainer = Trainer( @@ -66,14 +48,7 @@ class CurrentTestModel( def test_trainer_reset_correctly(tmpdir): """ Check that all trainer parameters are reset correctly after lr_find() """ - class CurrentTestModel( - LightTrainDataloader, - TestModelBase, - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # logger file to get meta trainer = Trainer( @@ -82,8 +57,8 @@ class CurrentTestModel( ) changed_attributes = ['callbacks', 'logger', 'max_steps', 'auto_lr_find', - 'progress_bar_refresh_rate', - 'accumulate_grad_batches', + 'progress_bar_refresh_rate', 'early_stop_callback', + 'accumulate_grad_batches', 'enable_early_stop', 'checkpoint_callback'] attributes_before = {} for ca in changed_attributes: @@ -102,15 +77,10 @@ class CurrentTestModel( def test_trainer_arg_bool(tmpdir): - class CurrentTestModel( - LightTrainDataloader, - TestModelBase, - ): - pass - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(hparams) before_lr = hparams.learning_rate + # logger file to get meta trainer = Trainer( default_save_path=tmpdir, @@ -126,15 +96,10 @@ class CurrentTestModel( def test_trainer_arg_str(tmpdir): - class CurrentTestModel( - LightTrainDataloader, - TestModelBase, - ): - pass - hparams = tutils.get_default_hparams() hparams.__dict__['my_fancy_lr'] = 1.0 # update with non-standard field - model = CurrentTestModel(hparams) + model = EvalModelTemplate(hparams) + before_lr = hparams.my_fancy_lr # logger file to get meta trainer = Trainer( @@ -151,14 +116,9 @@ class CurrentTestModel( def test_call_to_trainer_method(tmpdir): - class CurrentTestModel( - LightTrainDataloader, - TestModelBase, - ): - pass - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(hparams) + before_lr = hparams.learning_rate # logger file to get meta trainer = Trainer( diff --git a/tests/trainer/test_optimizers.py b/tests/trainer/test_optimizers.py index be0ac5471d24c7..665ba3cdfbc69c 100644 --- a/tests/trainer/test_optimizers.py +++ b/tests/trainer/test_optimizers.py @@ -3,30 +3,15 @@ import tests.base.utils as tutils from pytorch_lightning import Trainer -from tests.base import ( - TestModelBase, - LightTrainDataloader, - 
LightValidationStepMixin, - LightValidationMixin, - LightTestOptimizerWithSchedulingMixin, - LightTestMultipleOptimizersWithSchedulingMixin, - LightTestOptimizersWithMixedSchedulingMixin, - LightTestReduceLROnPlateauMixin, - LightTestNoneOptimizerMixin, EvalModelTemplate -) +from tests.base import EvalModelTemplate def test_optimizer_with_scheduling(tmpdir): """ Verify that learning rate scheduling is working """ - class CurrentTestModel( - LightTestOptimizerWithSchedulingMixin, - LightTrainDataloader, - TestModelBase): - pass - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(hparams) + model.configure_optimizers = model.configure_optimizers__single_scheduler # fit model trainer = Trainer( @@ -36,6 +21,7 @@ class CurrentTestModel( train_percent_check=0.2 ) results = trainer.fit(model) + assert results == 1 init_lr = hparams.learning_rate adjusted_lr = [pg['lr'] for pg in trainer.optimizers[0].param_groups] @@ -54,14 +40,9 @@ class CurrentTestModel( def test_multi_optimizer_with_scheduling(tmpdir): """ Verify that learning rate scheduling is working """ - class CurrentTestModel( - LightTestMultipleOptimizersWithSchedulingMixin, - LightTrainDataloader, - TestModelBase): - pass - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(hparams) + model.configure_optimizers = model.configure_optimizers__multiple_schedulers # fit model trainer = Trainer( @@ -71,6 +52,7 @@ class CurrentTestModel( train_percent_check=0.2 ) results = trainer.fit(model) + assert results == 1 init_lr = hparams.learning_rate adjusted_lr1 = [pg['lr'] for pg in trainer.optimizers[0].param_groups] @@ -93,14 +75,9 @@ class CurrentTestModel( def test_multi_optimizer_with_scheduling_stepping(tmpdir): - class CurrentTestModel( - LightTestOptimizersWithMixedSchedulingMixin, - LightTrainDataloader, - TestModelBase): - pass - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(hparams) + model.configure_optimizers = model.configure_optimizers__multiple_schedulers # fit model trainer = Trainer( @@ -110,6 +87,7 @@ class CurrentTestModel( train_percent_check=0.2 ) results = trainer.fit(model) + assert results == 1 init_lr = hparams.learning_rate adjusted_lr1 = [pg['lr'] for pg in trainer.optimizers[0].param_groups] @@ -127,7 +105,7 @@ class CurrentTestModel( adjusted_lr2 = adjusted_lr2[0] # Called ones after end of epoch - assert init_lr * 0.1 ** 0 == adjusted_lr1, \ + assert init_lr * 0.1 ** 1 == adjusted_lr1, \ 'lr for optimizer 1 not adjusted correctly' # Called every 3 steps, meaning for 1 epoch of 11 batches, it is called 3 times assert init_lr * 0.1 == adjusted_lr2, \ @@ -136,16 +114,9 @@ class CurrentTestModel( def test_reduce_lr_on_plateau_scheduling(tmpdir): - class CurrentTestModel( - LightTestReduceLROnPlateauMixin, - LightTrainDataloader, - LightValidationMixin, - LightValidationStepMixin, - TestModelBase): - pass - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(hparams) + model.configure_optimizers = model.configure_optimizers__reduce_lr_on_plateau # fit model trainer = Trainer( @@ -155,7 +126,7 @@ class CurrentTestModel( train_percent_check=0.2 ) results = trainer.fit(model) - assert results + assert results == 1 assert trainer.lr_schedulers[0] == \ dict(scheduler=trainer.lr_schedulers[0]['scheduler'], monitor='val_loss', @@ -233,14 +204,9 @@ def test_none_optimizer_warning(): def test_none_optimizer(tmpdir): - 
class CurrentTestModel( - LightTestNoneOptimizerMixin, - LightTrainDataloader, - TestModelBase): - pass - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(hparams) + model.configure_optimizers = model.configure_optimizers__empty # fit model trainer = Trainer( @@ -256,11 +222,9 @@ class CurrentTestModel( def test_configure_optimizer_from_dict(tmpdir): - """Tests if `configure_optimizer` method could return a dictionary with - `optimizer` field only. - """ + """Tests if `configure_optimizer` method could return a dictionary with `optimizer` field only.""" - class CurrentTestModel(LightTrainDataloader, TestModelBase): + class CurrentModel(EvalModelTemplate): def configure_optimizers(self): config = { 'optimizer': torch.optim.SGD(params=self.parameters(), lr=1e-03) @@ -268,7 +232,7 @@ def configure_optimizers(self): return config hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = CurrentModel(hparams) # fit model trainer = Trainer(default_save_path=tmpdir, max_epochs=1) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index d72c04ed80e365..8ab722d8886b28 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -8,34 +8,24 @@ import torch import tests.base.utils as tutils -from pytorch_lightning import Callback +from pytorch_lightning import Callback, LightningModule from pytorch_lightning import Trainer from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint from pytorch_lightning.core.lightning import load_hparams_from_tags_csv from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import ( - TestModelBase, - DictHparamsModel, - LightningTestModel, - LightEmptyTestStep, - LightValidationStepMixin, - LightValidationMultipleDataloadersMixin, - LightTrainDataloader, - LightTestDataloader, - LightValidationMixin, EvalModelTemplate, -) +from tests.base import EvalModelTemplate def test_model_pickle(tmpdir): import pickle - model = TestModelBase(tutils.get_default_hparams()) + model = EvalModelTemplate(tutils.get_default_hparams()) pickle.dumps(model) def test_hparams_save_load(tmpdir): - model = DictHparamsModel({'in_features': 28 * 28, 'out_features': 10, 'failed_key': lambda x: x}) + model = EvalModelTemplate(vars(tutils.get_default_hparams())) trainer = Trainer( default_root_dir=tmpdir, @@ -48,19 +38,15 @@ def test_hparams_save_load(tmpdir): # try to load the model now pretrained_model = tutils.load_model_from_checkpoint( trainer.checkpoint_callback.dirpath, - module_class=DictHparamsModel + module_class=EvalModelTemplate ) + assert pretrained_model def test_no_val_module(tmpdir): """Tests use case where trainer saves the model, and user loads it from tags independently.""" - hparams = tutils.get_default_hparams() - - class CurrentTestModel(LightTrainDataloader, TestModelBase): - pass - - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -84,7 +70,7 @@ class CurrentTestModel(LightTrainDataloader, TestModelBase): assert 'hparams' in ckpt.keys(), 'hparams missing from checkpoints' # won't load without hparams in the ckpt - model_2 = LightningTestModel.load_from_checkpoint( + model_2 = EvalModelTemplate.load_from_checkpoint( checkpoint_path=new_weights_path, ) model_2.eval() @@ -93,11 +79,7 @@ class 
CurrentTestModel(LightTrainDataloader, TestModelBase): def test_no_val_end_module(tmpdir): """Tests use case where trainer saves the model, and user loads it from tags independently.""" - class CurrentTestModel(LightTrainDataloader, LightValidationStepMixin, TestModelBase): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -120,7 +102,7 @@ class CurrentTestModel(LightTrainDataloader, LightValidationStepMixin, TestModel # load new model tags_path = tutils.get_data_path(logger, path_dir=tmpdir) tags_path = os.path.join(tags_path, 'meta_tags.csv') - model_2 = LightningTestModel.load_from_checkpoint( + model_2 = EvalModelTemplate.load_from_checkpoint( checkpoint_path=new_weights_path, tags_csv=tags_path ) @@ -185,8 +167,7 @@ def _optimizer_step(self, epoch, batch_idx, optimizer, # clear gradients optimizer.zero_grad() - hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) schedule = {1: 2, 3: 4} trainer = Trainer(accumulate_grad_batches=schedule, @@ -260,9 +241,6 @@ def test_model_checkpoint_options(tmpdir, save_top_k, file_prefix, expected_file def mock_save_function(filepath): open(filepath, 'a').close() - hparams = tutils.get_default_hparams() - _ = LightningTestModel(hparams) - # simulated losses losses = [10, 9, 2.8, 5, 2.5] @@ -288,8 +266,7 @@ def mock_save_function(filepath): def test_model_freeze_unfreeze(): - hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) model.freeze() model.unfreeze() @@ -302,7 +279,7 @@ def test_resume_from_checkpoint_epoch_restored(tmpdir): def _new_model(): # Create a model that tracks epochs and batches seen - model = LightningTestModel(hparams) + model = EvalModelTemplate(hparams) model.num_epochs_seen = 0 model.num_batches_seen = 0 model.num_on_load_checkpoint_called = 0 @@ -452,15 +429,8 @@ def test_trainer_min_steps_and_epochs(tmpdir): def test_benchmark_option(tmpdir): """Verify benchmark option.""" - class CurrentTestModel( - LightValidationMultipleDataloadersMixin, - LightTrainDataloader, - TestModelBase - ): - pass - - hparams = tutils.get_default_hparams() - model = CurrentTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) + model.val_dataloader = model.val_dataloader__multiple # verify torch.backends.cudnn.benchmark is not turned on assert not torch.backends.cudnn.benchmark @@ -481,40 +451,34 @@ class CurrentTestModel( def test_testpass_overrides(tmpdir): + # todo: check duplicated tests against trainer_checks hparams = tutils.get_default_hparams() - class LocalModel(LightTrainDataloader, TestModelBase): - pass - - class LocalModelNoEnd(LightTrainDataloader, LightTestDataloader, LightEmptyTestStep, TestModelBase): - pass - - class LocalModelNoStep(LightTrainDataloader, TestModelBase): - def test_epoch_end(self, outputs): - return {} - # Misconfig when neither test_step or test_end is implemented - with pytest.raises(MisconfigurationException): - model = LocalModel(hparams) + with pytest.raises(MisconfigurationException, match='.*not implement `test_dataloader`.*'): + model = EvalModelTemplate(hparams) + model.test_dataloader = model.test_dataloader__empty Trainer().test(model) # Misconfig when neither test_step or test_end is implemented with pytest.raises(MisconfigurationException): - model = 
LocalModelNoStep(hparams) + model = EvalModelTemplate(hparams) + model.test_step = LightningModule.test_step Trainer().test(model) # No exceptions when one or both of test_step or test_end are implemented - model = LocalModelNoEnd(hparams) + model = EvalModelTemplate(hparams) + model.test_step_end = LightningModule.test_step_end Trainer().test(model) - model = LightningTestModel(hparams) + model = EvalModelTemplate(hparams) Trainer().test(model) def test_disabled_validation(): """Verify that `val_percent_check=0` disables the validation loop unless `fast_dev_run=True`.""" - class CurrentModel(LightTrainDataloader, LightValidationMixin, TestModelBase): + class CurrentModel(EvalModelTemplate): validation_step_invoked = False validation_epoch_end_invoked = False @@ -564,59 +528,56 @@ def validation_epoch_end(self, *args, **kwargs): def test_nan_loss_detection(tmpdir): - test_step = 8 - class InfLossModel(LightTrainDataloader, TestModelBase): + class CurrentModel(EvalModelTemplate): + test_batch_inf_loss = 8 - def training_step(self, batch, batch_idx): - output = super().training_step(batch, batch_idx) - if batch_idx == test_step: + def training_step(self, batch, batch_idx, optimizer_idx=None): + output = super().training_step(batch, batch_idx, optimizer_idx) + if batch_idx == self.test_batch_inf_loss: if isinstance(output, dict): output['loss'] *= torch.tensor(math.inf) # make loss infinite else: output /= 0 return output - hparams = tutils.get_default_hparams() - model = InfLossModel(hparams) + model = CurrentModel(tutils.get_default_hparams()) # fit model trainer = Trainer( default_root_dir=tmpdir, - max_steps=(test_step + 1), + max_steps=(model.test_batch_inf_loss + 1), terminate_on_nan=True ) with pytest.raises(ValueError, match=r'.*The loss returned in `training_step` is nan or inf.*'): trainer.fit(model) - assert trainer.global_step == test_step + assert trainer.global_step == model.test_batch_inf_loss for param in model.parameters(): assert torch.isfinite(param).all() def test_nan_params_detection(tmpdir): - test_step = 8 - class NanParamModel(LightTrainDataloader, TestModelBase): + class CurrentModel(EvalModelTemplate): + test_batch_nan = 8 def on_after_backward(self): - if self.global_step == test_step: + if self.global_step == self.test_batch_nan: # simulate parameter that became nan torch.nn.init.constant_(self.c_d1.bias, math.nan) - hparams = tutils.get_default_hparams() - - model = NanParamModel(hparams) + model = CurrentModel(tutils.get_default_hparams()) trainer = Trainer( default_root_dir=tmpdir, - max_steps=(test_step + 1), + max_steps=(model.test_batch_nan + 1), terminate_on_nan=True ) with pytest.raises(ValueError, match=r'.*Detected nan and/or inf values in `c_d1.bias`.*'): trainer.fit(model) - assert trainer.global_step == test_step + assert trainer.global_step == model.test_batch_nan # after aborting the training loop, model still has nan-valued params params = torch.cat([param.view(-1) for param in model.parameters()]) @@ -626,7 +587,7 @@ def on_after_backward(self): def test_trainer_interrupted_flag(tmpdir): """Test the flag denoting that a user interrupted training.""" - model = DictHparamsModel({'in_features': 28 * 28, 'out_features': 10}) + model = EvalModelTemplate(tutils.get_default_hparams()) class InterruptCallback(Callback): def __init__(self): @@ -656,8 +617,7 @@ def test_gradient_clipping(tmpdir): """ Test gradient clipping """ - hparams = tutils.get_default_hparams() - model = LightningTestModel(hparams) + model = EvalModelTemplate(tutils.get_default_hparams()) #
test that gradient is clipped correctly def _optimizer_step(*args, **kwargs): diff --git a/tests/trainer/test_trainer_tricks.py b/tests/trainer/test_trainer_tricks.py index 495237d8dc91c1..f378b9e3c261ff 100755 --- a/tests/trainer/test_trainer_tricks.py +++ b/tests/trainer/test_trainer_tricks.py @@ -12,7 +12,7 @@ def test_model_reset_correctly(tmpdir): - ''' Check that model weights are correctly reset after scaling batch size''' + """ Check that model weights are correctly reset after scaling batch size. """ tutils.reset_seed() class CurrentTestModel( @@ -42,7 +42,7 @@ class CurrentTestModel( def test_trainer_reset_correctly(tmpdir): - ''' Check that all trainer parameters are reset correctly after scaling batch size''' + """ Check that all trainer parameters are reset correctly after scaling batch size. """ tutils.reset_seed() class CurrentTestModel( @@ -79,7 +79,7 @@ class CurrentTestModel( def test_trainer_arg_bool(tmpdir): - ''' Check that trainer arg works with bool input ''' + """ Check that trainer arg works with bool input. """ tutils.reset_seed() class CurrentTestModel( @@ -131,7 +131,7 @@ class CurrentTestModel( def test_call_to_trainer_method(tmpdir): - ''' Test that calling the trainer method itself works ''' + """ Test that calling the trainer method itself works. """ tutils.reset_seed() class CurrentTestModel(