From ad00b975781e71570e94ef47347db875951e90d6 Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 18 Jan 2021 19:45:25 +0000 Subject: [PATCH 01/28] add profiler --- pytorch_lightning/profiler/__init__.py | 9 +- pytorch_lightning/profiler/profilers.py | 104 ++++++++++++++++++ .../trainer/connectors/profiler_connector.py | 9 +- pytorch_lightning/trainer/evaluation_loop.py | 6 +- pytorch_lightning/trainer/training_loop.py | 5 +- pytorch_lightning/utilities/__init__.py | 1 + pytorch_lightning/utilities/imports.py | 1 + tests/trainer/test_trainer.py | 25 ++++- 8 files changed, 153 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index c9ea6eb8ebaf6..329dff1e64e78 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -116,11 +116,18 @@ def custom_processing_step(self, data): """ -from pytorch_lightning.profiler.profilers import AdvancedProfiler, BaseProfiler, PassThroughProfiler, SimpleProfiler +from pytorch_lightning.profiler.profilers import ( + AdvancedProfiler, + BaseProfiler, + PassThroughProfiler, + PytorchProfiler, + SimpleProfiler, +) __all__ = [ 'BaseProfiler', 'SimpleProfiler', 'AdvancedProfiler', 'PassThroughProfiler', + "PytorchProfiler", ] diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 377ebfb7a51d5..3b44464bcdafa 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -25,9 +25,11 @@ from typing import Optional, Union import numpy as np +import torch from pytorch_lightning import _logger as log from pytorch_lightning.utilities.cloud_io import get_filesystem +from pytorch_lightning.utilities.exceptions import MisconfigurationException class BaseProfiler(ABC): @@ -282,3 +284,105 @@ def __del__(self): """Close profiler's stream.""" if self.output_file: self.output_file.close() + + +class PytorchProfiler(BaseProfiler): + """ + This profiler uses 
PyTorch's Autograd Profiler and let's you inspect the cost of + different operators inside your model - both on the CPU and GPU + """ + + PROFILED_FUNCTIONS = ["training_step", "validation_step", "test_step"] + + def __init__(self, output_filename: Optional[str] = None, + enabled=True, + use_cuda=False, + record_shapes=True, + profile_memory=True, + with_stack=True, + sort_by_key: str = "self_cuda_memory_usage"): + """ + Args: + output_filename: optionally save profile results to file instead of printing + to std out when training is finished. + line_count_restriction: this can be used to limit the number of functions + reported for each action. either an integer (to select a count of lines), + or a decimal fraction between 0.0 and 1.0 inclusive (to select a percentage of lines) + """ + self.profiled_actions = {} + self.enabled = enabled + self.use_cuda = use_cuda + self.record_shapes = record_shapes + self.profile_memory = profile_memory + self.with_stack = with_stack + self.sort_by_key = sort_by_key + if self.sort_by_key not in self.available_sort_by_keys: + raise MisconfigurationException( + f"Found sort_by_key: {sort_by_key}. Should be within {self.available_sort_by_keys}. 
") + + self.output_fname = output_filename + self.output_file = None + if self.output_fname: + fs = get_filesystem(self.output_fname) + self.output_file = fs.open(self.output_fname, "w") + + streaming_out = [self.output_file.write] if self.output_file else [log.info] + super().__init__(output_streams=streaming_out) + + def start(self, action_name: str) -> None: + if action_name not in self.profiled_actions and action_name in self.PROFILED_FUNCTIONS: + self.profiled_actions[action_name] = torch.autograd.profiler.profile( + enabled=self.enabled, + use_cuda=self.use_cuda, + record_shapes=self.record_shapes, + profile_memory=self.profile_memory).__enter__() + + def stop(self, action_name: str) -> None: + if action_name in self.PROFILED_FUNCTIONS: + pr = self.profiled_actions.get(action_name) + if pr is None: + raise ValueError( # pragma: no-cover + f"Attempting to stop recording an action ({action_name}) which was never started." + ) + # todo: Find a better solution + try: + _ = pr.__exit__(None, None, None) + except RuntimeError as e: + if "Expected debug info of type 2" in str(e): + pass + else: + raise RuntimeError(str(e)) + + def summary(self) -> str: + recorded_stats = {} + for action_name, pr in self.profiled_actions.items(): + table = self.profiled_actions[action_name].key_averages().table(sort_by=self.sort_by_key) + recorded_stats[action_name] = table + + # log to standard out + output_string = f"{os.linesep}Profiler Report{os.linesep}" + for action, stats in recorded_stats.items(): + output_string += ( + f"{os.linesep}Profile stats for: {action}{os.linesep}{stats}" + ) + + return output_string + + def describe(self): + """Logs a profile report after the conclusion of the training run.""" + super().describe() + if self.output_file: + self.output_file.flush() + + def __del__(self): + """Close profiler's stream.""" + if self.output_file: + self.output_file.close() + + @property + def available_sort_by_keys(self): + return [ + "cpu_time", "cuda_time", 
"cpu_time_total", + "cuda_time_total", "cpu_memory_usage", "cuda_memory_usage", + "self_cpu_memory_usage", "self_cuda_memory_usage", "count" + ] diff --git a/pytorch_lightning/trainer/connectors/profiler_connector.py b/pytorch_lightning/trainer/connectors/profiler_connector.py index 3ecc168231b38..e2992a82bbcf2 100644 --- a/pytorch_lightning/trainer/connectors/profiler_connector.py +++ b/pytorch_lightning/trainer/connectors/profiler_connector.py @@ -14,13 +14,20 @@ from typing import Union -from pytorch_lightning.profiler import BaseProfiler, PassThroughProfiler, SimpleProfiler, AdvancedProfiler +from pytorch_lightning.profiler import ( + AdvancedProfiler, + BaseProfiler, + PassThroughProfiler, + PytorchProfiler, + SimpleProfiler, +) from pytorch_lightning.utilities import rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException PROFILERS = { "simple": SimpleProfiler, "advanced": AdvancedProfiler, + "pytorch": PytorchProfiler, } diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py index a8fa9f43684ca..1a334149d9292 100644 --- a/pytorch_lightning/trainer/evaluation_loop.py +++ b/pytorch_lightning/trainer/evaluation_loop.py @@ -171,10 +171,12 @@ def evaluation_step(self, test_mode, batch, batch_idx, dataloader_idx): # run actual test step if self.testing: model_ref._current_fx_name = "test_step" - output = self.trainer.accelerator_backend.test_step(args) + with self.trainer.profiler.profile("test_step"): + output = self.trainer.accelerator_backend.test_step(args) else: model_ref._current_fx_name = "validation_step" - output = self.trainer.accelerator_backend.validation_step(args) + with self.trainer.profiler.profile("validation_step"): + output = self.trainer.accelerator_backend.validation_step(args) # capture any logged information self.trainer.logger_connector.cache_logged_metrics() diff --git a/pytorch_lightning/trainer/training_loop.py 
b/pytorch_lightning/trainer/training_loop.py index 6b49dc63f52b4..1b07634908a7e 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -24,7 +24,7 @@ from pytorch_lightning.core.step_result import EvalResult, Result from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.trainer.supporters import Accumulator, TensorRunningAccum -from pytorch_lightning.utilities import _TPU_AVAILABLE, AMPType, parsing, DeviceType +from pytorch_lightning.utilities import _TPU_AVAILABLE, AMPType, DeviceType, parsing from pytorch_lightning.utilities.distributed import rank_zero_info, rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.memory import recursive_detach @@ -339,7 +339,8 @@ def training_step(self, split_batch, batch_idx, opt_idx, hiddens): # manually capture logged metrics model_ref._current_fx_name = 'training_step' model_ref._results = Result() - training_step_output = self.trainer.accelerator_backend.training_step(args) + with self.trainer.profiler.profile("training_step"): + training_step_output = self.trainer.accelerator_backend.training_step(args) self.trainer.logger_connector.cache_logged_metrics() self._check_training_step_output(training_step_output) diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 0a5ed04eb72a3..4b48e6595be6e 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -34,6 +34,7 @@ _module_available, _NATIVE_AMP_AVAILABLE, _OMEGACONF_AVAILABLE, + _PYTORCH_GREATER_EQUAL_1_7_0, _RPC_AVAILABLE, _TORCHTEXT_AVAILABLE, _XLA_AVAILABLE, diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index acdebfbf239e4..775c683b92bef 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -53,3 +53,4 @@ def _module_available(module_path: str) 
-> bool: _GROUP_AVAILABLE = platform.system() != 'Windows' and _module_available('torch.distributed.group') _FAIRSCALE_PIPE_AVAILABLE = _FAIRSCALE_AVAILABLE and LooseVersion(torch.__version__) >= LooseVersion("1.6.0") _BOLTS_AVAILABLE = _module_available('pl_bolts') +_PYTORCH_GREATER_EQUAL_1_7_0 = LooseVersion(torch.__version__) >= LooseVersion("1.7.0") diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 97785d9e61a86..75170c89947c3 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -33,7 +33,7 @@ from pytorch_lightning.profiler.profilers import AdvancedProfiler, PassThroughProfiler, SimpleProfiler from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.trainer.states import TrainerState -from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE +from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE, _PYTORCH_GREATER_EQUAL_1_7_0 from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base import BoringModel, EvalModelTemplate @@ -1421,6 +1421,7 @@ def test_log_every_n_steps(log_metrics_mock, tmpdir, train_batches, max_steps, l ('simple', SimpleProfiler), ('Simple', SimpleProfiler), ('advanced', AdvancedProfiler), + ('pytorch', AdvancedProfiler), ]) def test_trainer_profiler_correct_args(profiler, expected): kwargs = {'profiler': profiler} if profiler is not None else {} @@ -1441,3 +1442,25 @@ def test_trainer_profiler_incorrect_arg_type(profiler): match=r"Only None, bool, str and subclasses of `BaseProfiler`" r" are valid values for `Trainer`'s `profiler` parameter. 
*"): Trainer(profiler=profiler) + + +# @pytest.mark.skipif(not _PYTORCH_GREATER_EQUAL_1_7_0, reason='test needs PyTorch 1.7+') +def test_pytorch_profiler(tmpdir): + class TestModel(BoringModel): + def training_step(self, batch, batch_idx): + output = self.layer(batch) + loss = self.loss(batch, output) + return {"loss": loss} + + model = TestModel() + + limit_train_batches = 2 + trainer = Trainer( + default_root_dir=tmpdir, + limit_train_batches=limit_train_batches, + limit_val_batches=2, + max_epochs=1, + profiler='pytorch' + ) + + trainer.fit(model) From cfae67b4253db7761ade48dec6dd14fd353117c8 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 18 Jan 2021 20:18:52 +0000 Subject: [PATCH 02/28] add profiler --- pytorch_lightning/profiler/profilers.py | 50 +++++++++++-------- .../trainer/connectors/profiler_connector.py | 6 ++- pytorch_lightning/utilities/__init__.py | 2 +- pytorch_lightning/utilities/imports.py | 2 +- .../test_train_loop_logging_1_0.py | 1 + tests/trainer/test_trainer.py | 4 +- 6 files changed, 39 insertions(+), 26 deletions(-) diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 3b44464bcdafa..0a14aa27f8270 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -30,6 +30,7 @@ from pytorch_lightning import _logger as log from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.exceptions import MisconfigurationException +from pytorch_lightning.utilities import rank_zero_only class BaseProfiler(ABC): @@ -294,28 +295,31 @@ class PytorchProfiler(BaseProfiler): PROFILED_FUNCTIONS = ["training_step", "validation_step", "test_step"] - def __init__(self, output_filename: Optional[str] = None, + def __init__(self, + output_filename: Optional[str] = None, enabled=True, - use_cuda=False, + use_cuda=True, record_shapes=True, profile_memory=True, - with_stack=True, + group_by_input_shape=True, sort_by_key: str = 
"self_cuda_memory_usage"): """ Args: output_filename: optionally save profile results to file instead of printing to std out when training is finished. - line_count_restriction: this can be used to limit the number of functions - reported for each action. either an integer (to select a count of lines), - or a decimal fraction between 0.0 and 1.0 inclusive (to select a percentage of lines) + enabled: Setting this to False makes this context manager a no-op. Default: True + use_cuda: Enables timing of CUDA events as well using the cudaEvent API. + Adds approximately 4us of overhead to each tensor operation. Default: True + record_shapes: If shapes recording is set, information about input dimensions will be collected. + profile_memory: Whether to report memory usage, default: True """ self.profiled_actions = {} self.enabled = enabled self.use_cuda = use_cuda self.record_shapes = record_shapes self.profile_memory = profile_memory - self.with_stack = with_stack self.sort_by_key = sort_by_key + self.group_by_input_shape = group_by_input_shape and record_shapes if self.sort_by_key not in self.available_sort_by_keys: raise MisconfigurationException( f"Found sort_by_key: {sort_by_key}. Should be within {self.available_sort_by_keys}. 
") @@ -330,6 +334,8 @@ def __init__(self, output_filename: Optional[str] = None, super().__init__(output_streams=streaming_out) def start(self, action_name: str) -> None: + # PyTorch profiler doesn't seem to work with multiple processes + self.enabled = os.getenv("LOCAL_RANK", None) is None if action_name not in self.profiled_actions and action_name in self.PROFILED_FUNCTIONS: self.profiled_actions[action_name] = torch.autograd.profiler.profile( enabled=self.enabled, @@ -338,13 +344,14 @@ def start(self, action_name: str) -> None: profile_memory=self.profile_memory).__enter__() def stop(self, action_name: str) -> None: - if action_name in self.PROFILED_FUNCTIONS: + if action_name in self.PROFILED_FUNCTIONS and self.enabled: pr = self.profiled_actions.get(action_name) if pr is None: raise ValueError( # pragma: no-cover f"Attempting to stop recording an action ({action_name}) which was never started." ) - # todo: Find a better solution + + # todo: Find a better solution to exit context manager try: _ = pr.__exit__(None, None, None) except RuntimeError as e: @@ -355,18 +362,21 @@ def stop(self, action_name: str) -> None: def summary(self) -> str: recorded_stats = {} - for action_name, pr in self.profiled_actions.items(): - table = self.profiled_actions[action_name].key_averages().table(sort_by=self.sort_by_key) - recorded_stats[action_name] = table - - # log to standard out - output_string = f"{os.linesep}Profiler Report{os.linesep}" - for action, stats in recorded_stats.items(): - output_string += ( - f"{os.linesep}Profile stats for: {action}{os.linesep}{stats}" - ) + if self.enabled: + for action_name, pr in self.profiled_actions.items(): + table = self.profiled_actions[action_name].key_averages( + group_by_input_shape=self.group_by_input_shape).table(sort_by=self.sort_by_key) + recorded_stats[action_name] = table + + # log to standard out + output_string = f"{os.linesep}Profiler Report{os.linesep}" + for action, stats in recorded_stats.items(): + output_string += ( 
+ f"{os.linesep}Profile stats for: {action}{os.linesep}{stats}" + ) - return output_string + return output_string + return '' def describe(self): """Logs a profile report after the conclusion of the training run.""" diff --git a/pytorch_lightning/trainer/connectors/profiler_connector.py b/pytorch_lightning/trainer/connectors/profiler_connector.py index e2992a82bbcf2..0c2dfec93715a 100644 --- a/pytorch_lightning/trainer/connectors/profiler_connector.py +++ b/pytorch_lightning/trainer/connectors/profiler_connector.py @@ -21,15 +21,17 @@ PytorchProfiler, SimpleProfiler, ) -from pytorch_lightning.utilities import rank_zero_warn +from pytorch_lightning.utilities import rank_zero_warn, _PYTORCH_GREATER_EQUAL_1_6_0 from pytorch_lightning.utilities.exceptions import MisconfigurationException PROFILERS = { "simple": SimpleProfiler, "advanced": AdvancedProfiler, - "pytorch": PytorchProfiler, } +if _PYTORCH_GREATER_EQUAL_1_6_0: + PROFILERS["pytorch"] = PytorchProfiler + class ProfilerConnector: diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 4b48e6595be6e..fe21849d2962d 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -34,7 +34,7 @@ _module_available, _NATIVE_AMP_AVAILABLE, _OMEGACONF_AVAILABLE, - _PYTORCH_GREATER_EQUAL_1_7_0, + _PYTORCH_GREATER_EQUAL_1_6_0, _RPC_AVAILABLE, _TORCHTEXT_AVAILABLE, _XLA_AVAILABLE, diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index 775c683b92bef..09a3d36938bce 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -53,4 +53,4 @@ def _module_available(module_path: str) -> bool: _GROUP_AVAILABLE = platform.system() != 'Windows' and _module_available('torch.distributed.group') _FAIRSCALE_PIPE_AVAILABLE = _FAIRSCALE_AVAILABLE and LooseVersion(torch.__version__) >= LooseVersion("1.6.0") _BOLTS_AVAILABLE = _module_available('pl_bolts') 
-_PYTORCH_GREATER_EQUAL_1_7_0 = LooseVersion(torch.__version__) >= LooseVersion("1.7.0") +_PYTORCH_GREATER_EQUAL_1_6_0 = LooseVersion(torch.__version__) >= LooseVersion("1.6.0") diff --git a/tests/trainer/logging_process/test_train_loop_logging_1_0.py b/tests/trainer/logging_process/test_train_loop_logging_1_0.py index f418db2bd72a5..514bfb49ec79a 100644 --- a/tests/trainer/logging_process/test_train_loop_logging_1_0.py +++ b/tests/trainer/logging_process/test_train_loop_logging_1_0.py @@ -740,6 +740,7 @@ def validation_step(self, batch, batch_idx): weights_summary=None, accelerator="ddp", gpus=2, + profiler="pytorch" ) trainer.fit(model) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 75170c89947c3..7d28afc6b0093 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -33,7 +33,7 @@ from pytorch_lightning.profiler.profilers import AdvancedProfiler, PassThroughProfiler, SimpleProfiler from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.trainer.states import TrainerState -from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE, _PYTORCH_GREATER_EQUAL_1_7_0 +from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE, _PYTORCH_GREATER_EQUAL_1_6_0 from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base import BoringModel, EvalModelTemplate @@ -1444,7 +1444,7 @@ def test_trainer_profiler_incorrect_arg_type(profiler): Trainer(profiler=profiler) -# @pytest.mark.skipif(not _PYTORCH_GREATER_EQUAL_1_7_0, reason='test needs PyTorch 1.7+') +# @pytest.mark.skipif(not _PYTORCH_GREATER_EQUAL_1_6_0, reason='test needs PyTorch 1.7+') def test_pytorch_profiler(tmpdir): class TestModel(BoringModel): def training_step(self, batch, batch_idx): From 5931c18d0268995e0c523c8acf3b609f9883524e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 19 Jan 2021 10:24:51 +0000 Subject: [PATCH 
03/28] update --- pytorch_lightning/profiler/__init__.py | 65 +++++++++++ pytorch_lightning/profiler/profilers.py | 108 ++++++++++++++---- .../trainer/connectors/profiler_connector.py | 7 +- pytorch_lightning/utilities/__init__.py | 1 - pytorch_lightning/utilities/imports.py | 3 +- tests/trainer/test_trainer.py | 17 ++- 6 files changed, 164 insertions(+), 37 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 329dff1e64e78..e3339d65e80a6 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -97,6 +97,71 @@ method `profile()` which returns a context handler. Simply pass in the name of your action that you want to track and the profiler will record performance for code executed within this context. +.. code-block:: python + + from pytorch_lightning.profiler import Profiler, PassThroughProfiler + + class MyModel(LightningModule): + def __init__(self, profiler=None): + self.profiler = profiler or PassThroughProfiler() + + def custom_processing_step(self, data): + with profiler.profile('my_custom_action'): + # custom processing step + return data + + profiler = Profiler() + model = MyModel(profiler) + trainer = Trainer(profiler=profiler, max_epochs=1) + + +PyTorch Profiling +-------------------- + +Autograd includes a profiler that lets you inspect the cost of different operators inside your model - both on the CPU and GPU. + +.. _cProfiler: https://docs.python.org/3/library/profile.html#module-cProfile + +.. code-block:: python + + trainer = Trainer(..., profiler="advanced") + + or + + profiler = AdvancedProfiler() + trainer = Trainer(..., profiler=profiler) + +The profiler's results will be printed at the completion of a training `fit()`. This profiler +report can be quite long, so you can also specify an `output_filename` to save the report instead +of logging it to the output in your terminal. 
The output below shows the profiling for the action +`get_train_batch`. + +.. code-block:: python + + Profiler Report + + Profile stats for: get_train_batch + 4869394 function calls (4863767 primitive calls) in 18.893 seconds + Ordered by: cumulative time + List reduced from 76 to 10 due to restriction <10> + ncalls tottime percall cumtime percall filename:lineno(function) + 3752/1876 0.011 0.000 18.887 0.010 {built-in method builtins.next} + 1876 0.008 0.000 18.877 0.010 dataloader.py:344(__next__) + 1876 0.074 0.000 18.869 0.010 dataloader.py:383(_next_data) + 1875 0.012 0.000 18.721 0.010 fetch.py:42(fetch) + 1875 0.084 0.000 18.290 0.010 fetch.py:44() + 60000 1.759 0.000 18.206 0.000 mnist.py:80(__getitem__) + 60000 0.267 0.000 13.022 0.000 transforms.py:68(__call__) + 60000 0.182 0.000 7.020 0.000 transforms.py:93(__call__) + 60000 1.651 0.000 6.839 0.000 functional.py:42(to_tensor) + 60000 0.260 0.000 5.734 0.000 transforms.py:167(__call__) + +You can also reference this profiler in your LightningModule to profile specific actions of interest. +If you don't want to always have the profiler turned on, you can optionally pass a `PassThroughProfiler` +which will allow you to skip profiling without having to make any code changes. Each profiler has a +method `profile()` which returns a context handler. Simply pass in the name of your action that you want +to track and the profiler will record performance for code executed within this context. + .. 
code-block:: python from pytorch_lightning.profiler import Profiler, PassThroughProfiler diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 0a14aa27f8270..6b0f68974ece5 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -16,6 +16,7 @@ import cProfile import io +import inspect import os import pstats import time @@ -299,10 +300,16 @@ def __init__(self, output_filename: Optional[str] = None, enabled=True, use_cuda=True, - record_shapes=True, - profile_memory=True, + record_shapes=False, + profile_memory=False, group_by_input_shape=True, - sort_by_key: str = "self_cuda_memory_usage"): + with_stack=True, + use_kineto=False, + use_cpu = True, + emit_nvtx=False, + export_to_chrome=False, + path_to_export_trace=None, + sort_by_key: str = "cpu_time_total"): """ Args: output_filename: optionally save profile results to file instead of printing @@ -311,15 +318,32 @@ def __init__(self, use_cuda: Enables timing of CUDA events as well using the cudaEvent API. Adds approximately 4us of overhead to each tensor operation. Default: True record_shapes: If shapes recording is set, information about input dimensions will be collected. - profile_memory: Whether to report memory usage, default: True - """ + profile_memory: Whether to report memory usage, default: True (1.6.0) + with_stack: record source information (file and line number) for the ops (1.7.0) + use_kineto: experimental support for Kineto profiler (1.8.0) + use_cpu: use_kineto=True and can be used to lower the overhead for GPU-only profiling (1.8.0) + emit_nvtx: Context manager that makes every autograd operation emit an NVTX range + * Run: nvprof --profile-from-start off -o trace_name.prof -- + To visualize, you can either use: + * nvvp trace_name.prof + * torch.autograd.profiler.load_nvprof(path) + export_to_chrome: Wether to export the sequence of profiled operators for Chrome. 
+ sort_by_key: Keys to sort out profiled table + path_to_export_trace: Path to exported traces. By default, it will be save where the file being is being run. + """ self.profiled_actions = {} self.enabled = enabled self.use_cuda = use_cuda self.record_shapes = record_shapes self.profile_memory = profile_memory self.sort_by_key = sort_by_key + self.with_stack = with_stack self.group_by_input_shape = group_by_input_shape and record_shapes + self.use_kineto = use_kineto + self.use_cpu = use_cpu + self.emit_nvtx = emit_nvtx + self.export_to_chrome = export_to_chrome + self.path_to_export_trace = path_to_export_trace if self.sort_by_key not in self.available_sort_by_keys: raise MisconfigurationException( f"Found sort_by_key: {sort_by_key}. Should be within {self.available_sort_by_keys}. ") @@ -334,39 +358,73 @@ def __init__(self, super().__init__(output_streams=streaming_out) def start(self, action_name: str) -> None: - # PyTorch profiler doesn't seem to work with multiple processes + # PyTorch Profiler doesn't seem to work with multiple processes + # Disable Profiler. 
self.enabled = os.getenv("LOCAL_RANK", None) is None if action_name not in self.profiled_actions and action_name in self.PROFILED_FUNCTIONS: - self.profiled_actions[action_name] = torch.autograd.profiler.profile( - enabled=self.enabled, - use_cuda=self.use_cuda, - record_shapes=self.record_shapes, - profile_memory=self.profile_memory).__enter__() + self.profiled_actions[action_name] = [] + if self.emit_nvtx: + self._create_profiler(action_name, torch.cuda.profiler.profile, enter=False) + # warmup + x = torch.rand(100, 100, device='cuda') + temp = x * x + self._create_profiler(action_name, torch.autograd.profiler.emit_nvtx) + else: + self._create_profiler(action_name, torch.autograd.profiler.profile) + + def _create_profiler(self, action_name, profiler, enter=False): + init_args = inspect.signature(profiler.__init__).parameters + profiler_args = { + k: v for k, v in vars(self).items() if k in init_args + } + profiler = profiler(**profiler_args) + if enter: + profiler = profiler.__enter__() + self.profiled_actions[action_name].append(profiler) def stop(self, action_name: str) -> None: if action_name in self.PROFILED_FUNCTIONS and self.enabled: - pr = self.profiled_actions.get(action_name) - if pr is None: + profilers = self.profiled_actions.get(action_name) + if not profilers: raise ValueError( # pragma: no-cover f"Attempting to stop recording an action ({action_name}) which was never started." 
) - - # todo: Find a better solution to exit context manager - try: - _ = pr.__exit__(None, None, None) - except RuntimeError as e: - if "Expected debug info of type 2" in str(e): - pass - else: - raise RuntimeError(str(e)) + else: + for pr in profilers[::-1]: + self._handle_exit(pr) + + def _handle_exit(self, pr): + # todo: Find a better solution to exit context manager + if pr is None: + return + try: + _ = pr.__exit__(None, None, None) + except RuntimeError as e: + if "Expected debug info of type 2" in str(e): + pass + elif "can't disable profiler when it's not running" in str(e): + pass + elif "generator didn't stop" in str(e): + pass + else: + raise RuntimeError(str(e)) def summary(self) -> str: recorded_stats = {} if self.enabled: for action_name, pr in self.profiled_actions.items(): - table = self.profiled_actions[action_name].key_averages( - group_by_input_shape=self.group_by_input_shape).table(sort_by=self.sort_by_key) - recorded_stats[action_name] = table + pr = pr[-1] + if self.export_to_chrome: + filename = f"{action_name}_trace.json" + path_to_trace = filename if self.path_to_export_trace is None \ + else os.path.join(self.path_to_export_trace, filename) + pr.export_chrome_trace(path_to_trace) + if self.emit_nvtx: + return "" + else: + table = pr.key_averages( + group_by_input_shape=self.group_by_input_shape).table(sort_by=self.sort_by_key) + recorded_stats[action_name] = table # log to standard out output_string = f"{os.linesep}Profiler Report{os.linesep}" diff --git a/pytorch_lightning/trainer/connectors/profiler_connector.py b/pytorch_lightning/trainer/connectors/profiler_connector.py index 0c2dfec93715a..b7f333626a152 100644 --- a/pytorch_lightning/trainer/connectors/profiler_connector.py +++ b/pytorch_lightning/trainer/connectors/profiler_connector.py @@ -21,18 +21,15 @@ PytorchProfiler, SimpleProfiler, ) -from pytorch_lightning.utilities import rank_zero_warn, _PYTORCH_GREATER_EQUAL_1_6_0 +from pytorch_lightning.utilities import rank_zero_warn 
from pytorch_lightning.utilities.exceptions import MisconfigurationException PROFILERS = { "simple": SimpleProfiler, "advanced": AdvancedProfiler, + "pytorch": PytorchProfiler } -if _PYTORCH_GREATER_EQUAL_1_6_0: - PROFILERS["pytorch"] = PytorchProfiler - - class ProfilerConnector: def __init__(self, trainer): diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index fe21849d2962d..0a5ed04eb72a3 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -34,7 +34,6 @@ _module_available, _NATIVE_AMP_AVAILABLE, _OMEGACONF_AVAILABLE, - _PYTORCH_GREATER_EQUAL_1_6_0, _RPC_AVAILABLE, _TORCHTEXT_AVAILABLE, _XLA_AVAILABLE, diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index 09a3d36938bce..19493f0619a79 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -52,5 +52,4 @@ def _module_available(module_path: str) -> bool: _RPC_AVAILABLE = platform.system() != 'Windows' and _module_available('torch.distributed.rpc') _GROUP_AVAILABLE = platform.system() != 'Windows' and _module_available('torch.distributed.group') _FAIRSCALE_PIPE_AVAILABLE = _FAIRSCALE_AVAILABLE and LooseVersion(torch.__version__) >= LooseVersion("1.6.0") -_BOLTS_AVAILABLE = _module_available('pl_bolts') -_PYTORCH_GREATER_EQUAL_1_6_0 = LooseVersion(torch.__version__) >= LooseVersion("1.6.0") +_BOLTS_AVAILABLE = _module_available('pl_bolts') \ No newline at end of file diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 7d28afc6b0093..ec8a9b3178a5d 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -30,10 +30,10 @@ from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml, save_hparams_to_tags_csv from pytorch_lightning.loggers import TensorBoardLogger -from 
pytorch_lightning.profiler.profilers import AdvancedProfiler, PassThroughProfiler, SimpleProfiler +from pytorch_lightning.profiler.profilers import AdvancedProfiler, PassThroughProfiler, PytorchProfiler, SimpleProfiler from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.trainer.states import TrainerState -from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE, _PYTORCH_GREATER_EQUAL_1_6_0 +from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE from pytorch_lightning.utilities.cloud_io import load as pl_load from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base import BoringModel, EvalModelTemplate @@ -1421,7 +1421,7 @@ def test_log_every_n_steps(log_metrics_mock, tmpdir, train_batches, max_steps, l ('simple', SimpleProfiler), ('Simple', SimpleProfiler), ('advanced', AdvancedProfiler), - ('pytorch', AdvancedProfiler), + ('pytorch', PytorchProfiler), ]) def test_trainer_profiler_correct_args(profiler, expected): kwargs = {'profiler': profiler} if profiler is not None else {} @@ -1444,7 +1444,6 @@ def test_trainer_profiler_incorrect_arg_type(profiler): Trainer(profiler=profiler) -# @pytest.mark.skipif(not _PYTORCH_GREATER_EQUAL_1_6_0, reason='test needs PyTorch 1.7+') def test_pytorch_profiler(tmpdir): class TestModel(BoringModel): def training_step(self, batch, batch_idx): @@ -1464,3 +1463,13 @@ def training_step(self, batch, batch_idx): ) trainer.fit(model) + + +def test_pytorch_profiler_2(tmpdir): + print(f'Version = {torch.__version__}') + + x = torch.rand(100, 100, device='cuda') + + with torch.cuda.profiler.profile(): + with torch.autograd.profiler.emit_nvtx(): + temp = x * x \ No newline at end of file From c85661ad0a61a74b353a04dcf258ac5e45093add Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 19 Jan 2021 10:46:14 +0000 Subject: [PATCH 04/28] resolve flake8 --- pytorch_lightning/profiler/__init__.py | 9 +++--- pytorch_lightning/profiler/profilers.py | 32 
++++++++----------- .../trainer/connectors/profiler_connector.py | 1 + pytorch_lightning/utilities/imports.py | 2 +- tests/trainer/test_trainer.py | 10 ------ 5 files changed, 20 insertions(+), 34 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index e3339d65e80a6..ff714248642d3 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -118,17 +118,16 @@ def custom_processing_step(self, data): PyTorch Profiling -------------------- -Autograd includes a profiler that lets you inspect the cost of different operators inside your model - both on the CPU and GPU. - -.. _cProfiler: https://docs.python.org/3/library/profile.html#module-cProfile +Autograd includes a profiler that lets you inspect the cost of different operators +inside your model - both on the CPU and GPU. .. code-block:: python - trainer = Trainer(..., profiler="advanced") + trainer = Trainer(..., profiler="pytorch") or - profiler = AdvancedProfiler() + profiler = PytorchProfiler() trainer = Trainer(..., profiler=profiler) The profiler's results will be printed at the completion of a training `fit()`. 
This profiler diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 6b0f68974ece5..3f541e5144d09 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -15,8 +15,8 @@ """Profiler to check if there are any bottlenecks in your code.""" import cProfile -import io import inspect +import io import os import pstats import time @@ -31,7 +31,6 @@ from pytorch_lightning import _logger as log from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities import rank_zero_only class BaseProfiler(ABC): @@ -296,16 +295,16 @@ class PytorchProfiler(BaseProfiler): PROFILED_FUNCTIONS = ["training_step", "validation_step", "test_step"] - def __init__(self, + def __init__(self, output_filename: Optional[str] = None, enabled=True, - use_cuda=True, + use_cuda=False, record_shapes=False, profile_memory=False, group_by_input_shape=True, with_stack=True, use_kineto=False, - use_cpu = True, + use_cpu=True, emit_nvtx=False, export_to_chrome=False, path_to_export_trace=None, @@ -315,7 +314,7 @@ def __init__(self, output_filename: optionally save profile results to file instead of printing to std out when training is finished. enabled: Setting this to False makes this context manager a no-op. Default: True - use_cuda: Enables timing of CUDA events as well using the cudaEvent API. + use_cuda: Enables timing of CUDA events as well using the cudaEvent API. Adds approximately 4us of overhead to each tensor operation. Default: True record_shapes: If shapes recording is set, information about input dimensions will be collected. profile_memory: Whether to report memory usage, default: True (1.6.0) @@ -329,10 +328,13 @@ def __init__(self, * torch.autograd.profiler.load_nvprof(path) export_to_chrome: Wether to export the sequence of profiled operators for Chrome. 
sort_by_key: Keys to sort out profiled table - path_to_export_trace: Path to exported traces. By default, it will be save where the file being is being run. - """ + path_to_export_trace: Path to exported traces. By default, it will be save + where the file being is being run. + """ self.profiled_actions = {} - self.enabled = enabled + # PyTorch Profiler doesn't seem to work with multiple processes + enabled = enabled and os.getenv("LOCAL_RANK", None) is None + self.profiled_actions_enabled = {n: enabled for n in self.PROFILED_FUNCTIONS} self.use_cuda = use_cuda self.record_shapes = record_shapes self.profile_memory = profile_memory @@ -358,16 +360,11 @@ def __init__(self, super().__init__(output_streams=streaming_out) def start(self, action_name: str) -> None: - # PyTorch Profiler doesn't seem to work with multiple processes - # Disable Profiler. - self.enabled = os.getenv("LOCAL_RANK", None) is None if action_name not in self.profiled_actions and action_name in self.PROFILED_FUNCTIONS: + self.enabled = self.profiled_actions_enabled[action_name] self.profiled_actions[action_name] = [] if self.emit_nvtx: self._create_profiler(action_name, torch.cuda.profiler.profile, enter=False) - # warmup - x = torch.rand(100, 100, device='cuda') - temp = x * x self._create_profiler(action_name, torch.autograd.profiler.emit_nvtx) else: self._create_profiler(action_name, torch.autograd.profiler.profile) @@ -392,11 +389,10 @@ def stop(self, action_name: str) -> None: else: for pr in profilers[::-1]: self._handle_exit(pr) + self.profiled_actions_enabled[action_name] = True def _handle_exit(self, pr): # todo: Find a better solution to exit context manager - if pr is None: - return try: _ = pr.__exit__(None, None, None) except RuntimeError as e: @@ -407,7 +403,7 @@ def _handle_exit(self, pr): elif "generator didn't stop" in str(e): pass else: - raise RuntimeError(str(e)) + raise RuntimeError(str(e)) def summary(self) -> str: recorded_stats = {} diff --git 
a/pytorch_lightning/trainer/connectors/profiler_connector.py b/pytorch_lightning/trainer/connectors/profiler_connector.py index b7f333626a152..2daf0ae2b9e4a 100644 --- a/pytorch_lightning/trainer/connectors/profiler_connector.py +++ b/pytorch_lightning/trainer/connectors/profiler_connector.py @@ -30,6 +30,7 @@ "pytorch": PytorchProfiler } + class ProfilerConnector: def __init__(self, trainer): diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index 19493f0619a79..acdebfbf239e4 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -52,4 +52,4 @@ def _module_available(module_path: str) -> bool: _RPC_AVAILABLE = platform.system() != 'Windows' and _module_available('torch.distributed.rpc') _GROUP_AVAILABLE = platform.system() != 'Windows' and _module_available('torch.distributed.group') _FAIRSCALE_PIPE_AVAILABLE = _FAIRSCALE_AVAILABLE and LooseVersion(torch.__version__) >= LooseVersion("1.6.0") -_BOLTS_AVAILABLE = _module_available('pl_bolts') \ No newline at end of file +_BOLTS_AVAILABLE = _module_available('pl_bolts') diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index ec8a9b3178a5d..007285a0416ec 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1463,13 +1463,3 @@ def training_step(self, batch, batch_idx): ) trainer.fit(model) - - -def test_pytorch_profiler_2(tmpdir): - print(f'Version = {torch.__version__}') - - x = torch.rand(100, 100, device='cuda') - - with torch.cuda.profiler.profile(): - with torch.autograd.profiler.emit_nvtx(): - temp = x * x \ No newline at end of file From 9a62eb84759e9368a20da660989a978181b63f51 Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 19 Jan 2021 11:03:59 +0000 Subject: [PATCH 05/28] update doc --- pytorch_lightning/profiler/__init__.py | 89 +++++++++++++++---------- pytorch_lightning/profiler/profilers.py | 14 ++-- 2 files changed, 61 insertions(+), 42 deletions(-) diff --git 
a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index ff714248642d3..6d82889294eae 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -127,56 +127,75 @@ def custom_processing_step(self, data): or - profiler = PytorchProfiler() + profiler = PytorchProfiler( + output_filename = ... + enabled = ... + use_cuda = ... + record_shapes = ... + profile_memory = ... + with_stack = ... + use_kineto = ... + use_cpu = ... + emit_nvtx = ... + export_to_chrome = ... + sort_by_key = ... + path_to_export_trace = ... + ) trainer = Trainer(..., profiler=profiler) The profiler's results will be printed at the completion of a training `fit()`. This profiler report can be quite long, so you can also specify an `output_filename` to save the report instead of logging it to the output in your terminal. The output below shows the profiling for the action `get_train_batch`. +This profiler will record only for `training_step`, `evaluation_step` and `test_step` functions. .. 
code-block:: python Profiler Report - Profile stats for: get_train_batch - 4869394 function calls (4863767 primitive calls) in 18.893 seconds - Ordered by: cumulative time - List reduced from 76 to 10 due to restriction <10> - ncalls tottime percall cumtime percall filename:lineno(function) - 3752/1876 0.011 0.000 18.887 0.010 {built-in method builtins.next} - 1876 0.008 0.000 18.877 0.010 dataloader.py:344(__next__) - 1876 0.074 0.000 18.869 0.010 dataloader.py:383(_next_data) - 1875 0.012 0.000 18.721 0.010 fetch.py:42(fetch) - 1875 0.084 0.000 18.290 0.010 fetch.py:44() - 60000 1.759 0.000 18.206 0.000 mnist.py:80(__getitem__) - 60000 0.267 0.000 13.022 0.000 transforms.py:68(__call__) - 60000 0.182 0.000 7.020 0.000 transforms.py:93(__call__) - 60000 1.651 0.000 6.839 0.000 functional.py:42(to_tensor) - 60000 0.260 0.000 5.734 0.000 transforms.py:167(__call__) - -You can also reference this profiler in your LightningModule to profile specific actions of interest. -If you don't want to always have the profiler turned on, you can optionally pass a `PassThroughProfiler` -which will allow you to skip profiling without having to make any code changes. Each profiler has a -method `profile()` which returns a context handler. Simply pass in the name of your action that you want -to track and the profiler will record performance for code executed within this context. 
+ Profile stats for: training_step + --------------------- --------------- --------------- --------------- --------------- --------------- + Name Self CPU total % Self CPU total CPU total % CPU total CPU time avg + --------------------- --------------- --------------- --------------- --------------- --------------- + t 62.10% 1.044ms 62.77% 1.055ms 1.055ms + addmm 32.32% 543.135us 32.69% 549.362us 549.362us + mse_loss 1.35% 22.657us 3.58% 60.105us 60.105us + mean 0.22% 3.694us 2.05% 34.523us 34.523us + div_ 0.64% 10.756us 1.90% 32.001us 16.000us + ones_like 0.21% 3.461us 0.81% 13.669us 13.669us + sum_out 0.45% 7.638us 0.74% 12.432us 12.432us + transpose 0.23% 3.786us 0.68% 11.393us 11.393us + as_strided 0.60% 10.060us 0.60% 10.060us 3.353us + to 0.18% 3.059us 0.44% 7.464us 7.464us + empty_like 0.14% 2.387us 0.41% 6.859us 6.859us + empty_strided 0.38% 6.351us 0.38% 6.351us 3.175us + fill_ 0.28% 4.782us 0.33% 5.566us 2.783us + expand 0.20% 3.336us 0.28% 4.743us 4.743us + empty 0.27% 4.456us 0.27% 4.456us 2.228us + copy_ 0.15% 2.526us 0.15% 2.526us 2.526us + broadcast_tensors 0.15% 2.492us 0.15% 2.492us 2.492us + size 0.06% 0.967us 0.06% 0.967us 0.484us + is_complex 0.06% 0.961us 0.06% 0.961us 0.481us + stride 0.03% 0.517us 0.03% 0.517us 0.517us + --------------------- --------------- --------------- --------------- --------------- --------------- + Self CPU time total: 1.681ms + +When running with `PytorchProfiler(emit_nvtx=True)`. You should run as following: + +nvprof --profile-from-start off -o trace_name.prof -- + +To visualize the profiled operation, you can either: + +* Use: nvvp trace_name.prof + +* Use: torch.autograd.profiler.load_nvprof(path) .. 
code-block:: python - from pytorch_lightning.profiler import Profiler, PassThroughProfiler - - class MyModel(LightningModule): - def __init__(self, profiler=None): - self.profiler = profiler or PassThroughProfiler() + >>> import torch + >>> torch.autograd.profiler.load_nvprof(".../trace_name.prof") + [] - def custom_processing_step(self, data): - with profiler.profile('my_custom_action'): - # custom processing step - return data - - profiler = Profiler() - model = MyModel(profiler) - trainer = Trainer(profiler=profiler, max_epochs=1) """ diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 3f541e5144d09..c961d9c830df5 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -301,10 +301,10 @@ def __init__(self, use_cuda=False, record_shapes=False, profile_memory=False, - group_by_input_shape=True, - with_stack=True, + group_by_input_shape=False, + with_stack=False, use_kineto=False, - use_cpu=True, + use_cpu=False, emit_nvtx=False, export_to_chrome=False, path_to_export_trace=None, @@ -369,15 +369,15 @@ def start(self, action_name: str) -> None: else: self._create_profiler(action_name, torch.autograd.profiler.profile) - def _create_profiler(self, action_name, profiler, enter=False): + def _create_profiler(self, action_name, profiler, enter=True): init_args = inspect.signature(profiler.__init__).parameters profiler_args = { k: v for k, v in vars(self).items() if k in init_args } - profiler = profiler(**profiler_args) + pr = profiler(**profiler_args) if enter: - profiler = profiler.__enter__() - self.profiled_actions[action_name].append(profiler) + pr = pr.__enter__() + self.profiled_actions[action_name].append(pr) def stop(self, action_name: str) -> None: if action_name in self.PROFILED_FUNCTIONS and self.enabled: From 6f54b69d2960cb17a2460b36e5be8d52cd58a183 Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 19 Jan 2021 11:04:49 +0000 Subject: [PATCH 06/28] update changelog 
--- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b56321765bbf0..47d7429a29c0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - `Recall` and `Precision` metrics (and their functional counterparts `recall` and `precision`) can now be generalized to Recall@K and Precision@K with the use of `top_k` parameter ([#4842](https://github.com/PyTorchLightning/pytorch-lightning/pull/4842)) +- Added `PytorchProfiler` ([#5560](https://github.com/PyTorchLightning/pytorch-lightning/pull/5560)) + ### Changed From 1bbe314dee11c396adf571ee991f64cc17ec58bd Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 19 Jan 2021 11:23:05 +0000 Subject: [PATCH 07/28] clean doc --- pytorch_lightning/profiler/__init__.py | 9 +-------- trace_name.prof | 0 2 files changed, 1 insertion(+), 8 deletions(-) create mode 100644 trace_name.prof diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 6d82889294eae..bc3c9c5bc29ed 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -188,14 +188,7 @@ def custom_processing_step(self, data): * Use: nvvp trace_name.prof -* Use: torch.autograd.profiler.load_nvprof(path) - -.. 
code-block:: python - - >>> import torch - >>> torch.autograd.profiler.load_nvprof(".../trace_name.prof") - [] - +* Use: python -c 'import torch; print(torch.autograd.profiler.load_nvprof("trace_name.prof"))' """ diff --git a/trace_name.prof b/trace_name.prof new file mode 100644 index 0000000000000..e69de29bb2d1d From bd035da941b0751307215bcb7d98d81a69ba3b4c Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 19 Jan 2021 11:30:58 +0000 Subject: [PATCH 08/28] delete prof file --- .gitignore | 1 + trace_name.prof | 0 2 files changed, 1 insertion(+) delete mode 100644 trace_name.prof diff --git a/.gitignore b/.gitignore index 743fdaaf33dc2..237dbef370a2a 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,4 @@ pytorch\ lightning test-reports/ wandb .forked/ +*.prof diff --git a/trace_name.prof b/trace_name.prof deleted file mode 100644 index e69de29bb2d1d..0000000000000 From e689cdabef902f997b99de5d534a1a143f03c0cc Mon Sep 17 00:00:00 2001 From: tchaton Date: Thu, 21 Jan 2021 19:00:31 +0000 Subject: [PATCH 09/28] merge pr codebase --- CHANGELOG.md | 2 +- pytorch_lightning/profiler/__init__.py | 25 +-- pytorch_lightning/profiler/profilers.py | 163 +++++++++++------- .../trainer/connectors/profiler_connector.py | 4 +- tests/trainer/test_trainer.py | 75 ++++++-- 5 files changed, 174 insertions(+), 95 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47d7429a29c0d..537cc317dae3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,7 +57,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- `Recall` and `Precision` metrics (and their functional counterparts `recall` and `precision`) can now be generalized to Recall@K and Precision@K with the use of `top_k` parameter ([#4842](https://github.com/PyTorchLightning/pytorch-lightning/pull/4842)) -- Added `PytorchProfiler` ([#5560](https://github.com/PyTorchLightning/pytorch-lightning/pull/5560)) +- Added `PyTorchProfiler` ([#5560](https://github.com/PyTorchLightning/pytorch-lightning/pull/5560)) ### Changed diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index bc3c9c5bc29ed..3f395fdcfd9aa 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -121,29 +121,18 @@ def custom_processing_step(self, data): Autograd includes a profiler that lets you inspect the cost of different operators inside your model - both on the CPU and GPU. +Find the Pytorch Profiler doc at [PyTorch Profiler](https://pytorch-lightning.readthedocs.io/en/stable/profiler.html) + .. code-block:: python trainer = Trainer(..., profiler="pytorch") or - profiler = PytorchProfiler( - output_filename = ... - enabled = ... - use_cuda = ... - record_shapes = ... - profile_memory = ... - with_stack = ... - use_kineto = ... - use_cpu = ... - emit_nvtx = ... - export_to_chrome = ... - sort_by_key = ... - path_to_export_trace = ... - ) + profiler = PyTorchProfiler(...) trainer = Trainer(..., profiler=profiler) -The profiler's results will be printed at the completion of a training `fit()`. This profiler +The profiler's results will be printed on the completion of a training `fit()`. This profiler report can be quite long, so you can also specify an `output_filename` to save the report instead of logging it to the output in your terminal. The output below shows the profiling for the action `get_train_batch`. 
@@ -180,7 +169,7 @@ def custom_processing_step(self, data): --------------------- --------------- --------------- --------------- --------------- --------------- Self CPU time total: 1.681ms -When running with `PytorchProfiler(emit_nvtx=True)`. You should run as following: +When running with `PyTorchProfiler(emit_nvtx=True)`. You should run as following: nvprof --profile-from-start off -o trace_name.prof -- @@ -196,7 +185,7 @@ def custom_processing_step(self, data): AdvancedProfiler, BaseProfiler, PassThroughProfiler, - PytorchProfiler, + PyTorchProfiler, SimpleProfiler, ) @@ -205,5 +194,5 @@ def custom_processing_step(self, data): 'SimpleProfiler', 'AdvancedProfiler', 'PassThroughProfiler', - "PytorchProfiler", + "PyTorchProfiler", ] diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index c961d9c830df5..6c2141db50330 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -287,62 +287,83 @@ def __del__(self): self.output_file.close() -class PytorchProfiler(BaseProfiler): +class PyTorchProfiler(BaseProfiler): """ - This profiler uses PyTorch's Autograd Profiler and let's you inspect the cost of + This profiler uses PyTorch's Autograd Profiler and lets you inspect the cost of different operators inside your model - both on the CPU and GPU """ - PROFILED_FUNCTIONS = ["training_step", "validation_step", "test_step"] + PROFILER_OVERHEAD_MAX_TOLERANCE = 0.0005 def __init__(self, output_filename: Optional[str] = None, - enabled=True, - use_cuda=False, - record_shapes=False, - profile_memory=False, - group_by_input_shape=False, - with_stack=False, - use_kineto=False, - use_cpu=False, - emit_nvtx=False, - export_to_chrome=False, - path_to_export_trace=None, - sort_by_key: str = "cpu_time_total"): + enabled: bool = True, + use_cuda: bool = False, + record_shapes: bool = False, + profile_memory: bool = False, + group_by_input_shapes: bool = False, + with_stack: bool = False, + 
use_kineto: bool = False, + use_cpu: bool = False, + emit_nvtx: bool = False, + export_to_chrome: bool = False, + path_to_export_trace: bool = None, + row_limit: int = 20, + sort_by_key: Optional[str] = None, + profiled_functions=["training_step_and_backward", "validation_step", "test_step"]): """ Args: + output_filename: optionally save profile results to file instead of printing to std out when training is finished. + enabled: Setting this to False makes this context manager a no-op. Default: True + use_cuda: Enables timing of CUDA events as well using the cudaEvent API. Adds approximately 4us of overhead to each tensor operation. Default: True + record_shapes: If shapes recording is set, information about input dimensions will be collected. + profile_memory: Whether to report memory usage, default: True (1.6.0) + with_stack: record source information (file and line number) for the ops (1.7.0) + use_kineto: experimental support for Kineto profiler (1.8.0) + use_cpu: use_kineto=True and can be used to lower the overhead for GPU-only profiling (1.8.0) + emit_nvtx: Context manager that makes every autograd operation emit an NVTX range * Run: nvprof --profile-from-start off -o trace_name.prof -- To visualize, you can either use: * nvvp trace_name.prof * torch.autograd.profiler.load_nvprof(path) + export_to_chrome: Wether to export the sequence of profiled operators for Chrome. - sort_by_key: Keys to sort out profiled table + path_to_export_trace: Path to exported traces. By default, it will be save where the file being is being run. + + row_limit: Limit the number of rows in a table, `0` is a special value that + removes the limit completely. + + sort_by_key: Keys to sort out profiled table + + profiled_functions: list of profiled functions which will create a context manager on. + Any other will be pass through. 
""" self.profiled_actions = {} # PyTorch Profiler doesn't seem to work with multiple processes - enabled = enabled and os.getenv("LOCAL_RANK", None) is None - self.profiled_actions_enabled = {n: enabled for n in self.PROFILED_FUNCTIONS} + self.enabled = enabled and os.getenv("LOCAL_RANK", None) is None + self.profiled_functions = profiled_functions self.use_cuda = use_cuda self.record_shapes = record_shapes self.profile_memory = profile_memory - self.sort_by_key = sort_by_key + self.sort_by_key = sort_by_key or ("cuda_time_total" if self.use_cuda else "cpu_time_total") self.with_stack = with_stack - self.group_by_input_shape = group_by_input_shape and record_shapes + self.group_by_input_shapes = group_by_input_shapes and record_shapes self.use_kineto = use_kineto self.use_cpu = use_cpu + self.row_limit = row_limit self.emit_nvtx = emit_nvtx self.export_to_chrome = export_to_chrome self.path_to_export_trace = path_to_export_trace @@ -350,8 +371,14 @@ def __init__(self, raise MisconfigurationException( f"Found sort_by_key: {sort_by_key}. Should be within {self.available_sort_by_keys}. 
") + self.profiled_actions = {} + self.context_names = {} + self.running_stack = [] + self.profiler = None + self.output_fname = output_filename self.output_file = None + if self.output_fname: fs = get_filesystem(self.output_fname) self.output_file = fs.open(self.output_fname, "w") @@ -360,14 +387,22 @@ def __init__(self, super().__init__(output_streams=streaming_out) def start(self, action_name: str) -> None: - if action_name not in self.profiled_actions and action_name in self.PROFILED_FUNCTIONS: - self.enabled = self.profiled_actions_enabled[action_name] - self.profiled_actions[action_name] = [] - if self.emit_nvtx: - self._create_profiler(action_name, torch.cuda.profiler.profile, enter=False) - self._create_profiler(action_name, torch.autograd.profiler.emit_nvtx) - else: - self._create_profiler(action_name, torch.autograd.profiler.profile) + # stop the running profiler if any + if action_name in self.profiled_functions: + if len(self.running_stack) > 0: + self._stop(self.running_stack[-1]) + self.running_stack.append(action_name) + + self.context_names[action_name] = "/".join(self.running_stack) + + self._start(action_name) + + def _start(self, action_name: str) -> None: + if self.emit_nvtx: + self._create_profiler(action_name, torch.cuda.profiler.profile, enter=False) + self._create_profiler(action_name, torch.autograd.profiler.emit_nvtx) + else: + self._create_profiler(action_name, torch.autograd.profiler.profile) def _create_profiler(self, action_name, profiler, enter=True): init_args = inspect.signature(profiler.__init__).parameters @@ -377,49 +412,62 @@ def _create_profiler(self, action_name, profiler, enter=True): pr = profiler(**profiler_args) if enter: pr = pr.__enter__() - self.profiled_actions[action_name].append(pr) + self.profiler = pr + + def _stop(self, action_name: str) -> None: + if self.profiler is None: + return + self.profiler.__exit__( + exc_type=None, + exc_val=None, + exc_tb=None + ) + events = self.profiler.function_events + self.profiler 
= None + for name in self.running_stack: + if name not in self.profiled_actions: + self.profiled_actions[name] = events + else: + self.profiled_actions[name] += events def stop(self, action_name: str) -> None: - if action_name in self.PROFILED_FUNCTIONS and self.enabled: - profilers = self.profiled_actions.get(action_name) - if not profilers: + if action_name in self.profiled_functions: + if len(self.running_stack) == 0 or self.running_stack[-1] != action_name: raise ValueError( # pragma: no-cover f"Attempting to stop recording an action ({action_name}) which was never started." ) - else: - for pr in profilers[::-1]: - self._handle_exit(pr) - self.profiled_actions_enabled[action_name] = True - - def _handle_exit(self, pr): - # todo: Find a better solution to exit context manager - try: - _ = pr.__exit__(None, None, None) - except RuntimeError as e: - if "Expected debug info of type 2" in str(e): - pass - elif "can't disable profiler when it's not running" in str(e): - pass - elif "generator didn't stop" in str(e): - pass - else: - raise RuntimeError(str(e)) + self._stop(action_name) + self.running_stack.pop() + # restore running profiler + if len(self.running_stack) > 0: + self._start(self.running_stack[-1]) def summary(self) -> str: recorded_stats = {} + output_string = '' + if self.enabled: - for action_name, pr in self.profiled_actions.items(): - pr = pr[-1] + for action_name, events in self.profiled_actions.items(): + + # next line is a workaround for a pytorch issue (fixed on master, still present + # on 1.7). 
Without it the code fails with `AssertionError: There is already a CPU + # parent event for detach` + events.populate_cpu_children = lambda: None + if self.export_to_chrome: filename = f"{action_name}_trace.json" path_to_trace = filename if self.path_to_export_trace is None \ else os.path.join(self.path_to_export_trace, filename) - pr.export_chrome_trace(path_to_trace) + events.export_chrome_trace(path_to_trace) + if self.emit_nvtx: - return "" + return output_string + else: - table = pr.key_averages( - group_by_input_shape=self.group_by_input_shape).table(sort_by=self.sort_by_key) + table = events.key_averages( + group_by_input_shapes=self.group_by_input_shapes).table( + sort_by=self.sort_by_key, + row_limit=self.row_limit) recorded_stats[action_name] = table # log to standard out @@ -429,8 +477,7 @@ def summary(self) -> str: f"{os.linesep}Profile stats for: {action}{os.linesep}{stats}" ) - return output_string - return '' + return output_string def describe(self): """Logs a profile report after the conclusion of the training run.""" diff --git a/pytorch_lightning/trainer/connectors/profiler_connector.py b/pytorch_lightning/trainer/connectors/profiler_connector.py index 2daf0ae2b9e4a..d2e6ada35412e 100644 --- a/pytorch_lightning/trainer/connectors/profiler_connector.py +++ b/pytorch_lightning/trainer/connectors/profiler_connector.py @@ -18,7 +18,7 @@ AdvancedProfiler, BaseProfiler, PassThroughProfiler, - PytorchProfiler, + PyTorchProfiler, SimpleProfiler, ) from pytorch_lightning.utilities import rank_zero_warn @@ -27,7 +27,7 @@ PROFILERS = { "simple": SimpleProfiler, "advanced": AdvancedProfiler, - "pytorch": PytorchProfiler + "pytorch": PyTorchProfiler } diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 007285a0416ec..74d99a1eea8cc 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -30,7 +30,7 @@ from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint from pytorch_lightning.core.saving 
import load_hparams_from_tags_csv, load_hparams_from_yaml, save_hparams_to_tags_csv from pytorch_lightning.loggers import TensorBoardLogger -from pytorch_lightning.profiler.profilers import AdvancedProfiler, PassThroughProfiler, PytorchProfiler, SimpleProfiler +from pytorch_lightning.profiler.profilers import AdvancedProfiler, PassThroughProfiler, PyTorchProfiler, SimpleProfiler from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.utilities import _NATIVE_AMP_AVAILABLE @@ -39,6 +39,12 @@ from tests.base import BoringModel, EvalModelTemplate +@pytest.fixture +def pytorch_profiler(tmpdir): + profiler = PyTorchProfiler(output_filename=os.path.join(tmpdir, "profiler.txt")) + return profiler + + @pytest.mark.parametrize("url_ckpt", [True, False]) def test_no_val_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt): """Tests use case where trainer saves the model, and user loads it from tags independently.""" @@ -1421,7 +1427,7 @@ def test_log_every_n_steps(log_metrics_mock, tmpdir, train_batches, max_steps, l ('simple', SimpleProfiler), ('Simple', SimpleProfiler), ('advanced', AdvancedProfiler), - ('pytorch', PytorchProfiler), + ('pytorch', PyTorchProfiler), ]) def test_trainer_profiler_correct_args(profiler, expected): kwargs = {'profiler': profiler} if profiler is not None else {} @@ -1444,22 +1450,59 @@ def test_trainer_profiler_incorrect_arg_type(profiler): Trainer(profiler=profiler) -def test_pytorch_profiler(tmpdir): - class TestModel(BoringModel): - def training_step(self, batch, batch_idx): - output = self.layer(batch) - loss = self.loss(batch, output) - return {"loss": loss} +def _get_pytorch_profiler_total_duration(events): + total_time = sum([e.cpu_time + e.cuda_time for e in events]) + return total_time / 1e6 # convert microseconds to seconds - model = TestModel() - limit_train_batches = 2 +def test_autograd_profiler_overhead(pytorch_profiler, n_iter=5): + """Ensure 
that the profiler doesn't introduce too much overhead during training.""" + for _ in range(n_iter): + with pytorch_profiler.profile("test_step"): + a = torch.ones(42) + b = torch.abs(a) + _ = a + b + + action_profile = pytorch_profiler.profiled_actions["test_step"] + total_duration = _get_pytorch_profiler_total_duration(action_profile) + average_duration = total_duration / n_iter + assert average_duration < pytorch_profiler.PROFILER_OVERHEAD_MAX_TOLERANCE + pytorch_profiler.describe() + data = Path(pytorch_profiler.output_fname).read_text() + assert len(data) > 0 + + +def test_autograd_profiler_describe(tmpdir, pytorch_profiler): + """Ensure the profiler won't fail when reporting the summary.""" + with pytorch_profiler.profile("test_step"): + pass + + # log to stdout and print to file + pytorch_profiler.describe() + data = Path(pytorch_profiler.output_fname).read_text() + assert len(data) > 0 + + +def test_pytorch_profiler_value_errors(pytorch_profiler): + """Ensure errors are raised where expected.""" + + action = "test_step" + with pytest.raises(ValueError): + pytorch_profiler.stop(action) + + pytorch_profiler.start(action) + pytorch_profiler.stop(action) + + +def test_pytorch_profiler_trainer(tmpdir): + + profiler = PyTorchProfiler(output_filename=os.path.join(tmpdir, "profiler.txt")) + + model = BoringModel() trainer = Trainer( - default_root_dir=tmpdir, - limit_train_batches=limit_train_batches, - limit_val_batches=2, - max_epochs=1, - profiler='pytorch' + fast_dev_run=True, + profiler=profiler ) - trainer.fit(model) + assert len(profiler.summary()) > 0 + assert set(profiler.profiled_actions.keys()) == {'training_step_and_backward', 'validation_step'} From 803aaa2cfc4964ea28c3fc7a8cc5115df7133ba5 Mon Sep 17 00:00:00 2001 From: tchaton Date: Thu, 21 Jan 2021 19:07:12 +0000 Subject: [PATCH 10/28] update --- pytorch_lightning/profiler/profilers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/profiler/profilers.py 
b/pytorch_lightning/profiler/profilers.py index 6c2141db50330..fe90884f9e8c1 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -293,7 +293,7 @@ class PyTorchProfiler(BaseProfiler): different operators inside your model - both on the CPU and GPU """ - PROFILER_OVERHEAD_MAX_TOLERANCE = 0.0005 + PROFILER_OVERHEAD_MAX_TOLERANCE = 7.5e-4 def __init__(self, output_filename: Optional[str] = None, @@ -417,11 +417,13 @@ def _create_profiler(self, action_name, profiler, enter=True): def _stop(self, action_name: str) -> None: if self.profiler is None: return + self.profiler.__exit__( exc_type=None, exc_val=None, exc_tb=None ) + events = self.profiler.function_events self.profiler = None for name in self.running_stack: From 698b43adcab4cb9662ae80f78fdda290e1a742ad Mon Sep 17 00:00:00 2001 From: tchaton Date: Thu, 21 Jan 2021 19:19:50 +0000 Subject: [PATCH 11/28] update doc --- pytorch_lightning/profiler/__init__.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 3f395fdcfd9aa..cab5eb191f906 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -134,15 +134,16 @@ def custom_processing_step(self, data): The profiler's results will be printed on the completion of a training `fit()`. This profiler report can be quite long, so you can also specify an `output_filename` to save the report instead -of logging it to the output in your terminal. The output below shows the profiling for the action -`get_train_batch`. -This profiler will record only for `training_step`, `evaluation_step` and `test_step` functions. +of logging it to the output in your terminal. + +This profiler will record only for `training_step_and_backward`, `evaluation_step` and `test_step` functions by default. +The output below shows the profiling for the action `training_step_and_backward`. .. 
code-block:: python Profiler Report - Profile stats for: training_step + Profile stats for: training_step_and_backward --------------------- --------------- --------------- --------------- --------------- --------------- Name Self CPU total % Self CPU total CPU total % CPU total CPU time avg --------------------- --------------- --------------- --------------- --------------- --------------- @@ -169,6 +170,17 @@ def custom_processing_step(self, data): --------------------- --------------- --------------- --------------- --------------- --------------- Self CPU time total: 1.681ms +If you need to profile more functions, do as follow: + +.. code-block:: python + + profiler = Profiler(profiled_functions=["my_own_profiled_function"]) + + with profiler.profile("my_own_profiled_function"): + + ... + + When running with `PyTorchProfiler(emit_nvtx=True)`. You should run as following: nvprof --profile-from-start off -o trace_name.prof -- From da9a56d6220a9b035920713e0329425f87faee80 Mon Sep 17 00:00:00 2001 From: tchaton Date: Thu, 21 Jan 2021 19:24:47 +0000 Subject: [PATCH 12/28] update doc --- pytorch_lightning/profiler/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index cab5eb191f906..197ad1a7f7fc9 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -178,7 +178,7 @@ def custom_processing_step(self, data): with profiler.profile("my_own_profiled_function"): - ... + pass When running with `PyTorchProfiler(emit_nvtx=True)`. 
You should run as following: From 3b119fd4bcfb176a590cb836740ef6eae27e6631 Mon Sep 17 00:00:00 2001 From: tchaton Date: Thu, 21 Jan 2021 19:46:01 +0000 Subject: [PATCH 13/28] update doc --- pytorch_lightning/profiler/__init__.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 197ad1a7f7fc9..ef8189717a02f 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -170,17 +170,6 @@ def custom_processing_step(self, data): --------------------- --------------- --------------- --------------- --------------- --------------- Self CPU time total: 1.681ms -If you need to profile more functions, do as follow: - -.. code-block:: python - - profiler = Profiler(profiled_functions=["my_own_profiled_function"]) - - with profiler.profile("my_own_profiled_function"): - - pass - - When running with `PyTorchProfiler(emit_nvtx=True)`. You should run as following: nvprof --profile-from-start off -o trace_name.prof -- From 75c966f1e1d3275c8f2553ec718382edfcf99b3f Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 22 Jan 2021 08:58:05 +0000 Subject: [PATCH 14/28] update on comments --- pytorch_lightning/profiler/profilers.py | 85 ++++++++++++++----------- tests/trainer/test_trainer.py | 32 +++++++++- 2 files changed, 78 insertions(+), 39 deletions(-) diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index fe90884f9e8c1..927942e432354 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -23,13 +23,14 @@ from abc import ABC, abstractmethod from collections import defaultdict from contextlib import contextmanager -from typing import Optional, Union +from typing import List, Optional, Union import numpy as np import torch from pytorch_lightning import _logger as log from pytorch_lightning.utilities.cloud_io import get_filesystem +from 
pytorch_lightning.utilities.distributed import rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -294,23 +295,31 @@ class PyTorchProfiler(BaseProfiler): """ PROFILER_OVERHEAD_MAX_TOLERANCE = 7.5e-4 - - def __init__(self, - output_filename: Optional[str] = None, - enabled: bool = True, - use_cuda: bool = False, - record_shapes: bool = False, - profile_memory: bool = False, - group_by_input_shapes: bool = False, - with_stack: bool = False, - use_kineto: bool = False, - use_cpu: bool = False, - emit_nvtx: bool = False, - export_to_chrome: bool = False, - path_to_export_trace: bool = None, - row_limit: int = 20, - sort_by_key: Optional[str] = None, - profiled_functions=["training_step_and_backward", "validation_step", "test_step"]): + PROFILED_FUNCTIONS = ["training_step_and_backward", "validation_step", "test_step"] + AVAILABLE_SORT_KEYS = [ + "cpu_time", "cuda_time", "cpu_time_total", + "cuda_time_total", "cpu_memory_usage", "cuda_memory_usage", + "self_cpu_memory_usage", "self_cuda_memory_usage", "count" + ] + + def __init__( + self, + output_filename: Optional[str] = None, + enabled: bool = True, + use_cuda: bool = False, + record_shapes: bool = False, + profile_memory: bool = False, + group_by_input_shapes: bool = False, + with_stack: bool = False, + use_kineto: bool = False, + use_cpu: bool = False, + emit_nvtx: bool = False, + export_to_chrome: bool = False, + path_to_export_trace: str = None, + row_limit: int = 20, + sort_by_key: Optional[str] = None, + profiled_functions: Optional[List] = None, + ): """ Args: @@ -326,6 +335,8 @@ def __init__(self, profile_memory: Whether to report memory usage, default: True (1.6.0) + group_by_input_shapes: Include operator input shapes and group calls by shape. 
+ with_stack: record source information (file and line number) for the ops (1.7.0) use_kineto: experimental support for Kineto profiler (1.8.0) @@ -340,7 +351,7 @@ def __init__(self, export_to_chrome: Wether to export the sequence of profiled operators for Chrome. - path_to_export_trace: Path to exported traces. By default, it will be save + path_to_export_trace: Directory path to export traces. By default, it will be save where the file being is being run. row_limit: Limit the number of rows in a table, `0` is a special value that @@ -351,10 +362,12 @@ def __init__(self, profiled_functions: list of profiled functions which will create a context manager on. Any other will be pass through. """ + self.profiled_actions = {} # PyTorch Profiler doesn't seem to work with multiple processes + # todo: Try to find a solution self.enabled = enabled and os.getenv("LOCAL_RANK", None) is None - self.profiled_functions = profiled_functions + self.profiled_functions = profiled_functions or self.PROFILED_FUNCTIONS self.use_cuda = use_cuda self.record_shapes = record_shapes self.profile_memory = profile_memory @@ -367,9 +380,15 @@ def __init__(self, self.emit_nvtx = emit_nvtx self.export_to_chrome = export_to_chrome self.path_to_export_trace = path_to_export_trace - if self.sort_by_key not in self.available_sort_by_keys: + + if export_to_chrome and path_to_export_trace is None: + rank_zero_warn( + "The exported trace would be save locally as `path_to_export_trace` is empty" + "Note: Each functions will generate its own traced file. ") + + if self.sort_by_key not in self.AVAILABLE_SORT_KEYS: raise MisconfigurationException( - f"Found sort_by_key: {sort_by_key}. Should be within {self.available_sort_by_keys}. ") + f"Found sort_by_key: {sort_by_key}. Should be within {self.AVAILABLE_SORT_KEYS}. 
") self.profiled_actions = {} self.context_names = {} @@ -424,13 +443,13 @@ def _stop(self, action_name: str) -> None: exc_tb=None ) - events = self.profiler.function_events + function_events = self.profiler.function_events self.profiler = None for name in self.running_stack: if name not in self.profiled_actions: - self.profiled_actions[name] = events + self.profiled_actions[name] = function_events else: - self.profiled_actions[name] += events + self.profiled_actions[name] += function_events def stop(self, action_name: str) -> None: if action_name in self.profiled_functions: @@ -449,24 +468,24 @@ def summary(self) -> str: output_string = '' if self.enabled: - for action_name, events in self.profiled_actions.items(): + for action_name, function_events in self.profiled_actions.items(): # next line is a workaround for a pytorch issue (fixed on master, still present # on 1.7). Without it the code fails with `AssertionError: There is already a CPU # parent event for detach` - events.populate_cpu_children = lambda: None + function_events.populate_cpu_children = lambda: None if self.export_to_chrome: filename = f"{action_name}_trace.json" path_to_trace = filename if self.path_to_export_trace is None \ else os.path.join(self.path_to_export_trace, filename) - events.export_chrome_trace(path_to_trace) + function_events.export_chrome_trace(path_to_trace) if self.emit_nvtx: return output_string else: - table = events.key_averages( + table = function_events.key_averages( group_by_input_shapes=self.group_by_input_shapes).table( sort_by=self.sort_by_key, row_limit=self.row_limit) @@ -491,11 +510,3 @@ def __del__(self): """Close profiler's stream.""" if self.output_file: self.output_file.close() - - @property - def available_sort_by_keys(self): - return [ - "cpu_time", "cuda_time", "cpu_time_total", - "cuda_time_total", "cpu_memory_usage", "cuda_memory_usage", - "self_cpu_memory_usage", "self_cuda_memory_usage", "count" - ] diff --git a/tests/trainer/test_trainer.py 
b/tests/trainer/test_trainer.py index 74d99a1eea8cc..a0523a2927256 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1455,7 +1455,7 @@ def _get_pytorch_profiler_total_duration(events): return total_time / 1e6 # convert microseconds to seconds -def test_autograd_profiler_overhead(pytorch_profiler, n_iter=5): +def test_pytorch_profiler_overhead(pytorch_profiler, n_iter=5): """Ensure that the profiler doesn't introduce too much overhead during training.""" for _ in range(n_iter): with pytorch_profiler.profile("test_step"): @@ -1472,7 +1472,7 @@ def test_autograd_profiler_overhead(pytorch_profiler, n_iter=5): assert len(data) > 0 -def test_autograd_profiler_describe(tmpdir, pytorch_profiler): +def test_pytorch_profiler_describe(tmpdir, pytorch_profiler): """Ensure the profiler won't fail when reporting the summary.""" with pytorch_profiler.profile("test_step"): pass @@ -1495,6 +1495,7 @@ def test_pytorch_profiler_value_errors(pytorch_profiler): def test_pytorch_profiler_trainer(tmpdir): + """Ensure that the profiler can be given to the training and default step are properly recorded. 
""" profiler = PyTorchProfiler(output_filename=os.path.join(tmpdir, "profiler.txt")) @@ -1506,3 +1507,30 @@ def test_pytorch_profiler_trainer(tmpdir): trainer.fit(model) assert len(profiler.summary()) > 0 assert set(profiler.profiled_actions.keys()) == {'training_step_and_backward', 'validation_step'} + + +def test_pytorch_profiler_nested(tmpdir): + """Ensure that the profiler handles nested context""" + + pytorch_profiler = PyTorchProfiler( + profiled_functions=["a", "b", "c"], + use_cuda=False, + output_filename=os.path.join(tmpdir, "profiler.txt")) + + with pytorch_profiler.profile("a"): + a = torch.ones(42) + with pytorch_profiler.profile("b"): + b = torch.zeros(42) + with pytorch_profiler.profile("c"): + _ = a + b + + pa = pytorch_profiler.profiled_actions + + expected_a = ['ones', 'empty', 'fill_', 'zeros', 'empty', 'zero_', 'fill_', 'add', 'empty'] + assert [e.name for e in pa['a']] == expected_a + + expected_b = ['zeros', 'empty', 'zero_', 'fill_'] + assert [e.name for e in pa['b']] == expected_b + + expected_c = ['add', 'empty'] + assert [e.name for e in pa['c']] == expected_c From f6ae283a687c785555977e6147e0fbabea5666d2 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 22 Jan 2021 09:03:41 +0000 Subject: [PATCH 15/28] update docstring --- pytorch_lightning/profiler/profilers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 927942e432354..968b0a60f9f3c 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -289,10 +289,6 @@ def __del__(self): class PyTorchProfiler(BaseProfiler): - """ - This profiler uses PyTorch's Autograd Profiler and lets you inspect the cost of - different operators inside your model - both on the CPU and GPU - """ PROFILER_OVERHEAD_MAX_TOLERANCE = 7.5e-4 PROFILED_FUNCTIONS = ["training_step_and_backward", "validation_step", "test_step"] @@ -321,6 +317,10 @@ def __init__( 
profiled_functions: Optional[List] = None, ): """ + + This profiler uses PyTorch's Autograd Profiler and lets you inspect the cost of + different operators inside your model - both on the CPU and GPU + Args: output_filename: optionally save profile results to file instead of printing From f0aed961e57c188b1654753944cfe10fbdf597b8 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 22 Jan 2021 09:08:30 +0000 Subject: [PATCH 16/28] update docstring --- pytorch_lightning/profiler/__init__.py | 5 +++-- pytorch_lightning/profiler/profilers.py | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index ef8189717a02f..c133c4c2d7396 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -50,7 +50,7 @@ Advanced Profiling --------------------- +------------------ If you want more information on the functions called during each event, you can use the `AdvancedProfiler`. This option uses Python's cProfiler_ to provide a report of time spent on *each* function called within your code. @@ -116,7 +116,7 @@ def custom_processing_step(self, data): PyTorch Profiling --------------------- +----------------- Autograd includes a profiler that lets you inspect the cost of different operators inside your model - both on the CPU and GPU. @@ -138,6 +138,7 @@ def custom_processing_step(self, data): This profiler will record only for `training_step_and_backward`, `evaluation_step` and `test_step` functions by default. The output below shows the profiling for the action `training_step_and_backward`. +The user can provide ``PyTorchProfiler(profiled_functions=[...])`` to extend the scope of profiled functions. .. 
code-block:: python diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 968b0a60f9f3c..42533565fa45d 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -344,10 +344,10 @@ def __init__( use_cpu: use_kineto=True and can be used to lower the overhead for GPU-only profiling (1.8.0) emit_nvtx: Context manager that makes every autograd operation emit an NVTX range - * Run: nvprof --profile-from-start off -o trace_name.prof -- + Run: nvprof --profile-from-start off -o trace_name.prof -- To visualize, you can either use: - * nvvp trace_name.prof - * torch.autograd.profiler.load_nvprof(path) + nvvp trace_name.prof + torch.autograd.profiler.load_nvprof(path) export_to_chrome: Wether to export the sequence of profiled operators for Chrome. @@ -355,7 +355,7 @@ def __init__( where the file being is being run. row_limit: Limit the number of rows in a table, `0` is a special value that - removes the limit completely. + removes the limit completely. sort_by_key: Keys to sort out profiled table From 5dd2b4df9545aae725bc2e14cffcae2ea7e5a1eb Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Fri, 22 Jan 2021 10:30:49 +0100 Subject: [PATCH 17/28] try --- pytorch_lightning/profiler/profilers.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 42533565fa45d..1fe20c9c0189e 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -317,7 +317,6 @@ def __init__( profiled_functions: Optional[List] = None, ): """ - This profiler uses PyTorch's Autograd Profiler and lets you inspect the cost of different operators inside your model - both on the CPU and GPU @@ -326,12 +325,12 @@ def __init__( output_filename: optionally save profile results to file instead of printing to std out when training is finished. 
- enabled: Setting this to False makes this context manager a no-op. Default: True + enabled: Setting this to False makes this context manager a no-op. use_cuda: Enables timing of CUDA events as well using the cudaEvent API. - Adds approximately 4us of overhead to each tensor operation. Default: True + Adds approximately 4us of overhead to each tensor operation. - record_shapes: If shapes recording is set, information about input dimensions will be collected. + record_shapes: If shapes recording is set, information about input dimensions will be collected. profile_memory: Whether to report memory usage, default: True (1.6.0) @@ -344,8 +343,12 @@ def __init__( use_cpu: use_kineto=True and can be used to lower the overhead for GPU-only profiling (1.8.0) emit_nvtx: Context manager that makes every autograd operation emit an NVTX range - Run: nvprof --profile-from-start off -o trace_name.prof -- - To visualize, you can either use: + Run:: + + nvprof --profile-from-start off -o trace_name.prof -- + + To visualize, you can either use:: + nvvp trace_name.prof torch.autograd.profiler.load_nvprof(path) From 03b3ea5fc804d126185e26e4e7f6c46f7ea9e10d Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 22 Jan 2021 09:38:18 +0000 Subject: [PATCH 18/28] update test --- tests/trainer/test_trainer.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index a0523a2927256..de11aec9e7d42 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -17,6 +17,7 @@ import sys from argparse import Namespace from copy import deepcopy +from distutils.version import LooseVersion from pathlib import Path from unittest.mock import ANY, call, patch @@ -1526,11 +1527,23 @@ def test_pytorch_profiler_nested(tmpdir): pa = pytorch_profiler.profiled_actions - expected_a = ['ones', 'empty', 'fill_', 'zeros', 'empty', 'zero_', 'fill_', 'add', 'empty'] - assert [e.name for e in 
pa['a']] == expected_a + # From PyTorch 1.6.0, more operation are being traced. + if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): + expected_a = ['ones', 'empty', 'fill_', 'zeros', 'empty', 'zero_', 'fill_', 'add', 'empty'] + assert [e.name for e in pa['a']] == expected_a - expected_b = ['zeros', 'empty', 'zero_', 'fill_'] - assert [e.name for e in pa['b']] == expected_b + expected_b = ['zeros', 'empty', 'zero_', 'fill_'] + assert [e.name for e in pa['b']] == expected_b - expected_c = ['add', 'empty'] - assert [e.name for e in pa['c']] == expected_c + expected_c = ['add', 'empty'] + assert [e.name for e in pa['c']] == expected_c + + else: + expected_a = ['add'] + assert [e.name for e in pa['a']] == expected_a + + expected_b = [] + assert [e.name for e in pa['b']] == expected_b + + expected_c = ['add'] + assert [e.name for e in pa['c']] == expected_c From 1e6a9535b6344ac9be31eb9b0e710b8469cad60c Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 22 Jan 2021 10:21:19 +0000 Subject: [PATCH 19/28] Update pytorch_lightning/profiler/__init__.py Co-authored-by: Jirka Borovec --- pytorch_lightning/profiler/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index c133c4c2d7396..e3d7a8a1ab973 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -171,9 +171,9 @@ def custom_processing_step(self, data): --------------------- --------------- --------------- --------------- --------------- --------------- Self CPU time total: 1.681ms -When running with `PyTorchProfiler(emit_nvtx=True)`. You should run as following: +When running with `PyTorchProfiler(emit_nvtx=True)`. 
You should run as following:: -nvprof --profile-from-start off -o trace_name.prof -- + nvprof --profile-from-start off -o trace_name.prof -- To visualize the profiled operation, you can either: From 21ae2da97e62c561eae28520c47ca76a6f174f3f Mon Sep 17 00:00:00 2001 From: chaton Date: Fri, 22 Jan 2021 10:21:32 +0000 Subject: [PATCH 20/28] Update pytorch_lightning/profiler/__init__.py Co-authored-by: Jirka Borovec --- pytorch_lightning/profiler/__init__.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index e3d7a8a1ab973..7398b21c6c1b8 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -177,9 +177,13 @@ def custom_processing_step(self, data): To visualize the profiled operation, you can either: -* Use: nvvp trace_name.prof +* Use:: -* Use: python -c 'import torch; print(torch.autograd.profiler.load_nvprof("trace_name.prof"))' + nvvp trace_name.prof + +* Use:: + + python -c 'import torch; print(torch.autograd.profiler.load_nvprof("trace_name.prof"))' """ From f6f0d890770807f17724ac3f7e1274df5e85c39b Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 22 Jan 2021 10:34:23 +0000 Subject: [PATCH 21/28] update on comments --- pytorch_lightning/profiler/profilers.py | 124 +++++++++++++----------- tests/trainer/test_trainer.py | 27 ++---- 2 files changed, 71 insertions(+), 80 deletions(-) diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 1fe20c9c0189e..f9551fe90ced9 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -290,13 +290,12 @@ def __del__(self): class PyTorchProfiler(BaseProfiler): - PROFILER_OVERHEAD_MAX_TOLERANCE = 7.5e-4 - PROFILED_FUNCTIONS = ["training_step_and_backward", "validation_step", "test_step"] - AVAILABLE_SORT_KEYS = [ + PROFILED_FUNCTIONS = ("training_step_and_backward", "validation_step", 
"test_step") + AVAILABLE_SORT_KEYS = ( "cpu_time", "cuda_time", "cpu_time_total", "cuda_time_total", "cpu_memory_usage", "cuda_memory_usage", "self_cpu_memory_usage", "self_cuda_memory_usage", "count" - ] + ) def __init__( self, @@ -332,15 +331,16 @@ def __init__( record_shapes: If shapes recording is set, information about input dimensions will be collected. - profile_memory: Whether to report memory usage, default: True (1.6.0) + profile_memory: Whether to report memory usage, default: True (Introduced in PyTorch 1.6.0) group_by_input_shapes: Include operator input shapes and group calls by shape. - with_stack: record source information (file and line number) for the ops (1.7.0) + with_stack: record source information (file and line number) for the ops (Introduced in PyTorch 1.7.0) - use_kineto: experimental support for Kineto profiler (1.8.0) + use_kineto: experimental support for Kineto profiler (Introduced in PyTorch 1.8.0) - use_cpu: use_kineto=True and can be used to lower the overhead for GPU-only profiling (1.8.0) + use_cpu: use_kineto=True and can be used to lower the overhead + for GPU-only profiling (Introduced in PyTorch 1.8.0) emit_nvtx: Context manager that makes every autograd operation emit an NVTX range Run:: @@ -353,9 +353,10 @@ def __init__( torch.autograd.profiler.load_nvprof(path) export_to_chrome: Wether to export the sequence of profiled operators for Chrome. + It will generate a ``.json`` file which can be read by Chrome. - path_to_export_trace: Directory path to export traces. By default, it will be save - where the file being is being run. + path_to_export_trace: Directory path to export ``.json`` traces when using ``export_to_chrome=True``. + By default, it will be save where the file being is being run. row_limit: Limit the number of rows in a table, `0` is a special value that removes the limit completely. 
@@ -386,8 +387,8 @@ def __init__( if export_to_chrome and path_to_export_trace is None: rank_zero_warn( - "The exported trace would be save locally as `path_to_export_trace` is empty" - "Note: Each functions will generate its own traced file. ") + "The exported trace would be save locally as `path_to_export_trace` is empty." + " Note: Each functions will generate its own traced file.") if self.sort_by_key not in self.AVAILABLE_SORT_KEYS: raise MisconfigurationException( @@ -409,15 +410,16 @@ def __init__( super().__init__(output_streams=streaming_out) def start(self, action_name: str) -> None: - # stop the running profiler if any - if action_name in self.profiled_functions: - if len(self.running_stack) > 0: - self._stop(self.running_stack[-1]) - self.running_stack.append(action_name) + if action_name not in self.profiled_functions: + return + + if len(self.running_stack) > 0: + self._stop(self.running_stack[-1]) + self.running_stack.append(action_name) - self.context_names[action_name] = "/".join(self.running_stack) + self.context_names[action_name] = "/".join(self.running_stack) - self._start(action_name) + self._start(action_name) def _start(self, action_name: str) -> None: if self.emit_nvtx: @@ -455,51 +457,55 @@ def _stop(self, action_name: str) -> None: self.profiled_actions[name] += function_events def stop(self, action_name: str) -> None: - if action_name in self.profiled_functions: - if len(self.running_stack) == 0 or self.running_stack[-1] != action_name: - raise ValueError( # pragma: no-cover - f"Attempting to stop recording an action ({action_name}) which was never started." 
- ) - self._stop(action_name) - self.running_stack.pop() - # restore running profiler - if len(self.running_stack) > 0: - self._start(self.running_stack[-1]) + if action_name not in self.profiled_functions: + return + + if len(self.running_stack) == 0 or self.running_stack[-1] != action_name: + raise ValueError( # pragma: no-cover + f"Attempting to stop recording an action ({action_name}) which was never started." + ) + self._stop(action_name) + self.running_stack.pop() + # restore running profiler + if len(self.running_stack) > 0: + self._start(self.running_stack[-1]) def summary(self) -> str: recorded_stats = {} output_string = '' - if self.enabled: - for action_name, function_events in self.profiled_actions.items(): - - # next line is a workaround for a pytorch issue (fixed on master, still present - # on 1.7). Without it the code fails with `AssertionError: There is already a CPU - # parent event for detach` - function_events.populate_cpu_children = lambda: None - - if self.export_to_chrome: - filename = f"{action_name}_trace.json" - path_to_trace = filename if self.path_to_export_trace is None \ - else os.path.join(self.path_to_export_trace, filename) - function_events.export_chrome_trace(path_to_trace) - - if self.emit_nvtx: - return output_string - - else: - table = function_events.key_averages( - group_by_input_shapes=self.group_by_input_shapes).table( - sort_by=self.sort_by_key, - row_limit=self.row_limit) - recorded_stats[action_name] = table - - # log to standard out - output_string = f"{os.linesep}Profiler Report{os.linesep}" - for action, stats in recorded_stats.items(): - output_string += ( - f"{os.linesep}Profile stats for: {action}{os.linesep}{stats}" - ) + if not self.enabled: + return output_string + + for action_name, function_events in self.profiled_actions.items(): + + # next line is a workaround for a pytorch issue (fixed on master, still present + # on 1.7). 
Without it the code fails with `AssertionError: There is already a CPU + # parent event for detach` + function_events.populate_cpu_children = lambda: None + + if self.export_to_chrome: + filename = f"{action_name}_trace.json" + path_to_trace = filename if self.path_to_export_trace is None \ + else os.path.join(self.path_to_export_trace, filename) + function_events.export_chrome_trace(path_to_trace) + + if self.emit_nvtx: + return output_string + + else: + table = function_events.key_averages( + group_by_input_shapes=self.group_by_input_shapes).table( + sort_by=self.sort_by_key, + row_limit=self.row_limit) + recorded_stats[action_name] = table + + # log to standard out + output_string = f"{os.linesep}Profiler Report{os.linesep}" + for action, stats in recorded_stats.items(): + output_string += ( + f"{os.linesep}Profile stats for: {action}{os.linesep}{stats}" + ) return output_string diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index de11aec9e7d42..b5a970f236ac5 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1452,27 +1452,10 @@ def test_trainer_profiler_incorrect_arg_type(profiler): def _get_pytorch_profiler_total_duration(events): - total_time = sum([e.cpu_time + e.cuda_time for e in events]) + total_time = sum([evt.cpu_time + evt.cuda_time for evt in events]) return total_time / 1e6 # convert microseconds to seconds -def test_pytorch_profiler_overhead(pytorch_profiler, n_iter=5): - """Ensure that the profiler doesn't introduce too much overhead during training.""" - for _ in range(n_iter): - with pytorch_profiler.profile("test_step"): - a = torch.ones(42) - b = torch.abs(a) - _ = a + b - - action_profile = pytorch_profiler.profiled_actions["test_step"] - total_duration = _get_pytorch_profiler_total_duration(action_profile) - average_duration = total_duration / n_iter - assert average_duration < pytorch_profiler.PROFILER_OVERHEAD_MAX_TOLERANCE - pytorch_profiler.describe() - data = 
Path(pytorch_profiler.output_fname).read_text() - assert len(data) > 0 - - def test_pytorch_profiler_describe(tmpdir, pytorch_profiler): """Ensure the profiler won't fail when reporting the summary.""" with pytorch_profiler.profile("test_step"): @@ -1529,14 +1512,16 @@ def test_pytorch_profiler_nested(tmpdir): # From PyTorch 1.6.0, more operation are being traced. if LooseVersion(torch.__version__) >= LooseVersion("1.6.0"): + prefix_to_remove = "aten::" if LooseVersion(torch.__version__) >= LooseVersion("1.7.1") else '' + expected_a = ['ones', 'empty', 'fill_', 'zeros', 'empty', 'zero_', 'fill_', 'add', 'empty'] - assert [e.name for e in pa['a']] == expected_a + assert [e.name.replace(prefix_to_remove, '') for e in pa['a']] == expected_a expected_b = ['zeros', 'empty', 'zero_', 'fill_'] - assert [e.name for e in pa['b']] == expected_b + assert [e.name.replace(prefix_to_remove, '') for e in pa['b']] == expected_b expected_c = ['add', 'empty'] - assert [e.name for e in pa['c']] == expected_c + assert [e.name.replace(prefix_to_remove, '') for e in pa['c']] == expected_c else: expected_a = ['add'] From 2ea05de42ead7f77ff50187aa98ea87dc5767576 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 22 Jan 2021 10:35:47 +0000 Subject: [PATCH 22/28] remove old code --- tests/trainer/test_trainer.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index b5a970f236ac5..3437e59f2cf86 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -1451,12 +1451,7 @@ def test_trainer_profiler_incorrect_arg_type(profiler): Trainer(profiler=profiler) -def _get_pytorch_profiler_total_duration(events): - total_time = sum([evt.cpu_time + evt.cuda_time for evt in events]) - return total_time / 1e6 # convert microseconds to seconds - - -def test_pytorch_profiler_describe(tmpdir, pytorch_profiler): +def test_pytorch_profiler_describe(pytorch_profiler): """Ensure the profiler won't fail 
when reporting the summary.""" with pytorch_profiler.profile("test_step"): pass From c397603069587f8353bbb8504fb9596c1cb93588 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 25 Jan 2021 10:22:17 +0000 Subject: [PATCH 23/28] add support for ddp --- pytorch_lightning/profiler/__init__.py | 4 ++ pytorch_lightning/profiler/profilers.py | 43 ++++++++++++++++--- .../trainer/connectors/profiler_connector.py | 4 ++ pytorch_lightning/trainer/training_loop.py | 3 ++ tests/special_tests.sh | 1 + tests/trainer/test_trainer.py | 35 ++++++++++++--- 6 files changed, 79 insertions(+), 11 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 7398b21c6c1b8..5c5cd65d3fd28 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -132,6 +132,10 @@ def custom_processing_step(self, data): profiler = PyTorchProfiler(...) trainer = Trainer(..., profiler=profiler) + +.. note:: This profiler works with DistributedDataParallel. If output_filename is provided, each rank will save the profiled operation to their own file. + + The profiler's results will be printed on the completion of a training `fit()`. This profiler report can be quite long, so you can also specify an `output_filename` to save the report instead of logging it to the output in your terminal. 
diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index f9551fe90ced9..2de6a1026fa1e 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -29,6 +29,7 @@ import torch from pytorch_lightning import _logger as log +from pytorch_lightning.utilities import rank_zero_only from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.distributed import rank_zero_warn from pytorch_lightning.utilities.exceptions import MisconfigurationException @@ -100,6 +101,10 @@ def summary(self) -> str: """Create profiler summary in text format.""" + def on_train_start(self, local_rank: int): + self.local_rank = local_rank + + class PassThroughProfiler(BaseProfiler): """ This class should be used when you don't want the (small) overhead of profiling. @@ -314,6 +319,7 @@ def __init__( row_limit: int = 20, sort_by_key: Optional[str] = None, profiled_functions: Optional[List] = None, + local_rank: Optional[int] = None, ): """ This profiler uses PyTorch's Autograd Profiler and lets you inspect the cost of @@ -322,7 +328,9 @@ def __init__( Args: output_filename: optionally save profile results to file instead of printing - to std out when training is finished. + to std out when training is finished. When using ``ddp``, + each rank will stream the profiled operation to their own file + with the extension ``_{rank}.txt`` enabled: Setting this to False makes this context manager a no-op. @@ -365,12 +373,13 @@ def __init__( profiled_functions: list of profiled functions which will create a context manager on. Any other will be pass through. + + local_rank: When running in distributed setting, local_rank is used for each process + to write to their own file if `output_fname` is provided. 
""" self.profiled_actions = {} - # PyTorch Profiler doesn't seem to work with multiple processes - # todo: Try to find a solution - self.enabled = enabled and os.getenv("LOCAL_RANK", None) is None + self.enabled = enabled self.profiled_functions = profiled_functions or self.PROFILED_FUNCTIONS self.use_cuda = use_cuda self.record_shapes = record_shapes @@ -401,14 +410,36 @@ def __init__( self.output_fname = output_filename self.output_file = None + self.local_rank = local_rank + if self.local_rank is not None: + self.on_fit_start(self.local_rank) + self.on_fit_start = super().on_fit_start + + def on_train_start(self, local_rank: Optional[str] = None): + self.local_rank = local_rank + + if local_rank != 0: + self.wrap_functions_into_rank_zero_only() if self.output_fname: + if local_rank is not None: + if '.txt' not in self.output_fname: + raise MisconfigurationException("Log file should be .txt file.") + + self.output_fname = self.output_fname.replace(".txt", f"_{self.local_rank}.txt") + fs = get_filesystem(self.output_fname) self.output_file = fs.open(self.output_fname, "w") streaming_out = [self.output_file.write] if self.output_file else [log.info] super().__init__(output_streams=streaming_out) + def wrap_functions_into_rank_zero_only(self): + self.start = rank_zero_only(self.start) + self.stop = rank_zero_only(self.stop) + self.summary = rank_zero_only(self.summary) + self.describe = rank_zero_only(self.describe) + def start(self, action_name: str) -> None: if action_name not in self.profiled_functions: return @@ -485,7 +516,7 @@ def summary(self) -> str: function_events.populate_cpu_children = lambda: None if self.export_to_chrome: - filename = f"{action_name}_trace.json" + filename = f"{action_name}_{self.local_rank}_trace.json" path_to_trace = filename if self.path_to_export_trace is None \ else os.path.join(self.path_to_export_trace, filename) function_events.export_chrome_trace(path_to_trace) @@ -504,7 +535,7 @@ def summary(self) -> str: output_string = 
f"{os.linesep}Profiler Report{os.linesep}" for action, stats in recorded_stats.items(): output_string += ( - f"{os.linesep}Profile stats for: {action}{os.linesep}{stats}" + f"{os.linesep}Profile stats for: {action} rank: {self.local_rank} {os.linesep}{stats}" ) return output_string diff --git a/pytorch_lightning/trainer/connectors/profiler_connector.py b/pytorch_lightning/trainer/connectors/profiler_connector.py index d2e6ada35412e..2e66d2370e40f 100644 --- a/pytorch_lightning/trainer/connectors/profiler_connector.py +++ b/pytorch_lightning/trainer/connectors/profiler_connector.py @@ -58,3 +58,7 @@ def on_trainer_init(self, profiler: Union[BaseProfiler, bool, str]): raise ValueError("When passing string value for the `profiler` parameter of" " `Trainer`, it can only be 'simple' or 'advanced'") self.trainer.profiler = profiler or PassThroughProfiler() + + def on_train_start(self, trainer): + local_rank = trainer.local_rank if trainer.world_size > 1 else None + self.trainer.profiler.on_train_start(local_rank) diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py index 1b07634908a7e..312a23f46c7ad 100644 --- a/pytorch_lightning/trainer/training_loop.py +++ b/pytorch_lightning/trainer/training_loop.py @@ -111,6 +111,9 @@ def on_train_start(self): # hook self.trainer.call_hook("on_train_start") + # provide rank to profiler + self.trainer.profile_connector.on_train_start(self.trainer) + def setup_fit(self, model, train_dataloader, val_dataloaders, datamodule): # bind logger and other properties self.trainer.model_connector.copy_trainer_model_properties(model) diff --git a/tests/special_tests.sh b/tests/special_tests.sh index ea14841c74bad..8650be6fd4682 100644 --- a/tests/special_tests.sh +++ b/tests/special_tests.sh @@ -23,3 +23,4 @@ python ${DEFAULTS} tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequent python ${DEFAULTS} tests/utilities/test_all_gather_grad.py::test_all_gather_collection # python ${DEFAULTS} 
tests/plugins/test_ddp_sequential_plugin.py::test_ddp_sequential_plugin_ddp_rpc_with_wrong_balance python ${DEFAULTS} tests/trainer/logging_process/test_train_loop_logging_1_0.py::test_logging_sync_dist_true_ddp +python ${DEFAULTS} tests/trainer/test_trainer.py::test_pytorch_profiler_trainer_ddp diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 3437e59f2cf86..01b9f72609e87 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -14,6 +14,7 @@ import math import os import pickle +from pytorch_lightning.accelerators import accelerator import sys from argparse import Namespace from copy import deepcopy @@ -1473,19 +1474,43 @@ def test_pytorch_profiler_value_errors(pytorch_profiler): pytorch_profiler.stop(action) -def test_pytorch_profiler_trainer(tmpdir): +@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") +@pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1', + reason="test should be run outside of pytest") +@pytest.mark.parametrize("use_output_filename", [False, True]) +def test_pytorch_profiler_trainer_ddp(tmpdir, use_output_filename): """Ensure that the profiler can be given to the training and default step are properly recorded. 
""" - profiler = PyTorchProfiler(output_filename=os.path.join(tmpdir, "profiler.txt")) + if use_output_filename: + output_filename = os.path.join(tmpdir, "profiler.txt") + else: + output_filename = None + + profiler = PyTorchProfiler(output_filename=output_filename) model = BoringModel() trainer = Trainer( fast_dev_run=True, - profiler=profiler + profiler=profiler, + accelerator="ddp", + gpus=2 + ) trainer.fit(model) - assert len(profiler.summary()) > 0 - assert set(profiler.profiled_actions.keys()) == {'training_step_and_backward', 'validation_step'} + + enabled = use_output_filename or not use_output_filename and profiler.local_rank == 0 + + if enabled: + assert len(profiler.summary()) > 0 + assert set(profiler.profiled_actions.keys()) == {'training_step_and_backward', 'validation_step'} + else: + assert profiler.summary() is None + assert set(profiler.profiled_actions.keys()) == set() + + if use_output_filename: + profiler.describe() + data = Path(profiler.output_fname).read_text() + assert len(data) > 0 def test_pytorch_profiler_nested(tmpdir): From 1db6e678d9d2c0233aeb486cd9c42cab698599e3 Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 25 Jan 2021 10:25:11 +0000 Subject: [PATCH 24/28] resolve flake8 --- pytorch_lightning/profiler/__init__.py | 3 ++- pytorch_lightning/profiler/profilers.py | 9 ++++----- tests/trainer/test_trainer.py | 5 ++--- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 5c5cd65d3fd28..63b4b013fdbf0 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -133,7 +133,8 @@ def custom_processing_step(self, data): trainer = Trainer(..., profiler=profiler) -.. note:: This profiler works with DistributedDataParallel. If output_filename is provided, each rank will save the profiled operation to their own file. +This profiler works with PyTorch ``DistributedDataParallel``. 
+If ``output_filename`` is provided, each rank will save their profiled operation to their own file. The profiler's results will be printed on the completion of a training `fit()`. This profiler diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 2de6a1026fa1e..28478e7c66fed 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -100,7 +100,6 @@ def describe(self) -> None: def summary(self) -> str: """Create profiler summary in text format.""" - def on_train_start(self, local_rank: int): self.local_rank = local_rank @@ -328,8 +327,8 @@ def __init__( Args: output_filename: optionally save profile results to file instead of printing - to std out when training is finished. When using ``ddp``, - each rank will stream the profiled operation to their own file + to std out when training is finished. When using ``ddp``, + each rank will stream the profiled operation to their own file with the extension ``_{rank}.txt`` enabled: Setting this to False makes this context manager a no-op. @@ -374,7 +373,7 @@ def __init__( profiled_functions: list of profiled functions which will create a context manager on. Any other will be pass through. - local_rank: When running in distributed setting, local_rank is used for each process + local_rank: When running in distributed setting, local_rank is used for each process to write to their own file if `output_fname` is provided. 
""" @@ -425,7 +424,7 @@ def on_train_start(self, local_rank: Optional[str] = None): if local_rank is not None: if '.txt' not in self.output_fname: raise MisconfigurationException("Log file should be .txt file.") - + self.output_fname = self.output_fname.replace(".txt", f"_{self.local_rank}.txt") fs = get_filesystem(self.output_fname) diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 01b9f72609e87..26b7befe0e974 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -14,7 +14,6 @@ import math import os import pickle -from pytorch_lightning.accelerators import accelerator import sys from argparse import Namespace from copy import deepcopy @@ -1499,14 +1498,14 @@ def test_pytorch_profiler_trainer_ddp(tmpdir, use_output_filename): trainer.fit(model) enabled = use_output_filename or not use_output_filename and profiler.local_rank == 0 - + if enabled: assert len(profiler.summary()) > 0 assert set(profiler.profiled_actions.keys()) == {'training_step_and_backward', 'validation_step'} else: assert profiler.summary() is None assert set(profiler.profiled_actions.keys()) == set() - + if use_output_filename: profiler.describe() data = Path(profiler.output_fname).read_text() From e9866bbc2b43943787d782d412b737e51e738d1b Mon Sep 17 00:00:00 2001 From: chaton Date: Mon, 25 Jan 2021 15:22:54 +0000 Subject: [PATCH 25/28] Update pytorch_lightning/profiler/__init__.py Co-authored-by: Sean Naren --- pytorch_lightning/profiler/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 63b4b013fdbf0..9d12deb31470b 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -145,6 +145,8 @@ def custom_processing_step(self, data): The output below shows the profiling for the action `training_step_and_backward`. 
The user can provide ``PyTorchProfiler(profiled_functions=[...])`` to extend the scope of profiled functions. +.. note:: When using the PyTorch Profiler, wall clock time will not not be representative of the true wall clock time. This is due to forcing profiled operations to be measured synchronously, when many CUDA ops happen asynchronously. It is recommended to use this Profiler to find bottlenecks/breakdowns, however for end to end wall clock time use the `SimpleProfiler`. + .. code-block:: python Profiler Report From d65beee811daeb1abaedc37d348dcfa0be4cf6d4 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 25 Jan 2021 15:30:29 +0000 Subject: [PATCH 26/28] resolve tests --- pytorch_lightning/profiler/profilers.py | 18 ++++++++++-------- tests/trainer/test_trainer.py | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 28478e7c66fed..4f881f3e217ff 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -100,7 +100,7 @@ def describe(self) -> None: def summary(self) -> str: """Create profiler summary in text format.""" - def on_train_start(self, local_rank: int): + def on_train_start(self, local_rank: Optional[int] = None): self.local_rank = local_rank @@ -409,15 +409,15 @@ def __init__( self.output_fname = output_filename self.output_file = None - self.local_rank = local_rank - if self.local_rank is not None: - self.on_fit_start(self.local_rank) - self.on_fit_start = super().on_fit_start + if local_rank is not None: + self.on_train_start(local_rank=local_rank) + self.on_train_start = super().on_train_start def on_train_start(self, local_rank: Optional[str] = None): self.local_rank = local_rank - if local_rank != 0: + # when logging to `log.info`, only perform profiling on rank 0 + if local_rank != 0 and self.output_fname is None: self.wrap_functions_into_rank_zero_only() if self.output_fname: @@ -504,6 +504,8 @@ def 
summary(self) -> str: recorded_stats = {} output_string = '' + local_rank = '0' if self.local_rank is None else self.local_rank + if not self.enabled: return output_string @@ -515,7 +517,7 @@ def summary(self) -> str: function_events.populate_cpu_children = lambda: None if self.export_to_chrome: - filename = f"{action_name}_{self.local_rank}_trace.json" + filename = f"{action_name}_{local_rank}_trace.json" path_to_trace = filename if self.path_to_export_trace is None \ else os.path.join(self.path_to_export_trace, filename) function_events.export_chrome_trace(path_to_trace) @@ -534,7 +536,7 @@ def summary(self) -> str: output_string = f"{os.linesep}Profiler Report{os.linesep}" for action, stats in recorded_stats.items(): output_string += ( - f"{os.linesep}Profile stats for: {action} rank: {self.local_rank} {os.linesep}{stats}" + f"{os.linesep}Profile stats for: {action} rank: {local_rank} {os.linesep}{stats}" ) return output_string diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py index 26b7befe0e974..b9723878adad5 100644 --- a/tests/trainer/test_trainer.py +++ b/tests/trainer/test_trainer.py @@ -42,7 +42,7 @@ @pytest.fixture def pytorch_profiler(tmpdir): - profiler = PyTorchProfiler(output_filename=os.path.join(tmpdir, "profiler.txt")) + profiler = PyTorchProfiler(output_filename=os.path.join(tmpdir, "profiler.txt"), local_rank=0) return profiler From 8338c5ef398395da628d285d7b72a80bf2f412a8 Mon Sep 17 00:00:00 2001 From: tchaton Date: Mon, 25 Jan 2021 15:34:46 +0000 Subject: [PATCH 27/28] resolve flake8 --- pytorch_lightning/core/memory.py | 7 +++++-- pytorch_lightning/profiler/__init__.py | 2 +- pytorch_lightning/profiler/profilers.py | 3 +-- tests/core/test_memory.py | 7 +++++-- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/core/memory.py b/pytorch_lightning/core/memory.py index 4c1710cd36de0..9c30d6c5d6270 100644 --- a/pytorch_lightning/core/memory.py +++ b/pytorch_lightning/core/memory.py @@ -16,7 
+16,7 @@ import shutil import subprocess from collections import OrderedDict -from typing import Tuple, Dict, Union, List, Any +from typing import Any, Dict, List, Tuple, Union import numpy as np import torch @@ -182,7 +182,8 @@ def __init__(self, model, mode: str = MODE_DEFAULT): self._model = model self._mode = mode self._layer_summary = self.summarize() - self._precision_megabytes = (self._model.precision / 8.0) * 1e-6 # 1 byte -> 8 bits + # 1 byte -> 8 bits + self._precision_megabytes = (self._model.precision / 8.0) * 1e-6 @property def named_modules(self) -> List[Tuple[str, nn.Module]]: @@ -389,9 +390,11 @@ def get_gpu_memory_map() -> Dict[str, int]: } return gpu_memory_map + def get_formatted_model_size(total_model_size: float) -> float: return f"{total_model_size:,.3f}" + def get_human_readable_count(number: int) -> str: """ Abbreviates an integer number with K, M, B, T for thousands, millions, diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py index 9d12deb31470b..546ed45e18263 100644 --- a/pytorch_lightning/profiler/__init__.py +++ b/pytorch_lightning/profiler/__init__.py @@ -145,7 +145,7 @@ def custom_processing_step(self, data): The output below shows the profiling for the action `training_step_and_backward`. The user can provide ``PyTorchProfiler(profiled_functions=[...])`` to extend the scope of profiled functions. -.. note:: When using the PyTorch Profiler, wall clock time will not not be representative of the true wall clock time. This is due to forcing profiled operations to be measured synchronously, when many CUDA ops happen asynchronously. It is recommended to use this Profiler to find bottlenecks/breakdowns, however for end to end wall clock time use the `SimpleProfiler`. +.. note:: When using the PyTorch Profiler, wall clock time will not be representative of the true wall clock time. This is due to forcing profiled operations to be measured synchronously, when many CUDA ops happen asynchronously.
It is recommended to use this Profiler to find bottlenecks/breakdowns, however for end to end wall clock time use the `SimpleProfiler`. # noqa E501 .. code-block:: python diff --git a/pytorch_lightning/profiler/profilers.py b/pytorch_lightning/profiler/profilers.py index 4f881f3e217ff..a1221524faf4b 100644 --- a/pytorch_lightning/profiler/profilers.py +++ b/pytorch_lightning/profiler/profilers.py @@ -503,8 +503,7 @@ def stop(self, action_name: str) -> None: def summary(self) -> str: recorded_stats = {} output_string = '' - - local_rank = '0' if self.local_rank is None else self.local_rank + local_rank = '0' if self.local_rank is None else self.local_rank if not self.enabled: return output_string diff --git a/tests/core/test_memory.py b/tests/core/test_memory.py index 699b248013020..7e83c928b31e2 100644 --- a/tests/core/test_memory.py +++ b/tests/core/test_memory.py @@ -40,8 +40,10 @@ class PreCalculatedModel(BoringModel): def __init__(self, precision: int = 32): super().__init__() - self.layer = nn.Linear(32, 1000, bias=False) # 32K params - self.layer1 = nn.Linear(1000, 218, bias=False) # 218K params + # 32K params + self.layer = nn.Linear(32, 1000, bias=False) + # 218K params + self.layer1 = nn.Linear(1000, 218, bias=False) # calculate model size based on precision. 
self.pre_calculated_model_size = 1.0 / (32 / precision) @@ -50,6 +52,7 @@ def forward(self, x): x = self.layer(x) return self.layer1(x) + class UnorderedModel(LightningModule): """ A model in which the layers not defined in order of execution """ From 9ae56ccd501c93c54634a9144bf1ddad2b2c6451 Mon Sep 17 00:00:00 2001 From: tchaton Date: Tue, 26 Jan 2021 08:56:23 +0000 Subject: [PATCH 28/28] resolve flake8 --- tests/base/model_test_steps.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/base/model_test_steps.py b/tests/base/model_test_steps.py index dfbbd7d2d31e6..db70959bfddef 100644 --- a/tests/base/model_test_steps.py +++ b/tests/base/model_test_steps.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import random from abc import ABC from collections import OrderedDict