From 99a94dc8c6ce41e079fb9092c0981680c2b8f028 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Wed, 1 Apr 2020 16:30:56 +0200 Subject: [PATCH 01/44] Create utils.py --- pytorch_lightning/metrics/utils.py | 131 +++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) create mode 100644 pytorch_lightning/metrics/utils.py diff --git a/pytorch_lightning/metrics/utils.py b/pytorch_lightning/metrics/utils.py new file mode 100644 index 0000000000000..9942545273546 --- /dev/null +++ b/pytorch_lightning/metrics/utils.py @@ -0,0 +1,131 @@ +import numbers +from typing import Union, Any, Optional + +import numpy as np +import torch +from torch.utils.data._utils.collate import default_convert + +from pytorch_lightning.utilities.apply_to_collection import apply_to_collection + + +def _apply_to_inputs(func_to_apply, *dec_args, **dec_kwargs): + def decorator_fn(func_to_decorate): + def new_func(*args, **kwargs): + args = func_to_apply(args, *dec_args, **dec_kwargs) + kwargs = func_to_apply(kwargs, *dec_args, **dec_kwargs) + return func_to_decorate(*args, **kwargs) + + return new_func + + return decorator_fn + + +def _apply_to_outputs(func_to_apply, *dec_args, **dec_kwargs): + def decorator_fn(function_to_decorate): + def new_func(*args, **kwargs): + result = function_to_decorate(*args, **kwargs) + return func_to_apply(result, *dec_args, **dec_kwargs) + + return new_func + + return decorator_fn + + +def _convert_to_tensor(data: Any) -> Any: + """ + Maps all kind of collections and numbers to tensors + + Args: + data: the data to convert to tensor + + Returns: + the converted data + + """ + if isinstance(data, numbers.Number): + return torch.tensor([data]) + + else: + return default_convert(data) + + +def _convert_to_numpy(data: Union[torch.Tensor, np.ndarray, numbers.Number]) -> np.ndarray: + """ + converts all tensors and numpy arrays to numpy arrays + Args: + data: the tensor or array to convert to numpy + + Returns: + the resulting numpy array + + """ + if isinstance(data, torch.Tensor): + return data.cpu().detach().numpy() + elif isinstance(data, numbers.Number): + return np.array([data]) + return data + + +def _numpy_metric_conversion(func_to_decorate): + # Applies collection conversion from tensor to numpy to all inputs + # we need to include numpy arrays here, since otherwise they will also be treated as sequences + func_convert_inputs = _apply_to_inputs( + apply_to_collection, (torch.Tensor, np.ndarray, numbers.Number), _convert_to_numpy)(func_to_decorate) + # converts all inputs back to tensors (device doesn't matter here, since this is handled by BaseMetric) + func_convert_in_out = _apply_to_outputs(_convert_to_tensor)(func_convert_inputs) + return func_convert_in_out + + +def _tensor_metric_conversion(func_to_decorate): + # Converts all inputs to tensor if possible + func_convert_inputs = _apply_to_inputs(_convert_to_tensor)(func_to_decorate) + # convert all outputs to tensor if possible + return _apply_to_outputs(_convert_to_tensor)(func_convert_inputs) + + +def _sync_ddp(result: Union[torch.Tensor], + group: Any = torch.distributed.group.WORLD, + reduce_op: torch.distributed.ReduceOp = torch.distributed.ReduceOp.SUM, + ) -> torch.Tensor: + """ + Function to reduce the tensors from several ddp processes to one master process + + Args: + result: the value to sync and reduce (typically tensor or number) + device: the device to put the synced and reduced value to + dtype: the datatype to convert the synced and reduced value to + group: the 
process group to gather results from. Defaults to all processes (world) + reduce_op: the reduction operation. Defaults to sum + + Returns: + reduced value + + """ + + if torch.distributed.is_available() and torch.distributed.is_initialized(): + # sync all processes before reduction + torch.distributed.barrier(group=group) + torch.distributed.all_reduce(result, op=reduce_op, group=group, + async_op=False) + + return result + + +def numpy_metric(group: Any = torch.distributed.group.WORLD, + reduce_op: torch.distributed.ReduceOp = torch.distributed.ReduceOp.SUM): + def decorator_fn(func_to_decorate): + return _apply_to_outputs(apply_to_collection, torch.Tensor, _sync_ddp, + group=group, + reduce_op=reduce_op)(_numpy_metric_conversion(func_to_decorate)) + + return decorator_fn + + +def tensor_metric(group: Any = torch.distributed.group.WORLD, + reduce_op: torch.distributed.ReduceOp = torch.distributed.ReduceOp.SUM): + def decorator_fn(func_to_decorate): + return _apply_to_outputs(apply_to_collection, torch.Tensor, _sync_ddp, + group=group, + reduce_op=reduce_op)(_tensor_metric_conversion(func_to_decorate)) + + return decorator_fn From 4f546b5d7c46e32faf8d9c8722a9ca6467a4c683 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Wed, 1 Apr 2020 16:32:01 +0200 Subject: [PATCH 02/44] Create __init__.py --- tests/metrics/__init__.py | 205 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) diff --git a/tests/metrics/__init__.py b/tests/metrics/__init__.py index e69de29bb2d1d..a6dfcf8be94a4 100644 --- a/tests/metrics/__init__.py +++ b/tests/metrics/__init__.py @@ -0,0 +1,205 @@ +import numpy as np +import pytest +import torch +import torch.distributed as dist + +import tests.base.utils as tutils +from pytorch_lightning.metrics.utils import _apply_to_inputs, _apply_to_outputs, \ + _convert_to_tensor, _convert_to_numpy, _numpy_metric_conversion, \ + _tensor_metric_conversion, _sync_ddp, tensor_metric, numpy_metric + + +def test_apply_to_inputs(): + def apply_fn(inputs, factor): + if isinstance(inputs, (float, int)): + return inputs * factor + elif isinstance(inputs, dict): + return {k: apply_fn(v, factor) for k, v in inputs.items()} + elif isinstance(inputs, (tuple, list)): + return [apply_fn(x, factor) for x in inputs] + + @_apply_to_inputs(apply_fn, factor=2.) + def test_fn(*args, **kwargs): + return args, kwargs + + for args in [[], [1., 2.]]: + for kwargs in [{}, {1., 2.}]: + result_args, result_kwargs = test_fn(*args, **kwargs) + assert isinstance(result_args, list) + assert isinstance(result_kwargs, dict) + assert len(result_args) == len(args) + assert len(result_kwargs) == len(kwargs) + assert all([k in result_kwargs for k in kwargs.keys()]) + for arg, result_arg in zip(args, result_args): + assert arg * 2. == result_arg + + for key in kwargs.keys(): + arg = kwargs[key], + result_arg = result_kwargs[key] + assert arg * 2. == result_arg + + +def test_apply_to_outputs(): + def apply_fn(inputs, additional_str): + return str(inputs) + additional_str + + @_apply_to_outputs(apply_fn, additional_str='_str') + def test_fn(*args, **kwargs): + return 'dummy' + + assert test_fn() == 'dummy_str' + + +def test_convert_to_tensor(): + for test_item in [1., np.array([1.])]: + assert isinstance(_convert_to_tensor(test_item), torch.Tensor) + assert test_item.item() == 1. 
+ + +def test_convert_to_numpy(): + for test_item in [1., torch.tensor([1.])]: + result = _convert_to_numpy(test_item) + assert isinstance(result, np.ndarray) + assert result.item() == 1. + + +def test_numpy_metric_conversion(): + @_numpy_metric_conversion + def numpy_test_metric(*args, **kwargs): + for arg in args: + assert isinstance(arg, np.ndarray) + + for v in kwargs.values(): + assert isinstance(v, np.ndarray) + + return 5. + + result = numpy_test_metric(torch.tensor([1.]), dummy_kwarg=2.) + assert isinstance(result, torch.Tensor) + assert result.item() == 5. + + +def test_tensor_metric_conversion(): + @_tensor_metric_conversion + def tensor_test_metric(*args, **kwargs): + for arg in args: + assert isinstance(arg, torch.Tensor) + + for v in kwargs.values(): + assert isinstance(v, torch.Tensor) + + return 5. + + result = tensor_test_metric(np.array([1.]), dummy_kwarg=2.) + assert isinstance(result, torch.Tensor) + assert result.item() == 5. + + +@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") +def test_sync_reduce_ddp(): + """Make sure sync-reduce works with DDP""" + tutils.reset_seed() + tutils.set_random_master_port() + + dist.init_process_group('gloo') + + tensor = torch.tensor([1.], device='cuda:0') + + reduced_tensor = _sync_ddp(tensor) + + assert reduced_tensor.item() == dist.get_world_size(), \ + 'Sync-Reduce does not work properly with DDP and Tensors' + + number = 1. + reduced_number = _sync_ddp(number) + assert isinstance(reduced_number, torch.Tensor), 'When reducing a number we should get a tensor out' + assert reduced_number.item() == dist.get_world_size(), \ + 'Sync-Reduce does not work properly with DDP and Numbers' + + dist.destroy_process_group() + + +def test_sync_reduce_simple(): + """Make sure sync-reduce works without DDP""" + tensor = torch.tensor([1.], device='cpu') + + reduced_tensor = _sync_ddp(tensor) + + assert torch.allclose(tensor, + reduced_tensor), 'Sync-Reduce does not work properly without DDP and Tensors' + + number = 1. + + reduced_number = _sync_ddp(number) + assert isinstance(reduced_number, torch.Tensor), 'When reducing a number we should get a tensor out' + assert reduced_number.item() == number, 'Sync-Reduce does not work properly without DDP and Numbers' + + +def _test_tensor_metric(is_ddp: bool): + @tensor_metric() + def tensor_test_metric(*args, **kwargs): + for arg in args: + assert isinstance(arg, torch.Tensor) + + for v in kwargs.values(): + assert isinstance(v, torch.Tensor) + + return 5. + + if is_ddp: + factor = dist.get_world_size() + else: + factor = 1. + + result = tensor_test_metric(np.array([1.]), dummy_kwarg=2.) + assert isinstance(result, torch.Tensor) + assert result.item() == 5. * factor + + +@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") +def test_tensor_metric_ddp(): + tutils.reset_seed() + tutils.set_random_master_port() + + dist.init_process_group('gloo') + _test_tensor_metric(True) + dist.destroy_process_group() + + +def test_tensor_metric_simple(): + _test_tensor_metric(False) + + +def _test_numpy_metric(is_ddp: bool): + @numpy_metric() + def numpy_test_metric(*args, **kwargs): + for arg in args: + assert isinstance(arg, np.ndarray) + + for v in kwargs.values(): + assert isinstance(v, np.ndarray) + + return 5. + + if is_ddp: + factor = dist.get_world_size() + else: + factor = 1. + + result = numpy_test_metric(torch.tensor([1.]), dummy_kwarg=2.) + assert isinstance(result, torch.Tensor) + assert result.item() == 5. 
* factor
+
+
+@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine")
+def test_numpy_metric_ddp():
+    tutils.reset_seed()
+    tutils.set_random_master_port()
+
+    dist.init_process_group('gloo')
+    _test_tensor_metric(True)
+    dist.destroy_process_group()
+
+
+def test_numpy_metric_simple():
+    _test_tensor_metric(False)

From ae19aa897fe50eda66964170e156d1bb50551127 Mon Sep 17 00:00:00 2001
From: Justus Schock
Date: Wed, 1 Apr 2020 16:36:58 +0200
Subject: [PATCH 03/44] redo sklearn metrics

---
 environment.yml                      |   2 +
 pytorch_lightning/metrics/sklearn.py | 141 +++++++++++++++++++++++++++
 requirements-extra.txt               |   3 +-
 3 files changed, 145 insertions(+), 1 deletion(-)
 create mode 100644 pytorch_lightning/metrics/sklearn.py

diff --git a/environment.yml b/environment.yml
index f2718a99c3a45..cad6c002d1a4d 100644
--- a/environment.yml
+++ b/environment.yml
@@ -26,6 +26,8 @@ dependencies:
   - autopep8
   - check-manifest
   - twine==1.13.0
+  - pillow<7.0.0
+  - scikit-learn>=0.16.1
   - pip:
     - test-tube>=0.7.5

diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py
new file mode 100644
index 0000000000000..d4875b25bfe5b
--- /dev/null
+++ b/pytorch_lightning/metrics/sklearn.py
@@ -0,0 +1,131 @@
+from typing import Any, Optional, Union
+
+import numpy as np
+
+import torch
+
+from pytorch_lightning import _logger as lightning_logger
+from pytorch_lightning.metrics.metric import NumpyMetric
+
+
+class SklearnMetric(NumpyMetric):
+    def __init__(self, metric_name: str,
+                 reduce_group: Any = torch.distributed.group.WORLD,
+                 reduce_op: Any = torch.distributed.ReduceOp.SUM, **kwargs):
+        """
+        Bridge between PyTorch Lightning and scikit-learn metrics
+
+        .. warning::
+            Every metric call will cause a GPU synchronization, which may slow down your code
+
+        .. note::
+            The order of targets and predictions may be different from the order typically used in PyTorch
+
+        Args:
+            metric_name: the metric name to import and compute from scikit-learn.metrics
+            reduce_group: the process group for DDP reduces (only needed for DDP training).
+                Defaults to all processes (world)
+            reduce_op: the operation to perform during reduction within DDP (only needed for DDP training).
+                Defaults to sum.
+            **kwargs: additional keyword arguments (will be forwarded to metric call)
+        """
+        super().__init__(name=metric_name, reduce_group=reduce_group,
+                         reduce_op=reduce_op)
+
+        self.metric_kwargs = kwargs
+
+        lightning_logger.debug(
+            'Every metric call will cause a GPU synchronization, which may slow down your code')
+
+    @property
+    def metric_fn(self):
+        import sklearn.metrics
+        return getattr(sklearn.metrics, self.name)
+
+    def forward(self, *args, **kwargs) -> Union[np.ndarray, int, float]:
+        """
+        Carries the actual metric computation and therefore co
+        Args:
+            *args: Positional arguments forwarded to metric call (should be already converted to numpy)
+            **kwargs: keyword arguments forwarded to metric call (should be already converted to numpy)
+
+        Returns:
+            the metric value (will be converted to tensor by baseclass
+
+        """
+        return self.metric_fn(*args, **kwargs)
+
+
+# metrics : accuracy, auc, average_precision (AP), confusion_matrix, f1, fbeta, hamm, precision, recall, precision_recall_curve, roc, roc_auc, r2, jaccard
+
+class Accuracy(SklearnMetric):
+    def __init__(self, normalize: bool = True,
+                 reduce_group: Any = torch.distributed.group.WORLD,
+                 reduce_op: Any = torch.distributed.ReduceOp.SUM):
+        """
+        Calculates the Accuracy Score
+
+        .. warning::
+            Every metric call will cause a GPU synchronization, which may slow down your code
+
+        Args:
+            normalize: If ``False``, return the number of correctly classified samples.
+                Otherwise, return the fraction of correctly classified samples.
+            reduce_group: the process group for DDP reduces (only needed for DDP training).
+                Defaults to all processes (world)
+            reduce_op: the operation to perform during reduction within DDP (only needed for DDP training).
+                Defaults to sum.
+        """
+        super().__init__(metric_name='accuracy_score',
+                         reduce_group=reduce_group,
+                         reduce_op=reduce_op,
+                         normalize=normalize)
+
+    def forward(self, y_pred: np.ndarray, y_true: np.ndarray,
+                sample_weight: Optional[np.ndarray] = None) -> float:
+        """
+        Computes the accuracy
+        Args:
+            y_pred: the array containing the predictions (already in categorical form)
+            y_true: the array containing the targets (in categorical form)
+            sample_weight: Sample weights.
+
+        Returns:
+            Accuracy Score
+
+
+        """
+        return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight)
+
+class AUC(SklearnMetric):
+    def __init__(self, reorder: bool = False,
+                 reduce_group: Any = torch.distributed.group.WORLD,
+                 reduce_op: Any = torch.distributed.ReduceOp.SUM
+                 ):
+        """
+        Calculates the Area Under the Curve using the trapezoidal rule
+
+        .. warning::
+            Every metric call will cause a GPU synchronization, which may slow down your code
+
+        Args:
+            reorder: If ``True``, assume that the curve is ascending in the case of ties, as for an ROC curve.
+                If the curve is non-ascending, the result will be wrong.
+            reduce_group: the process group for DDP reduces (only needed for DDP training).
+                Defaults to all processes (world)
+            reduce_op: the operation to perform during reduction within DDP (only needed for DDP training).
+                Defaults to sum.
+        """
+
+        super().__init__(metric_name='auc',
+                         reduce_group=reduce_group,
+                         reduce_op=reduce_op,
+                         reorder=reorder)
+
+    def forward(self, x: np.ndarray, y: np.ndarray) -> float:
+        """
+        Computes the AUC
+        Args:
+            x: x coordinates.
+            y: y coordinates.
+ + Returns: + AUC calculated with trapezoidal rule + + """ + return super().forward(x=x, y=y) + + + + + diff --git a/requirements-extra.txt b/requirements-extra.txt index 30bc84ab5190b..cdc10043bc858 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -9,4 +9,5 @@ trains>=0.14.1 matplotlib>=3.1.1 # no need to install with [pytorch] as pytorch is already installed and torchvision is required only for Horovod examples horovod>=0.19.1 -omegaconf==2.0.0 \ No newline at end of file +omegaconf>=2.0.0 +scikit-learn>=0.16.1 From fec66b4237ec02e5182670b83910c9219a94d615 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Fri, 3 Apr 2020 14:06:55 +0200 Subject: [PATCH 04/44] add some more metrics --- pytorch_lightning/metrics/sklearn.py | 199 ++++++++++++++++++++++++++- 1 file changed, 193 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index d4875b25bfe5b..d9a8c24d2dd62 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -1,7 +1,6 @@ -from typing import Any, Optional, Union +from typing import Any, Optional, Union, Sequence import numpy as np - import torch from pytorch_lightning import _logger as lightning_logger @@ -44,7 +43,7 @@ def metric_fn(self): def forward(self, *args, **kwargs) -> Union[np.ndarray, int, float]: """ - Carries the actual metric computation and therefore co + Carries the actual metric computation Args: *args: Positional arguments forwarded to metric call (should be already converted to numpy) **kwargs: keyword arguments forwarded to metric call (should be already converted to numpy) @@ -53,10 +52,8 @@ def forward(self, *args, **kwargs) -> Union[np.ndarray, int, float]: the metric value (will be converted to tensor by baseclass """ - return self.metric_fn(*args, **kwargs) - + return self.metric_fn(*args, **kwargs, **self.metric_kwargs) -# metrics : accuracy, auc, average_precision (AP), confusion_matrix, f1, fbeta, hamm, precision, recall, precision_recall_curve, roc, roc_auc, r2, jaccard class Accuracy(SklearnMetric): def __init__(self, normalize: bool = True, @@ -97,6 +94,7 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) + class AUC(SklearnMetric): def __init__(self, reorder: bool = False, reduce_group: Any = torch.distributed.group.WORLD, @@ -136,6 +134,195 @@ def forward(self, x: np.ndarray, y: np.ndarray) -> float: return super().forward(x=x, y=y) +class AveragePrecision(SklearnMetric): + def __init__(self, average: Optional[str] = 'macro', + reduce_group: Any = torch.distributed.group.WORLD, + reduce_op: Any = torch.distributed.ReduceOp.SUM + ): + """ + Calculates the average precision (AP) score. + Args: + average: If None, the scores for each class are returned. Otherwise, this determines the type of + averaging performed on the data: + * If 'micro': Calculate metrics globally by considering each element of the label indicator + matrix as a label. + * If 'macro': Calculate metrics for each label, and find their unweighted mean. + This does not take label imbalance into account. + * If 'weighted': Calculate metrics for each label, and find their average, weighted by + support (the number of true instances for each label). + * If 'samples': Calculate metrics for each instance, and find their average. + reduce_group: the process group for DDP reduces (only needed for DDP training). 
+ Defaults to all processes (world) + reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). + Defaults to sum. + """ + super().__init__('average_precision_score', + reduce_group=reduce_group, + reduce_op=reduce_op, + average=average) + + def forward(self, y_score: np.ndarray, y_true: np.ndarray, + sample_weight: Optional[np.ndarray] = None) -> float: + """ + + Args: + y_score: Target scores, can either be probability estimates of the positive class, + confidence values, or binary decisions. + y_true: True binary labels in binary label indicators. + sample_weight: Sample weights. + Returns: + average precision score + """ + return super().forward(y_score=y_score, y_true=y_true, + sample_weight=sample_weight) + + +class ConfusionMatric(SklearnMetric): + def __init__(self, labels: Optional[Sequence] = None, + reduce_group: Any = torch.distributed.group.WORLD, + reduce_op: Any = torch.distributed.ReduceOp.SUM + ): + """ + Compute confusion matrix to evaluate the accuracy of a classification + By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}` + is equal to the number of observations known to be in group :math:`i` but + predicted to be in group :math:`j`. + + Args: + labels: List of labels to index the matrix. This may be used to reorder + or select a subset of labels. + If none is given, those that appear at least once + in ``y_true`` or ``y_pred`` are used in sorted order. + reduce_group: the process group for DDP reduces (only needed for DDP training). + Defaults to all processes (world) + reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). + Defaults to sum. + """ + super().__init__('confusion_matrix', + reduce_group=reduce_group, + reduce_op=reduce_op, + labels=labels) + + def forward(self, y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray: + """ + + Args: + y_pred: Estimated targets as returned by a classifier. + y_true: Ground truth (correct) target values. + + Returns: Confusion matrix (array of shape [n_classes, n_classes]) + + """ + return super().forward(y_pred=y_pred, y_true=y_true) + + +class F1(SklearnMetric): + """ + Compute the F1 score, also known as balanced F-score or F-measure + The F1 score can be interpreted as a weighted average of the precision and + recall, where an F1 score reaches its best value at 1 and worst score at 0. + The relative contribution of precision and recall to the F1 score are + equal. The formula for the F1 score is:: + F1 = 2 * (precision * recall) / (precision + recall) + In the multi-class and multi-label case, this is the weighted average of + the F1 score of each class. + + References: + .. [1] `Wikipedia entry for the F1-score + `_ + """ + + def __init__(self, labels: Optional[Sequence] = None, + pos_labels: Union[str, int] = 1, + average: Optional[str] = 'binary', + reduce_group: Any = torch.distributed.group.WORLD, + reduce_op: Any = torch.distributed.ReduceOp.SUM): + """ + + Args: + labels: Integer array of labels. + pos_labels: The class to report if ``average='binary'``. + average: This parameter is required for multiclass/multilabel targets. + If ``None``, the scores for each class are returned. Otherwise, this + determines the type of averaging performed on the data: + ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. 
+ ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + ``'weighted'``: + Calculate metrics for each label, and find their average, weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + Note that if ``pos_label`` is given in binary classification with + `average != 'binary'`, only that positive class is reported. This + behavior is deprecated and will change in version 0.18. + reduce_group: the process group for DDP reduces (only needed for DDP training). + Defaults to all processes (world) + reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). + Defaults to sum. + """ + super().__init__('f1_score', + reduce_group=reduce_group, + reduce_op=reduce_op, + labels=labels, + pos_labels=pos_labels, + average=average) + + def forward(self, y_pred: np.ndarray, y_true: np.ndarray, + sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: + """ + + Args: + y_pred : Estimated targets as returned by a classifier. + y_true: Ground truth (correct) target values. + sample_weight: Sample weights. + + + Returns: F1 score of the positive class in binary classification or weighted + average of the F1 scores of each class for the multiclass task. + + """ + return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) + + +class FBeta(SklearnMetric): + + pass + + +class Precision(SklearnMetric): + pass + + +class Recall(SklearnMetric): + pass + + +class PrecisionRecallCurve(SklearnMetric): + pass + + +class ROC(SklearnMetric): + pass + + +class AUROC(SklearnMetric): + pass + +class R2(SklearnMetric): + pass +class Jaccard(SklearnMetric): + pass From 08ad7b0756eeb2c11884fc632216cfcdaa6d4c76 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 13 Apr 2020 12:56:43 +0200 Subject: [PATCH 05/44] add sklearn metrics --- pytorch_lightning/metrics/sklearn.py | 385 ++++++++++++++++++++++++++- 1 file changed, 373 insertions(+), 12 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index d9a8c24d2dd62..d084f18e810b8 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -177,7 +177,7 @@ def forward(self, y_score: np.ndarray, y_true: np.ndarray, sample_weight=sample_weight) -class ConfusionMatric(SklearnMetric): +class ConfusionMatrix(SklearnMetric): def __init__(self, labels: Optional[Sequence] = None, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM @@ -296,33 +296,394 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, class FBeta(SklearnMetric): + """ + Compute the F-beta score.The `beta` parameter determines the weight of precision in the combined + score. ``beta < 1`` lends more weight to precision, while ``beta > 1`` + favors recall (``beta -> 0`` considers only precision, ``beta -> inf`` + only recall). + + References: + .. [1] R. Baeza-Yates and B. Ribeiro-Neto (2011). + Modern Information Retrieval. Addison Wesley, pp. 327-328. + .. 
[2] `Wikipedia entry for the F1-score + `_ + """ - pass + def __init__(self, beta: float, labels: Optional[Sequence] = None, + pos_labels: Union[str, int] = 1, + average: Optional[str] = 'binary', + reduce_group: Any = torch.distributed.group.WORLD, + reduce_op: Any = torch.distributed.ReduceOp.SUM): + """ + + Args: + beta: Weight of precision in harmonic mean. + labels: Integer array of labels. + pos_labels: The class to report if ``average='binary'``. + average: This parameter is required for multiclass/multilabel targets. + If ``None``, the scores for each class are returned. Otherwise, this + determines the type of averaging performed on the data: + ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + ``'weighted'``: + Calculate metrics for each label, and find their average, weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + Note that if ``pos_label`` is given in binary classification with + `average != 'binary'`, only that positive class is reported. This + behavior is deprecated and will change in version 0.18. + reduce_group: the process group for DDP reduces (only needed for DDP training). + Defaults to all processes (world) + reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). + Defaults to sum. + """ + super().__init__('fbeta_score', + reduce_group=reduce_group, + reduce_op=reduce_op, + beta=beta, + labels=labels, + pos_labels=pos_labels, + average=average) + + def forward(self, y_pred: np.ndarray, y_true: np.ndarray, + sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: + """ + + Args: + y_pred : Estimated targets as returned by a classifier. + y_true: Ground truth (correct) target values. + sample_weight: Sample weights. + + + Returns: FBeta score of the positive class in binary classification or weighted + average of the FBeta scores of each class for the multiclass task. + + """ + return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) class Precision(SklearnMetric): - pass + """ + Compute the precision + The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of + true positives and ``fp`` the number of false positives. The precision is + intuitively the ability of the classifier not to label as positive a sample + that is negative. + The best value is 1 and the worst value is 0. + + """ + def __init__(self, labels: Optional[Sequence] = None, + pos_labels: Union[str, int] = 1, + average: Optional[str] = 'binary', + reduce_group: Any = torch.distributed.group.WORLD, + reduce_op: Any = torch.distributed.ReduceOp.SUM): + """ + + Args: + labels: Integer array of labels. + pos_labels: The class to report if ``average='binary'``. + average: This parameter is required for multiclass/multilabel targets. + If ``None``, the scores for each class are returned. 
Otherwise, this + determines the type of averaging performed on the data: + ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + ``'weighted'``: + Calculate metrics for each label, and find their average, weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + Note that if ``pos_label`` is given in binary classification with + `average != 'binary'`, only that positive class is reported. This + behavior is deprecated and will change in version 0.18. + reduce_group: the process group for DDP reduces (only needed for DDP training). + Defaults to all processes (world) + reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). + Defaults to sum. + """ + super().__init__('precision_score', + reduce_group=reduce_group, + reduce_op=reduce_op, + labels=labels, + pos_labels=pos_labels, + average=average) + + def forward(self, y_pred: np.ndarray, y_true: np.ndarray, + sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: + """ + + Args: + y_pred : Estimated targets as returned by a classifier. + y_true: Ground truth (correct) target values. + sample_weight: Sample weights. + + + Returns: Precision of the positive class in binary classification or weighted + average of the precision of each class for the multiclass task. + + """ + return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) class Recall(SklearnMetric): - pass + """ + Compute the recall + The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of + true positives and ``fn`` the number of false negatives. The recall is + intuitively the ability of the classifier to find all the positive samples. + The best value is 1 and the worst value is 0. + + """ + + def __init__(self, labels: Optional[Sequence] = None, + pos_labels: Union[str, int] = 1, + average: Optional[str] = 'binary', + reduce_group: Any = torch.distributed.group.WORLD, + reduce_op: Any = torch.distributed.ReduceOp.SUM): + """ + + Args: + labels: Integer array of labels. + pos_labels: The class to report if ``average='binary'``. + average: This parameter is required for multiclass/multilabel targets. + If ``None``, the scores for each class are returned. Otherwise, this + determines the type of averaging performed on the data: + ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + ``'weighted'``: + Calculate metrics for each label, and find their average, weighted + by support (the number of true instances for each label). 
This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + Note that if ``pos_label`` is given in binary classification with + `average != 'binary'`, only that positive class is reported. This + behavior is deprecated and will change in version 0.18. + reduce_group: the process group for DDP reduces (only needed for DDP training). + Defaults to all processes (world) + reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). + Defaults to sum. + """ + super().__init__('recall_score', + reduce_group=reduce_group, + reduce_op=reduce_op, + labels=labels, + pos_labels=pos_labels, + average=average) + + def forward(self, y_pred: np.ndarray, y_true: np.ndarray, + sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: + """ + + Args: + y_pred : Estimated targets as returned by a classifier. + y_true: Ground truth (correct) target values. + sample_weight: Sample weights. + + + Returns: Recall of the positive class in binary classification or weighted + average of the recall of each class for the multiclass task. + + """ + return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) class PrecisionRecallCurve(SklearnMetric): - pass + """ + Compute precision-recall pairs for different probability thresholds + + Note: + this implementation is restricted to the binary classification task. + + The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of + true positives and ``fp`` the number of false positives. The precision is + intuitively the ability of the classifier not to label as positive a sample + that is negative. + The recall is the ratio ``tp / (tp + fn)`` where ``tp`` is the number of + true positives and ``fn`` the number of false negatives. The recall is + intuitively the ability of the classifier to find all the positive samples. + The last precision and recall values are 1. and 0. respectively and do not + have a corresponding threshold. This ensures that the graph starts on the + x axis. + + """ + + def __init__(self, + pos_labels: Union[str, int] = 1, + reduce_group: Any = torch.distributed.group.WORLD, + reduce_op: Any = torch.distributed.ReduceOp.SUM): + """ + + Args: + pos_labels: The class to report if ``average='binary'``. + reduce_group: the process group for DDP reduces (only needed for DDP training). + Defaults to all processes (world) + reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). + Defaults to sum. + """ + super().__init__('precision_recall_curve', + reduce_group=reduce_group, + reduce_op=reduce_op, + pos_labels=pos_labels) + + def forward(self, probas_pred: np.ndarray, y_true: np.ndarray, + sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: + """ + + Args: + probas_pred : Estimated probabilities or decision function. + y_true: Ground truth (correct) target values. + sample_weight: Sample weights. + + + Returns: + precision: + Precision values such that element i is the precision of + predictions with score >= thresholds[i] and the last element is 1. + recall: + Decreasing recall values such that element i is the recall of + predictions with score >= thresholds[i] and the last element is 0. 
+ thresholds: + Increasing thresholds on the decision function used to compute + precision and recall. + + """ + return super().forward(probas_pred=probas_pred, y_true=y_true, sample_weight=sample_weight) class ROC(SklearnMetric): - pass + """ + Compute Receiver operating characteristic (ROC) + Note: + this implementation is restricted to the binary classification task. -class AUROC(SklearnMetric): - pass + """ + + def __init__(self, + pos_labels: Union[str, int] = 1, + reduce_group: Any = torch.distributed.group.WORLD, + reduce_op: Any = torch.distributed.ReduceOp.SUM): + """ + + Args: + pos_labels: The class to report if ``average='binary'``. + reduce_group: the process group for DDP reduces (only needed for DDP training). + Defaults to all processes (world) + reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). + Defaults to sum. + References: + .. [1] `Wikipedia entry for the Receiver operating characteristic + `_ + """ + super().__init__('roc_curve', + reduce_group=reduce_group, + reduce_op=reduce_op, + pos_labels=pos_labels) -class R2(SklearnMetric): - pass + def forward(self, y_score: np.ndarray, y_true: np.ndarray, + sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: + """ + + Args: + y_score : Target scores, can either be probability estimates of the positive + class or confidence values. + y_true: Ground truth (correct) target values. + sample_weight: Sample weights. + + + Returns: + fpr: + Increasing false positive rates such that element i is the false + positive rate of predictions with score >= thresholds[i]. + tpr: + Increasing true positive rates such that element i is the true + positive rate of predictions with score >= thresholds[i]. + thresholds: + Decreasing thresholds on the decision function used to compute + fpr and tpr. `thresholds[0]` represents no instances being predicted + and is arbitrarily set to `max(y_score) + 1`. + + """ + return super().forward(y_score=y_score, y_true=y_true, sample_weight=sample_weight) + + +class AUROC(SklearnMetric): + """ + Compute Area Under the Curve (AUC) from prediction scores + Note: + this implementation is restricted to the binary classification task + or multilabel classification task in label indicator format. + """ + def __init__(self, average: Optional[str] = 'macro', + reduce_group: Any = torch.distributed.group.WORLD, + reduce_op: Any = torch.distributed.ReduceOp.SUM + ): + """ + Args: + average: If None, the scores for each class are returned. Otherwise, this determines the type of + averaging performed on the data: + * If 'micro': Calculate metrics globally by considering each element of the label indicator + matrix as a label. + * If 'macro': Calculate metrics for each label, and find their unweighted mean. + This does not take label imbalance into account. + * If 'weighted': Calculate metrics for each label, and find their average, weighted by + support (the number of true instances for each label). + * If 'samples': Calculate metrics for each instance, and find their average. + reduce_group: the process group for DDP reduces (only needed for DDP training). + Defaults to all processes (world) + reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). + Defaults to sum. 
+ """ + super().__init__('roc_auc_score', + reduce_group=reduce_group, + reduce_op=reduce_op, + average=average) + def forward(self, y_score: np.ndarray, y_true: np.ndarray, + sample_weight: Optional[np.ndarray] = None) -> float: + """ -class Jaccard(SklearnMetric): - pass + Args: + y_score: Target scores, can either be probability estimates of the positive class, + confidence values, or binary decisions. + y_true: True binary labels in binary label indicators. + sample_weight: Sample weights. + Returns: + Area Under Receiver Operating Characteristic Curve + """ + return super().forward(y_score=y_score, y_true=y_true, + sample_weight=sample_weight) From 2722c08e9ffb0e4ac46519c8f916d3635a64b701 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Fri, 3 Apr 2020 21:10:40 +0200 Subject: [PATCH 06/44] New metric classes (#1326) * Create metrics package * Create metric.py * Create utils.py * Create __init__.py * add tests for metric utils * add docstrings for metrics utils * add function to recursively apply other function to collection * add tests for this function * update test * Update pytorch_lightning/metrics/metric.py Co-Authored-By: Jirka Borovec * update metric name * remove example docs * fix tests * add metric tests * fix to tensor conversion * fix apply to collection * Update CHANGELOG.md * Update pytorch_lightning/metrics/metric.py Co-Authored-By: Jirka Borovec * remove tests from init * add missing type annotations * rename utils to convertors * Create metrics.rst * Update index.rst * Update index.rst * Update pytorch_lightning/metrics/convertors.py Co-Authored-By: Jirka Borovec * Update pytorch_lightning/metrics/convertors.py Co-Authored-By: Jirka Borovec * Update pytorch_lightning/metrics/convertors.py Co-Authored-By: Jirka Borovec * Update pytorch_lightning/metrics/metric.py Co-Authored-By: Jirka Borovec * Update tests/utilities/test_apply_to_collection.py Co-Authored-By: Jirka Borovec * Update tests/utilities/test_apply_to_collection.py Co-Authored-By: Jirka Borovec * Update tests/metrics/convertors.py Co-Authored-By: Jirka Borovec * Apply suggestions from code review Co-Authored-By: Jirka Borovec * add doctest example * rename file and fix imports * added parametrized test * replace lambda with inlined function * rename apply_to_collection to apply_func * Separated class description from init args * Apply suggestions from code review Co-Authored-By: Jirka Borovec * adjust random values * suppress output when seeding * remove gpu from doctest * Add requested changes and add ellipsis for doctest * forgot to push these files... 
* add explicit check for dtype to convert to * fix ddp tests * remove explicit ddp destruction Co-authored-by: Jirka Borovec --- tests/metrics/__init__.py | 205 -------------------------------------- 1 file changed, 205 deletions(-) diff --git a/tests/metrics/__init__.py b/tests/metrics/__init__.py index a6dfcf8be94a4..e69de29bb2d1d 100644 --- a/tests/metrics/__init__.py +++ b/tests/metrics/__init__.py @@ -1,205 +0,0 @@ -import numpy as np -import pytest -import torch -import torch.distributed as dist - -import tests.base.utils as tutils -from pytorch_lightning.metrics.utils import _apply_to_inputs, _apply_to_outputs, \ - _convert_to_tensor, _convert_to_numpy, _numpy_metric_conversion, \ - _tensor_metric_conversion, _sync_ddp, tensor_metric, numpy_metric - - -def test_apply_to_inputs(): - def apply_fn(inputs, factor): - if isinstance(inputs, (float, int)): - return inputs * factor - elif isinstance(inputs, dict): - return {k: apply_fn(v, factor) for k, v in inputs.items()} - elif isinstance(inputs, (tuple, list)): - return [apply_fn(x, factor) for x in inputs] - - @_apply_to_inputs(apply_fn, factor=2.) - def test_fn(*args, **kwargs): - return args, kwargs - - for args in [[], [1., 2.]]: - for kwargs in [{}, {1., 2.}]: - result_args, result_kwargs = test_fn(*args, **kwargs) - assert isinstance(result_args, list) - assert isinstance(result_kwargs, dict) - assert len(result_args) == len(args) - assert len(result_kwargs) == len(kwargs) - assert all([k in result_kwargs for k in kwargs.keys()]) - for arg, result_arg in zip(args, result_args): - assert arg * 2. == result_arg - - for key in kwargs.keys(): - arg = kwargs[key], - result_arg = result_kwargs[key] - assert arg * 2. == result_arg - - -def test_apply_to_outputs(): - def apply_fn(inputs, additional_str): - return str(inputs) + additional_str - - @_apply_to_outputs(apply_fn, additional_str='_str') - def test_fn(*args, **kwargs): - return 'dummy' - - assert test_fn() == 'dummy_str' - - -def test_convert_to_tensor(): - for test_item in [1., np.array([1.])]: - assert isinstance(_convert_to_tensor(test_item), torch.Tensor) - assert test_item.item() == 1. - - -def test_convert_to_numpy(): - for test_item in [1., torch.tensor([1.])]: - result = _convert_to_numpy(test_item) - assert isinstance(result, np.ndarray) - assert result.item() == 1. - - -def test_numpy_metric_conversion(): - @_numpy_metric_conversion - def numpy_test_metric(*args, **kwargs): - for arg in args: - assert isinstance(arg, np.ndarray) - - for v in kwargs.values(): - assert isinstance(v, np.ndarray) - - return 5. - - result = numpy_test_metric(torch.tensor([1.]), dummy_kwarg=2.) - assert isinstance(result, torch.Tensor) - assert result.item() == 5. - - -def test_tensor_metric_conversion(): - @_tensor_metric_conversion - def tensor_test_metric(*args, **kwargs): - for arg in args: - assert isinstance(arg, torch.Tensor) - - for v in kwargs.values(): - assert isinstance(v, torch.Tensor) - - return 5. - - result = tensor_test_metric(np.array([1.]), dummy_kwarg=2.) - assert isinstance(result, torch.Tensor) - assert result.item() == 5. 
- - -@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") -def test_sync_reduce_ddp(): - """Make sure sync-reduce works with DDP""" - tutils.reset_seed() - tutils.set_random_master_port() - - dist.init_process_group('gloo') - - tensor = torch.tensor([1.], device='cuda:0') - - reduced_tensor = _sync_ddp(tensor) - - assert reduced_tensor.item() == dist.get_world_size(), \ - 'Sync-Reduce does not work properly with DDP and Tensors' - - number = 1. - reduced_number = _sync_ddp(number) - assert isinstance(reduced_number, torch.Tensor), 'When reducing a number we should get a tensor out' - assert reduced_number.item() == dist.get_world_size(), \ - 'Sync-Reduce does not work properly with DDP and Numbers' - - dist.destroy_process_group() - - -def test_sync_reduce_simple(): - """Make sure sync-reduce works without DDP""" - tensor = torch.tensor([1.], device='cpu') - - reduced_tensor = _sync_ddp(tensor) - - assert torch.allclose(tensor, - reduced_tensor), 'Sync-Reduce does not work properly without DDP and Tensors' - - number = 1. - - reduced_number = _sync_ddp(number) - assert isinstance(reduced_number, torch.Tensor), 'When reducing a number we should get a tensor out' - assert reduced_number.item() == number, 'Sync-Reduce does not work properly without DDP and Numbers' - - -def _test_tensor_metric(is_ddp: bool): - @tensor_metric() - def tensor_test_metric(*args, **kwargs): - for arg in args: - assert isinstance(arg, torch.Tensor) - - for v in kwargs.values(): - assert isinstance(v, torch.Tensor) - - return 5. - - if is_ddp: - factor = dist.get_world_size() - else: - factor = 1. - - result = tensor_test_metric(np.array([1.]), dummy_kwarg=2.) - assert isinstance(result, torch.Tensor) - assert result.item() == 5. * factor - - -@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") -def test_tensor_metric_ddp(): - tutils.reset_seed() - tutils.set_random_master_port() - - dist.init_process_group('gloo') - _test_tensor_metric(True) - dist.destroy_process_group() - - -def test_tensor_metric_simple(): - _test_tensor_metric(False) - - -def _test_numpy_metric(is_ddp: bool): - @numpy_metric() - def numpy_test_metric(*args, **kwargs): - for arg in args: - assert isinstance(arg, np.ndarray) - - for v in kwargs.values(): - assert isinstance(v, np.ndarray) - - return 5. - - if is_ddp: - factor = dist.get_world_size() - else: - factor = 1. - - result = numpy_test_metric(torch.tensor([1.]), dummy_kwarg=2.) - assert isinstance(result, torch.Tensor) - assert result.item() == 5. 
* factor - - -@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") -def test_numpy_metric_ddp(): - tutils.reset_seed() - tutils.set_random_master_port() - - dist.init_process_group('gloo') - _test_tensor_metric(True) - dist.destroy_process_group() - - -def test_numpy_metric_simple(): - _test_tensor_metric(False) From d7bf19a7320f7b30d27368d4251d34bd9e9c712d Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Wed, 1 Apr 2020 16:32:01 +0200 Subject: [PATCH 07/44] Create __init__.py --- tests/metrics/__init__.py | 205 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) diff --git a/tests/metrics/__init__.py b/tests/metrics/__init__.py index e69de29bb2d1d..a6dfcf8be94a4 100644 --- a/tests/metrics/__init__.py +++ b/tests/metrics/__init__.py @@ -0,0 +1,205 @@ +import numpy as np +import pytest +import torch +import torch.distributed as dist + +import tests.base.utils as tutils +from pytorch_lightning.metrics.utils import _apply_to_inputs, _apply_to_outputs, \ + _convert_to_tensor, _convert_to_numpy, _numpy_metric_conversion, \ + _tensor_metric_conversion, _sync_ddp, tensor_metric, numpy_metric + + +def test_apply_to_inputs(): + def apply_fn(inputs, factor): + if isinstance(inputs, (float, int)): + return inputs * factor + elif isinstance(inputs, dict): + return {k: apply_fn(v, factor) for k, v in inputs.items()} + elif isinstance(inputs, (tuple, list)): + return [apply_fn(x, factor) for x in inputs] + + @_apply_to_inputs(apply_fn, factor=2.) + def test_fn(*args, **kwargs): + return args, kwargs + + for args in [[], [1., 2.]]: + for kwargs in [{}, {1., 2.}]: + result_args, result_kwargs = test_fn(*args, **kwargs) + assert isinstance(result_args, list) + assert isinstance(result_kwargs, dict) + assert len(result_args) == len(args) + assert len(result_kwargs) == len(kwargs) + assert all([k in result_kwargs for k in kwargs.keys()]) + for arg, result_arg in zip(args, result_args): + assert arg * 2. == result_arg + + for key in kwargs.keys(): + arg = kwargs[key], + result_arg = result_kwargs[key] + assert arg * 2. == result_arg + + +def test_apply_to_outputs(): + def apply_fn(inputs, additional_str): + return str(inputs) + additional_str + + @_apply_to_outputs(apply_fn, additional_str='_str') + def test_fn(*args, **kwargs): + return 'dummy' + + assert test_fn() == 'dummy_str' + + +def test_convert_to_tensor(): + for test_item in [1., np.array([1.])]: + assert isinstance(_convert_to_tensor(test_item), torch.Tensor) + assert test_item.item() == 1. + + +def test_convert_to_numpy(): + for test_item in [1., torch.tensor([1.])]: + result = _convert_to_numpy(test_item) + assert isinstance(result, np.ndarray) + assert result.item() == 1. + + +def test_numpy_metric_conversion(): + @_numpy_metric_conversion + def numpy_test_metric(*args, **kwargs): + for arg in args: + assert isinstance(arg, np.ndarray) + + for v in kwargs.values(): + assert isinstance(v, np.ndarray) + + return 5. + + result = numpy_test_metric(torch.tensor([1.]), dummy_kwarg=2.) + assert isinstance(result, torch.Tensor) + assert result.item() == 5. + + +def test_tensor_metric_conversion(): + @_tensor_metric_conversion + def tensor_test_metric(*args, **kwargs): + for arg in args: + assert isinstance(arg, torch.Tensor) + + for v in kwargs.values(): + assert isinstance(v, torch.Tensor) + + return 5. + + result = tensor_test_metric(np.array([1.]), dummy_kwarg=2.) + assert isinstance(result, torch.Tensor) + assert result.item() == 5. 
+ + +@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") +def test_sync_reduce_ddp(): + """Make sure sync-reduce works with DDP""" + tutils.reset_seed() + tutils.set_random_master_port() + + dist.init_process_group('gloo') + + tensor = torch.tensor([1.], device='cuda:0') + + reduced_tensor = _sync_ddp(tensor) + + assert reduced_tensor.item() == dist.get_world_size(), \ + 'Sync-Reduce does not work properly with DDP and Tensors' + + number = 1. + reduced_number = _sync_ddp(number) + assert isinstance(reduced_number, torch.Tensor), 'When reducing a number we should get a tensor out' + assert reduced_number.item() == dist.get_world_size(), \ + 'Sync-Reduce does not work properly with DDP and Numbers' + + dist.destroy_process_group() + + +def test_sync_reduce_simple(): + """Make sure sync-reduce works without DDP""" + tensor = torch.tensor([1.], device='cpu') + + reduced_tensor = _sync_ddp(tensor) + + assert torch.allclose(tensor, + reduced_tensor), 'Sync-Reduce does not work properly without DDP and Tensors' + + number = 1. + + reduced_number = _sync_ddp(number) + assert isinstance(reduced_number, torch.Tensor), 'When reducing a number we should get a tensor out' + assert reduced_number.item() == number, 'Sync-Reduce does not work properly without DDP and Numbers' + + +def _test_tensor_metric(is_ddp: bool): + @tensor_metric() + def tensor_test_metric(*args, **kwargs): + for arg in args: + assert isinstance(arg, torch.Tensor) + + for v in kwargs.values(): + assert isinstance(v, torch.Tensor) + + return 5. + + if is_ddp: + factor = dist.get_world_size() + else: + factor = 1. + + result = tensor_test_metric(np.array([1.]), dummy_kwarg=2.) + assert isinstance(result, torch.Tensor) + assert result.item() == 5. * factor + + +@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") +def test_tensor_metric_ddp(): + tutils.reset_seed() + tutils.set_random_master_port() + + dist.init_process_group('gloo') + _test_tensor_metric(True) + dist.destroy_process_group() + + +def test_tensor_metric_simple(): + _test_tensor_metric(False) + + +def _test_numpy_metric(is_ddp: bool): + @numpy_metric() + def numpy_test_metric(*args, **kwargs): + for arg in args: + assert isinstance(arg, np.ndarray) + + for v in kwargs.values(): + assert isinstance(v, np.ndarray) + + return 5. + + if is_ddp: + factor = dist.get_world_size() + else: + factor = 1. + + result = numpy_test_metric(torch.tensor([1.]), dummy_kwarg=2.) + assert isinstance(result, torch.Tensor) + assert result.item() == 5. * factor + + +@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") +def test_numpy_metric_ddp(): + tutils.reset_seed() + tutils.set_random_master_port() + + dist.init_process_group('gloo') + _test_tensor_metric(True) + dist.destroy_process_group() + + +def test_numpy_metric_simple(): + _test_tensor_metric(False) From ba2c6f7387fe66cf89c6b2589e6669baee0ec80e Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Wed, 1 Apr 2020 16:36:58 +0200 Subject: [PATCH 08/44] redo sklearn metrics --- pytorch_lightning/metrics/sklearn.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index d084f18e810b8..c3daac3110ba5 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -384,6 +384,7 @@ class Precision(SklearnMetric): The best value is 1 and the worst value is 0. 
""" + def __init__(self, labels: Optional[Sequence] = None, pos_labels: Union[str, int] = 1, average: Optional[str] = 'binary', @@ -648,6 +649,7 @@ class AUROC(SklearnMetric): this implementation is restricted to the binary classification task or multilabel classification task in label indicator format. """ + def __init__(self, average: Optional[str] = 'macro', reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM From 6595de8a87aec43b536ba9a368201c828f31fbe7 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 13 Apr 2020 12:56:43 +0200 Subject: [PATCH 09/44] add sklearn metrics --- pytorch_lightning/metrics/sklearn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index c3daac3110ba5..7f4832cd775e1 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -625,7 +625,6 @@ class or confidence values. y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: fpr: Increasing false positive rates such that element i is the false From 729690ea5ed5d39967d622283b7aaae2fa1ca0c1 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Sun, 26 Apr 2020 16:01:49 +0200 Subject: [PATCH 10/44] start adding sklearn tests --- tests/metrics/__init__.py | 205 -------------------------- tests/metrics/test_sklearn_metrics.py | 48 ++++++ 2 files changed, 48 insertions(+), 205 deletions(-) create mode 100644 tests/metrics/test_sklearn_metrics.py diff --git a/tests/metrics/__init__.py b/tests/metrics/__init__.py index a6dfcf8be94a4..e69de29bb2d1d 100644 --- a/tests/metrics/__init__.py +++ b/tests/metrics/__init__.py @@ -1,205 +0,0 @@ -import numpy as np -import pytest -import torch -import torch.distributed as dist - -import tests.base.utils as tutils -from pytorch_lightning.metrics.utils import _apply_to_inputs, _apply_to_outputs, \ - _convert_to_tensor, _convert_to_numpy, _numpy_metric_conversion, \ - _tensor_metric_conversion, _sync_ddp, tensor_metric, numpy_metric - - -def test_apply_to_inputs(): - def apply_fn(inputs, factor): - if isinstance(inputs, (float, int)): - return inputs * factor - elif isinstance(inputs, dict): - return {k: apply_fn(v, factor) for k, v in inputs.items()} - elif isinstance(inputs, (tuple, list)): - return [apply_fn(x, factor) for x in inputs] - - @_apply_to_inputs(apply_fn, factor=2.) - def test_fn(*args, **kwargs): - return args, kwargs - - for args in [[], [1., 2.]]: - for kwargs in [{}, {1., 2.}]: - result_args, result_kwargs = test_fn(*args, **kwargs) - assert isinstance(result_args, list) - assert isinstance(result_kwargs, dict) - assert len(result_args) == len(args) - assert len(result_kwargs) == len(kwargs) - assert all([k in result_kwargs for k in kwargs.keys()]) - for arg, result_arg in zip(args, result_args): - assert arg * 2. == result_arg - - for key in kwargs.keys(): - arg = kwargs[key], - result_arg = result_kwargs[key] - assert arg * 2. == result_arg - - -def test_apply_to_outputs(): - def apply_fn(inputs, additional_str): - return str(inputs) + additional_str - - @_apply_to_outputs(apply_fn, additional_str='_str') - def test_fn(*args, **kwargs): - return 'dummy' - - assert test_fn() == 'dummy_str' - - -def test_convert_to_tensor(): - for test_item in [1., np.array([1.])]: - assert isinstance(_convert_to_tensor(test_item), torch.Tensor) - assert test_item.item() == 1. 
- - -def test_convert_to_numpy(): - for test_item in [1., torch.tensor([1.])]: - result = _convert_to_numpy(test_item) - assert isinstance(result, np.ndarray) - assert result.item() == 1. - - -def test_numpy_metric_conversion(): - @_numpy_metric_conversion - def numpy_test_metric(*args, **kwargs): - for arg in args: - assert isinstance(arg, np.ndarray) - - for v in kwargs.values(): - assert isinstance(v, np.ndarray) - - return 5. - - result = numpy_test_metric(torch.tensor([1.]), dummy_kwarg=2.) - assert isinstance(result, torch.Tensor) - assert result.item() == 5. - - -def test_tensor_metric_conversion(): - @_tensor_metric_conversion - def tensor_test_metric(*args, **kwargs): - for arg in args: - assert isinstance(arg, torch.Tensor) - - for v in kwargs.values(): - assert isinstance(v, torch.Tensor) - - return 5. - - result = tensor_test_metric(np.array([1.]), dummy_kwarg=2.) - assert isinstance(result, torch.Tensor) - assert result.item() == 5. - - -@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") -def test_sync_reduce_ddp(): - """Make sure sync-reduce works with DDP""" - tutils.reset_seed() - tutils.set_random_master_port() - - dist.init_process_group('gloo') - - tensor = torch.tensor([1.], device='cuda:0') - - reduced_tensor = _sync_ddp(tensor) - - assert reduced_tensor.item() == dist.get_world_size(), \ - 'Sync-Reduce does not work properly with DDP and Tensors' - - number = 1. - reduced_number = _sync_ddp(number) - assert isinstance(reduced_number, torch.Tensor), 'When reducing a number we should get a tensor out' - assert reduced_number.item() == dist.get_world_size(), \ - 'Sync-Reduce does not work properly with DDP and Numbers' - - dist.destroy_process_group() - - -def test_sync_reduce_simple(): - """Make sure sync-reduce works without DDP""" - tensor = torch.tensor([1.], device='cpu') - - reduced_tensor = _sync_ddp(tensor) - - assert torch.allclose(tensor, - reduced_tensor), 'Sync-Reduce does not work properly without DDP and Tensors' - - number = 1. - - reduced_number = _sync_ddp(number) - assert isinstance(reduced_number, torch.Tensor), 'When reducing a number we should get a tensor out' - assert reduced_number.item() == number, 'Sync-Reduce does not work properly without DDP and Numbers' - - -def _test_tensor_metric(is_ddp: bool): - @tensor_metric() - def tensor_test_metric(*args, **kwargs): - for arg in args: - assert isinstance(arg, torch.Tensor) - - for v in kwargs.values(): - assert isinstance(v, torch.Tensor) - - return 5. - - if is_ddp: - factor = dist.get_world_size() - else: - factor = 1. - - result = tensor_test_metric(np.array([1.]), dummy_kwarg=2.) - assert isinstance(result, torch.Tensor) - assert result.item() == 5. * factor - - -@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") -def test_tensor_metric_ddp(): - tutils.reset_seed() - tutils.set_random_master_port() - - dist.init_process_group('gloo') - _test_tensor_metric(True) - dist.destroy_process_group() - - -def test_tensor_metric_simple(): - _test_tensor_metric(False) - - -def _test_numpy_metric(is_ddp: bool): - @numpy_metric() - def numpy_test_metric(*args, **kwargs): - for arg in args: - assert isinstance(arg, np.ndarray) - - for v in kwargs.values(): - assert isinstance(v, np.ndarray) - - return 5. - - if is_ddp: - factor = dist.get_world_size() - else: - factor = 1. - - result = numpy_test_metric(torch.tensor([1.]), dummy_kwarg=2.) - assert isinstance(result, torch.Tensor) - assert result.item() == 5. 
* factor - - -@pytest.mark.skipif(torch.cuda.device_count() < 2, "test requires multi-GPU machine") -def test_numpy_metric_ddp(): - tutils.reset_seed() - tutils.set_random_master_port() - - dist.init_process_group('gloo') - _test_tensor_metric(True) - dist.destroy_process_group() - - -def test_numpy_metric_simple(): - _test_tensor_metric(False) diff --git a/tests/metrics/test_sklearn_metrics.py b/tests/metrics/test_sklearn_metrics.py new file mode 100644 index 0000000000000..bc7c7b07cb540 --- /dev/null +++ b/tests/metrics/test_sklearn_metrics.py @@ -0,0 +1,48 @@ +import numbers +from collections import Mapping, Sequence + +import numpy as np +import pytest +import torch +from sklearn.metrics import accuracy_score, average_precision_score, auc + +from pytorch_lightning.metrics.converters import _convert_to_numpy +from pytorch_lightning.metrics.sklearn import Accuracy, AveragePrecision, AUC +from pytorch_lightning.utilities.apply_func import apply_to_collection + + +@pytest.mark.parametrize(['metric_class', 'sklearn_func', 'inputs'], [ + pytest.param(Accuracy(), accuracy_score, + {'y_pred': torch.randint(low=0, high=10, size=(10,)), + 'y_true': torch.randint(low=0, high=10, size=(10,))}), + pytest.param(AUC(), auc, {'x': torch.arange(10, dtype=torch.float)/10, + 'y': torch.tensor([0.2, 0.2, 0.2, 0.2, 0.2, + 0.2, 0.3, 0.5, 0.6, 0.7])}) +]) +def test_sklearn_metric(metric_class, sklearn_func, inputs: dict): + numpy_inputs = apply_to_collection( + inputs, (torch.Tensor, np.ndarray, numbers.Number), _convert_to_numpy) + + sklearn_result = sklearn_func(**numpy_inputs) + lightning_result = metric_class(**inputs) + + sklearn_result = apply_to_collection( + sklearn_result, (torch.Tensor, np.ndarray, numbers.Number), _convert_to_numpy) + + lightning_result = apply_to_collection( + lightning_result, (torch.Tensor, np.ndarray, numbers.Number), _convert_to_numpy) + + assert isinstance(lightning_result, type(sklearn_result)) + + if isinstance(lightning_result, np.ndarray): + assert np.allclose(lightning_result, sklearn_result) + elif isinstance(lightning_result, Mapping): + for key in lightning_result.keys(): + assert np.allclose(lightning_result[key], sklearn_result[key]) + + elif isinstance(lightning_result, Sequence): + for val_lightning, val_sklearn in zip(lightning_result, sklearn_result): + assert np.allclose(val_lightning, val_sklearn) + + else: + raise TypeError From 429dab6896ef536f74862f07674c1202654bf954 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 08:38:58 +0200 Subject: [PATCH 11/44] fix typo --- pytorch_lightning/metrics/sklearn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 7f4832cd775e1..8fb7c5aa69223 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -233,7 +233,7 @@ class F1(SklearnMetric): """ def __init__(self, labels: Optional[Sequence] = None, - pos_labels: Union[str, int] = 1, + pos_label: Union[str, int] = 1, average: Optional[str] = 'binary', reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): @@ -241,7 +241,7 @@ def __init__(self, labels: Optional[Sequence] = None, Args: labels: Integer array of labels. - pos_labels: The class to report if ``average='binary'``. + pos_label: The class to report if ``average='binary'``. average: This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. 
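Editor's note: the parametrised test above leans on `apply_to_collection` plus `_convert_to_numpy` to turn an arbitrary dict of tensors into numpy arrays before calling the reference scikit-learn function. Roughly, as a sketch of that helper combination on its own:

    import numbers

    import numpy as np
    import torch

    from pytorch_lightning.metrics.converters import _convert_to_numpy
    from pytorch_lightning.utilities.apply_func import apply_to_collection

    inputs = {'y_pred': torch.randint(low=0, high=10, size=(128,)),
              'y_true': torch.randint(low=0, high=10, size=(128,))}

    # every tensor/number leaf in the collection becomes a numpy array
    numpy_inputs = apply_to_collection(
        inputs, (torch.Tensor, np.ndarray, numbers.Number), _convert_to_numpy)
    assert all(isinstance(v, np.ndarray) for v in numpy_inputs.values())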
Otherwise, this determines the type of averaging performed on the data: @@ -275,7 +275,7 @@ def __init__(self, labels: Optional[Sequence] = None, reduce_group=reduce_group, reduce_op=reduce_op, labels=labels, - pos_labels=pos_labels, + pos_label=pos_label, average=average) def forward(self, y_pred: np.ndarray, y_true: np.ndarray, From 387b9b29c751493de6a03f5898980ba8b2d5565a Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 08:39:24 +0200 Subject: [PATCH 12/44] fix typo --- pytorch_lightning/metrics/sklearn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 8fb7c5aa69223..855fd5aeaf67c 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -310,7 +310,7 @@ class FBeta(SklearnMetric): """ def __init__(self, beta: float, labels: Optional[Sequence] = None, - pos_labels: Union[str, int] = 1, + pos_label: Union[str, int] = 1, average: Optional[str] = 'binary', reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): @@ -319,7 +319,7 @@ def __init__(self, beta: float, labels: Optional[Sequence] = None, Args: beta: Weight of precision in harmonic mean. labels: Integer array of labels. - pos_labels: The class to report if ``average='binary'``. + pos_label: The class to report if ``average='binary'``. average: This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -354,7 +354,7 @@ def __init__(self, beta: float, labels: Optional[Sequence] = None, reduce_op=reduce_op, beta=beta, labels=labels, - pos_labels=pos_labels, + pos_label=pos_label, average=average) def forward(self, y_pred: np.ndarray, y_true: np.ndarray, From 74ab62bfbae4be2dfc01018fcd55878c19b99c63 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 08:42:29 +0200 Subject: [PATCH 13/44] fix typo --- pytorch_lightning/metrics/sklearn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 855fd5aeaf67c..5d605ae05b484 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -386,7 +386,7 @@ class Precision(SklearnMetric): """ def __init__(self, labels: Optional[Sequence] = None, - pos_labels: Union[str, int] = 1, + pos_label: Union[str, int] = 1, average: Optional[str] = 'binary', reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): @@ -394,7 +394,7 @@ def __init__(self, labels: Optional[Sequence] = None, Args: labels: Integer array of labels. - pos_labels: The class to report if ``average='binary'``. + pos_label: The class to report if ``average='binary'``. average: This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. 
Otherwise, this determines the type of averaging performed on the data: @@ -428,7 +428,7 @@ def __init__(self, labels: Optional[Sequence] = None, reduce_group=reduce_group, reduce_op=reduce_op, labels=labels, - pos_labels=pos_labels, + pos_label=pos_label, average=average) def forward(self, y_pred: np.ndarray, y_true: np.ndarray, From 8e5f1d6b20436768abbae9c640f2c243fccba1db Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 08:42:48 +0200 Subject: [PATCH 14/44] fix typo --- pytorch_lightning/metrics/sklearn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 5d605ae05b484..66303b6b4fdee 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -459,7 +459,7 @@ class Recall(SklearnMetric): """ def __init__(self, labels: Optional[Sequence] = None, - pos_labels: Union[str, int] = 1, + pos_label: Union[str, int] = 1, average: Optional[str] = 'binary', reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): @@ -467,7 +467,7 @@ def __init__(self, labels: Optional[Sequence] = None, Args: labels: Integer array of labels. - pos_labels: The class to report if ``average='binary'``. + pos_label: The class to report if ``average='binary'``. average: This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -501,7 +501,7 @@ def __init__(self, labels: Optional[Sequence] = None, reduce_group=reduce_group, reduce_op=reduce_op, labels=labels, - pos_labels=pos_labels, + pos_label=pos_label, average=average) def forward(self, y_pred: np.ndarray, y_true: np.ndarray, From 50822687669bce4960c77cd49857679d18ff31b6 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 09:01:36 +0200 Subject: [PATCH 15/44] return x and y only for curves --- pytorch_lightning/metrics/sklearn.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 66303b6b4fdee..22e380bea2a7e 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -542,13 +542,13 @@ class PrecisionRecallCurve(SklearnMetric): """ def __init__(self, - pos_labels: Union[str, int] = 1, + pos_label: Union[str, int] = 1, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): """ Args: - pos_labels: The class to report if ``average='binary'``. + pos_label: The class to report if ``average='binary'``. reduce_group: the process group for DDP reduces (only needed for DDP training). Defaults to all processes (world) reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). @@ -557,7 +557,7 @@ def __init__(self, super().__init__('precision_recall_curve', reduce_group=reduce_group, reduce_op=reduce_op, - pos_labels=pos_labels) + pos_label=pos_label) def forward(self, probas_pred: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: @@ -581,7 +581,10 @@ def forward(self, probas_pred: np.ndarray, y_true: np.ndarray, precision and recall. """ - return super().forward(probas_pred=probas_pred, y_true=y_true, sample_weight=sample_weight) + # only return x and y here, since for now we cannot auto-convert elements of multiple length. 
+ # Will be fixed in native implementation + return np.array( + super().forward(probas_pred=probas_pred, y_true=y_true, sample_weight=sample_weight)[:2]) class ROC(SklearnMetric): @@ -638,7 +641,7 @@ class or confidence values. and is arbitrarily set to `max(y_score) + 1`. """ - return super().forward(y_score=y_score, y_true=y_true, sample_weight=sample_weight) + return np.array(super().forward(y_score=y_score, y_true=y_true, sample_weight=sample_weight)[:2]) class AUROC(SklearnMetric): From 10cde374bd65aa3de0e36e36a8f6f2ef72022924 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 09:01:51 +0200 Subject: [PATCH 16/44] fix typo --- pytorch_lightning/metrics/sklearn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 22e380bea2a7e..134f84dba2581 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -597,7 +597,7 @@ class ROC(SklearnMetric): """ def __init__(self, - pos_labels: Union[str, int] = 1, + pos_label: Union[str, int] = 1, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): """ @@ -616,7 +616,7 @@ def __init__(self, super().__init__('roc_curve', reduce_group=reduce_group, reduce_op=reduce_op, - pos_labels=pos_labels) + pos_label=pos_label) def forward(self, y_score: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: From 1a1762d888f655e09794919091e332e8151be21e Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 09:03:15 +0200 Subject: [PATCH 17/44] add missing tests for sklearn funcs --- tests/metrics/test_sklearn_metrics.py | 52 +++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/tests/metrics/test_sklearn_metrics.py b/tests/metrics/test_sklearn_metrics.py index bc7c7b07cb540..5009887cdfecc 100644 --- a/tests/metrics/test_sklearn_metrics.py +++ b/tests/metrics/test_sklearn_metrics.py @@ -1,23 +1,63 @@ import numbers from collections import Mapping, Sequence +from functools import partial import numpy as np import pytest import torch -from sklearn.metrics import accuracy_score, average_precision_score, auc +from sklearn.metrics import accuracy_score, average_precision_score, auc, confusion_matrix, f1_score, \ + fbeta_score, precision_score, recall_score, precision_recall_curve, roc_curve, roc_auc_score from pytorch_lightning.metrics.converters import _convert_to_numpy -from pytorch_lightning.metrics.sklearn import Accuracy, AveragePrecision, AUC +from pytorch_lightning.metrics.sklearn import Accuracy, AveragePrecision, AUC, ConfusionMatrix, F1, FBeta, \ + Precision, Recall, PrecisionRecallCurve, ROC, AUROC from pytorch_lightning.utilities.apply_func import apply_to_collection +def xy_only(func): + def new_func(*args, **kwargs): + return np.array(func(*args, **kwargs)[:2]) + + return new_func + + @pytest.mark.parametrize(['metric_class', 'sklearn_func', 'inputs'], [ pytest.param(Accuracy(), accuracy_score, - {'y_pred': torch.randint(low=0, high=10, size=(10,)), - 'y_true': torch.randint(low=0, high=10, size=(10,))}), - pytest.param(AUC(), auc, {'x': torch.arange(10, dtype=torch.float)/10, + {'y_pred': torch.randint(low=0, high=10, size=(128,)), + 'y_true': torch.randint(low=0, high=10, size=(128,))}), + pytest.param(AUC(), auc, {'x': torch.arange(10, dtype=torch.float) / 10, 'y': torch.tensor([0.2, 0.2, 0.2, 0.2, 0.2, - 0.2, 0.3, 0.5, 0.6, 0.7])}) + 0.2, 0.3, 0.5, 0.6, 
0.7])}), + pytest.param(AveragePrecision(), average_precision_score, + {'y_score': torch.randint(2, size=(128,)), + 'y_true': torch.randint(2, size=(128,))}), + pytest.param(ConfusionMatrix(), confusion_matrix, + {'y_pred': torch.randint(10, size=(128,)), + 'y_true': torch.randint(10, size=(128,))}), + pytest.param(F1(average='macro'), partial(f1_score, average='macro'), + {'y_pred': torch.randint(10, size=(128,)), + 'y_true': torch.randint(10, size=(128,))}), + pytest.param(FBeta(beta=0.5, average='macro'), partial(fbeta_score, + beta=0.5, + average='macro'), + {'y_pred': torch.randint(10, size=(128,)), + 'y_true': torch.randint(10, size=(128,))}), + pytest.param(Precision(average='macro'), partial(precision_score, + average='macro'), + {'y_pred': torch.randint(10, size=(128,)), + 'y_true': torch.randint(10, size=(128,))}), + pytest.param(Recall(average='macro'), partial(recall_score, average='macro'), + {'y_pred': torch.randint(10, size=(128,)), + 'y_true': torch.randint(10, size=(128,))}), + pytest.param(PrecisionRecallCurve(), xy_only(precision_recall_curve), + {'probas_pred': torch.rand(size=(128,)), + 'y_true': torch.randint(2, size=(128,))}), + pytest.param(ROC(), xy_only(roc_curve), + {'y_score': torch.rand(size=(128,)), + 'y_true': torch.randint(2, size=(128,))}), + pytest.param(AUROC(), roc_auc_score, + {'y_score': torch.rand(size=(128,)), + 'y_true': torch.randint(2, size=(128,))}), ]) def test_sklearn_metric(metric_class, sklearn_func, inputs: dict): numpy_inputs = apply_to_collection( From a698282e7e8fc806fbfd611ac737558307240794 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 09:09:20 +0200 Subject: [PATCH 18/44] imports --- tests/metrics/test_sklearn_metrics.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/metrics/test_sklearn_metrics.py b/tests/metrics/test_sklearn_metrics.py index 5009887cdfecc..fc2f04dcdf479 100644 --- a/tests/metrics/test_sklearn_metrics.py +++ b/tests/metrics/test_sklearn_metrics.py @@ -5,12 +5,13 @@ import numpy as np import pytest import torch -from sklearn.metrics import accuracy_score, average_precision_score, auc, confusion_matrix, f1_score, \ - fbeta_score, precision_score, recall_score, precision_recall_curve, roc_curve, roc_auc_score +from sklearn.metrics import (accuracy_score, average_precision_score, auc, confusion_matrix, f1_score, + fbeta_score, precision_score, recall_score, precision_recall_curve, roc_curve, + roc_auc_score) from pytorch_lightning.metrics.converters import _convert_to_numpy -from pytorch_lightning.metrics.sklearn import Accuracy, AveragePrecision, AUC, ConfusionMatrix, F1, FBeta, \ - Precision, Recall, PrecisionRecallCurve, ROC, AUROC +from pytorch_lightning.metrics.sklearn import (Accuracy, AveragePrecision, AUC, ConfusionMatrix, F1, FBeta, + Precision, Recall, PrecisionRecallCurve, ROC, AUROC) from pytorch_lightning.utilities.apply_func import apply_to_collection From debd245745310f694b489c7e92d80fdd6af55323 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 09:09:37 +0200 Subject: [PATCH 19/44] __all__ --- pytorch_lightning/metrics/sklearn.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 134f84dba2581..8eb858021f045 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -6,6 +6,9 @@ from pytorch_lightning import _logger as lightning_logger from pytorch_lightning.metrics.metric import NumpyMetric +__all__ = 
['SklearnMetric', 'Accuracy', 'AveragePrecision', 'AUC', 'ConfusionMatrix', 'F1', 'FBeta', + 'Precision', 'Recall', 'PrecisionRecallCurve', 'ROC', 'AUROC'] + class SklearnMetric(NumpyMetric): def __init__(self, metric_name: str, From 04adf4b6875a29667af1c6b9f5bdbab62d44d7ef Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 27 Apr 2020 09:11:19 +0200 Subject: [PATCH 20/44] imports --- pytorch_lightning/metrics/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pytorch_lightning/metrics/__init__.py b/pytorch_lightning/metrics/__init__.py index cd721851307df..cdc7f3f4d90fc 100644 --- a/pytorch_lightning/metrics/__init__.py +++ b/pytorch_lightning/metrics/__init__.py @@ -22,3 +22,9 @@ """ + +from pytorch_lightning.metrics.metric import Metric, TensorMetric, NumpyMetric +from pytorch_lightning.metrics.sklearn import (SklearnMetric, Accuracy, AveragePrecision, AUC, + ConfusionMatrix, F1, FBeta, + Precision, Recall, PrecisionRecallCurve, ROC, AUROC) +from pytorch_lightning.metrics.converters import numpy_metric, tensor_metric From c72bda94a5e397774ebe4ccac35c4c2fe24fd137 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 25 May 2020 08:54:01 +0200 Subject: [PATCH 21/44] fix sklearn arguments --- pytorch_lightning/metrics/sklearn.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 8eb858021f045..1fbb1bc124761 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -99,7 +99,7 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, class AUC(SklearnMetric): - def __init__(self, reorder: bool = False, + def __init__(self, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM ): @@ -110,8 +110,6 @@ def __init__(self, reorder: bool = False, Every metric call will cause a GPU synchronization, which may slow down your code Args: - reorder: If ``True``, assume that the curve is ascending in the case of ties, as for an ROC curve. - If the curve is non-ascending, the result will be wrong. reduce_group: the process group for DDP reduces (only needed for DDP training). Defaults to all processes (world) reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). 
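Editor's note: with `reorder` removed, the wrapped `sklearn.metrics.auc` integrates the curve exactly as given, so the x coordinates must already be monotonic. A small usage sketch of the wrapper (assuming the instance is called directly, as the other metrics are in the tests):

    import torch
    from pytorch_lightning.metrics.sklearn import AUC

    auc = AUC()
    x = torch.tensor([0.0, 0.1, 0.35, 0.8, 1.0])  # must already be sorted (ascending here)
    y = torch.tensor([0.0, 0.5, 0.75, 0.9, 1.0])
    area = auc(x=x, y=y)  # trapezoidal area under the piecewise-linear curve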
@@ -121,7 +119,7 @@ def __init__(self, reorder: bool = False, super().__init__(metric_name='auc', reduce_group=reduce_group, reduce_op=reduce_op, - reorder=reorder) + ) def forward(self, x: np.ndarray, y: np.ndarray) -> float: """ From ca471d663d833289446cba477c822c7899250481 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 25 May 2020 08:54:09 +0200 Subject: [PATCH 22/44] fix imports --- pytorch_lightning/metrics/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/metrics/utils.py b/pytorch_lightning/metrics/utils.py index 9942545273546..e284b9494d8b8 100644 --- a/pytorch_lightning/metrics/utils.py +++ b/pytorch_lightning/metrics/utils.py @@ -5,7 +5,7 @@ import torch from torch.utils.data._utils.collate import default_convert -from pytorch_lightning.utilities.apply_to_collection import apply_to_collection +from pytorch_lightning.utilities.apply_func import apply_to_collection def _apply_to_inputs(func_to_apply, *dec_args, **dec_kwargs): From e9f5faf8e56fc398b52aaf3df4df0958b23346e6 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Mon, 25 May 2020 09:12:07 +0200 Subject: [PATCH 23/44] update requirements --- environment.yml | 4 +++- requirements-extra.txt | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index cad6c002d1a4d..98f5fb81e1cdd 100644 --- a/environment.yml +++ b/environment.yml @@ -27,7 +27,9 @@ dependencies: - check-manifest - twine==1.13.0 - pillow<7.0.0 - - scikit-learn>=0.16.1 + - scipy>=0.13.3 + - scikit-learn>=0.20.0 + - pip: - test-tube>=0.7.5 diff --git a/requirements-extra.txt b/requirements-extra.txt index cdc10043bc858..1ce0aa550212f 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -10,4 +10,5 @@ matplotlib>=3.1.1 # no need to install with [pytorch] as pytorch is already installed and torchvision is required only for Horovod examples horovod>=0.19.1 omegaconf>=2.0.0 -scikit-learn>=0.16.1 +scipy>=0.13.3 +scikit-learn>=0.20.0 From 13f205a19a3c75a8ccae8e13af78ae6cfe32ad6f Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 25 May 2020 18:24:03 +0200 Subject: [PATCH 24/44] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80070aeb3dde6..b615abc47a4d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,6 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- ## [unreleased] - YYYY-MM-DD ### Added From b5dbdb8302857ec0034ab9607447bfe73aadabf5 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 25 May 2020 18:28:30 +0200 Subject: [PATCH 25/44] Update test_sklearn_metrics.py --- tests/metrics/test_sklearn_metrics.py | 29 ++++++++++++--------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/tests/metrics/test_sklearn_metrics.py b/tests/metrics/test_sklearn_metrics.py index fc2f04dcdf479..e075330d60a3c 100644 --- a/tests/metrics/test_sklearn_metrics.py +++ b/tests/metrics/test_sklearn_metrics.py @@ -25,40 +25,37 @@ def new_func(*args, **kwargs): @pytest.mark.parametrize(['metric_class', 'sklearn_func', 'inputs'], [ pytest.param(Accuracy(), accuracy_score, {'y_pred': torch.randint(low=0, high=10, size=(128,)), - 'y_true': torch.randint(low=0, high=10, size=(128,))}), + 'y_true': torch.randint(low=0, high=10, size=(128,))}, id='Accuracy'), pytest.param(AUC(), auc, {'x': torch.arange(10, dtype=torch.float) / 10, 'y': torch.tensor([0.2, 0.2, 0.2, 0.2, 0.2, - 0.2, 0.3, 0.5, 0.6, 0.7])}), + 0.2, 0.3, 0.5, 0.6, 0.7])}, id='AUC'), pytest.param(AveragePrecision(), average_precision_score, {'y_score': torch.randint(2, size=(128,)), - 'y_true': torch.randint(2, size=(128,))}), + 'y_true': torch.randint(2, size=(128,))}, id='AveragePrecision'), pytest.param(ConfusionMatrix(), confusion_matrix, {'y_pred': torch.randint(10, size=(128,)), - 'y_true': torch.randint(10, size=(128,))}), + 'y_true': torch.randint(10, size=(128,))}, id='ConfusionMatrix'), pytest.param(F1(average='macro'), partial(f1_score, average='macro'), {'y_pred': torch.randint(10, size=(128,)), - 'y_true': torch.randint(10, size=(128,))}), - pytest.param(FBeta(beta=0.5, average='macro'), partial(fbeta_score, - beta=0.5, - average='macro'), + 'y_true': torch.randint(10, size=(128,))}, id='F1'), + pytest.param(FBeta(beta=0.5, average='macro'), partial(fbeta_score, beta=0.5, average='macro'), {'y_pred': torch.randint(10, size=(128,)), - 'y_true': torch.randint(10, size=(128,))}), - pytest.param(Precision(average='macro'), partial(precision_score, - average='macro'), + 'y_true': torch.randint(10, size=(128,))}, id='FBeta'), + pytest.param(Precision(average='macro'), partial(precision_score, average='macro'), {'y_pred': torch.randint(10, size=(128,)), - 'y_true': torch.randint(10, size=(128,))}), + 'y_true': torch.randint(10, size=(128,))}, id='Precision'), pytest.param(Recall(average='macro'), partial(recall_score, average='macro'), {'y_pred': torch.randint(10, size=(128,)), - 'y_true': torch.randint(10, size=(128,))}), + 'y_true': torch.randint(10, size=(128,))}, id='Recall'), pytest.param(PrecisionRecallCurve(), xy_only(precision_recall_curve), {'probas_pred': torch.rand(size=(128,)), - 'y_true': torch.randint(2, size=(128,))}), + 'y_true': torch.randint(2, size=(128,))}, id='PrecisionRecallCurve'), pytest.param(ROC(), xy_only(roc_curve), {'y_score': torch.rand(size=(128,)), - 'y_true': torch.randint(2, size=(128,))}), + 'y_true': torch.randint(2, size=(128,))}, id='ROC'), pytest.param(AUROC(), roc_auc_score, {'y_score': torch.rand(size=(128,)), - 'y_true': torch.randint(2, size=(128,))}), + 'y_true': torch.randint(2, size=(128,))}, id='AUROC'), ]) def test_sklearn_metric(metric_class, sklearn_func, inputs: dict): numpy_inputs = apply_to_collection( From a7e3e4f035f186ceceee80d714e3d2f12fc2fea5 Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 25 May 2020 21:40:21 +0200 Subject: [PATCH 26/44] formatting --- pytorch_lightning/metrics/__init__.py | 6 ++--- 
pytorch_lightning/metrics/sklearn.py | 34 ++++++++++++++++++--------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/pytorch_lightning/metrics/__init__.py b/pytorch_lightning/metrics/__init__.py index cdc7f3f4d90fc..83446d11701f9 100644 --- a/pytorch_lightning/metrics/__init__.py +++ b/pytorch_lightning/metrics/__init__.py @@ -24,7 +24,7 @@ """ from pytorch_lightning.metrics.metric import Metric, TensorMetric, NumpyMetric -from pytorch_lightning.metrics.sklearn import (SklearnMetric, Accuracy, AveragePrecision, AUC, - ConfusionMatrix, F1, FBeta, - Precision, Recall, PrecisionRecallCurve, ROC, AUROC) +from pytorch_lightning.metrics.sklearn import ( + SklearnMetric, Accuracy, AveragePrecision, AUC, ConfusionMatrix, F1, FBeta, + Precision, Recall, PrecisionRecallCurve, ROC, AUROC) from pytorch_lightning.metrics.converters import numpy_metric, tensor_metric diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 1fbb1bc124761..20b14980f942e 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -6,8 +6,20 @@ from pytorch_lightning import _logger as lightning_logger from pytorch_lightning.metrics.metric import NumpyMetric -__all__ = ['SklearnMetric', 'Accuracy', 'AveragePrecision', 'AUC', 'ConfusionMatrix', 'F1', 'FBeta', - 'Precision', 'Recall', 'PrecisionRecallCurve', 'ROC', 'AUROC'] +__all__ = [ + 'SklearnMetric', + 'Accuracy', + 'AveragePrecision', + 'AUC', + 'ConfusionMatrix', + 'F1', + 'FBeta', + 'Precision', + 'Recall', + 'PrecisionRecallCurve', + 'ROC', + 'AUROC' +] class SklearnMetric(NumpyMetric): @@ -143,15 +155,15 @@ def __init__(self, average: Optional[str] = 'macro', """ Calculates the average precision (AP) score. Args: - average: If None, the scores for each class are returned. Otherwise, this determines the type of - averaging performed on the data: - * If 'micro': Calculate metrics globally by considering each element of the label indicator - matrix as a label. - * If 'macro': Calculate metrics for each label, and find their unweighted mean. - This does not take label imbalance into account. - * If 'weighted': Calculate metrics for each label, and find their average, weighted by - support (the number of true instances for each label). - * If 'samples': Calculate metrics for each instance, and find their average. + average: If None, the scores for each class are returned. + Otherwise, this determines the type of averaging performed on the data: + * If 'micro': Calculate metrics globally by considering each element + of the label indicator matrix as a label. + * If 'macro': Calculate metrics for each label, and find their unweighted mean. + This does not take label imbalance into account. + * If 'weighted': Calculate metrics for each label, and find their average, + weighted by support (the number of true instances for each label). + * If 'samples': Calculate metrics for each instance, and find their average. reduce_group: the process group for DDP reduces (only needed for DDP training). Defaults to all processes (world) reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). 
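Editor's note: the `average` options documented in the hunk above only produce different numbers once there is more than one label column, i.e. for multilabel (label-indicator) targets. A hedged sketch with random multilabel data, assuming the wrapper forwards `average` unchanged to `sklearn.metrics.average_precision_score`:

    import torch
    from pytorch_lightning.metrics.sklearn import AveragePrecision

    y_true = torch.randint(2, size=(64, 3))   # 3-label indicator targets
    y_score = torch.rand(64, 3)               # per-label scores

    ap_macro = AveragePrecision(average='macro')(y_score=y_score, y_true=y_true)  # unweighted mean over labels
    ap_micro = AveragePrecision(average='micro')(y_score=y_score, y_true=y_true)  # pool all label decisions first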
From cc9b1b315e0fbe33719bc7f08e6edd5abae16b84 Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 25 May 2020 21:49:56 +0200 Subject: [PATCH 27/44] formatting --- pytorch_lightning/metrics/sklearn.py | 108 +++++++++++---------------- 1 file changed, 44 insertions(+), 64 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 20b14980f942e..af8cd3c02373b 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -23,18 +23,19 @@ class SklearnMetric(NumpyMetric): + """ + Bridge between PyTorch Lightning and scikit-learn metrics + + .. warning:: + Every metric call will cause a GPU synchronization, which may slow down your code + + .. note:: + The order of targets and predictions may be different from the order typically used in PyTorch + """ def __init__(self, metric_name: str, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM, **kwargs): """ - Bridge between PyTorch Lightning and scikit-learn metrics - - .. warning:: - Every metric call will cause a GPU synchronization, which may slow down your code - - .. note:: - The order of targets and predictions may be different from the order typically used in PyTorch - Args: metric_name: the metric name to import anc compute from scikit-learn.metrics reduce_group: the process group for DDP reduces (only needed for DDP training). @@ -57,8 +58,8 @@ def metric_fn(self): return getattr(sklearn.metrics, self.name) def forward(self, *args, **kwargs) -> Union[np.ndarray, int, float]: - """ - Carries the actual metric computation + """ Carries the actual metric computation + Args: *args: Positional arguments forwarded to metric call (should be already converted to numpy) **kwargs: keyword arguments forwarded to metric call (should be already converted to numpy) @@ -71,15 +72,16 @@ def forward(self, *args, **kwargs) -> Union[np.ndarray, int, float]: class Accuracy(SklearnMetric): + """ + Calculates the Accuracy Score + + .. warning:: + Every metric call will cause a GPU synchronization, which may slow down your code + """ def __init__(self, normalize: bool = True, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): """ - Calculates the Accuracy Score - - .. warning:: - Every metric call will cause a GPU synchronization, which may slow down your code - Args: normalize: If ``False``, return the number of correctly classified samples. Otherwise, return the fraction of correctly classified samples. @@ -95,8 +97,8 @@ def __init__(self, normalize: bool = True, def forward(self, y_pred: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> float: - """ - Computes the accuracy + """ Computes the accuracy + Args: y_pred: the array containing the predictions (already in categorical form) y_true: the array containing the targets (in categorical form) @@ -104,23 +106,22 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, Returns: Accuracy Score - - """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) class AUC(SklearnMetric): + """ + Calculates the Area Under the Curve using the trapoezoidal rule + + .. warning:: + Every metric call will cause a GPU synchronization, which may slow down your code + """ def __init__(self, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM ): """ - Calculates the Area Under the Curve using the trapoezoidal rule - - .. 
warning:: - Every metric call will cause a GPU synchronization, which may slow down your code - Args: reduce_group: the process group for DDP reduces (only needed for DDP training). Defaults to all processes (world) @@ -134,8 +135,8 @@ def __init__(self, ) def forward(self, x: np.ndarray, y: np.ndarray) -> float: - """ - Computes the AUC + """ Computes the AUC + Args: x: x coordinates. y: y coordinates. @@ -148,12 +149,14 @@ def forward(self, x: np.ndarray, y: np.ndarray) -> float: class AveragePrecision(SklearnMetric): + """ + Calculates the average precision (AP) score. + """ def __init__(self, average: Optional[str] = 'macro', reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM ): """ - Calculates the average precision (AP) score. Args: average: If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: @@ -177,7 +180,6 @@ def __init__(self, average: Optional[str] = 'macro', def forward(self, y_score: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> float: """ - Args: y_score: Target scores, can either be probability estimates of the positive class, confidence values, or binary decisions. @@ -191,16 +193,17 @@ def forward(self, y_score: np.ndarray, y_true: np.ndarray, class ConfusionMatrix(SklearnMetric): + """ + Compute confusion matrix to evaluate the accuracy of a classification + By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}` + is equal to the number of observations known to be in group :math:`i` but + predicted to be in group :math:`j`. + """ def __init__(self, labels: Optional[Sequence] = None, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM ): """ - Compute confusion matrix to evaluate the accuracy of a classification - By definition a confusion matrix :math:`C` is such that :math:`C_{i, j}` - is equal to the number of observations known to be in group :math:`i` but - predicted to be in group :math:`j`. - Args: labels: List of labels to index the matrix. This may be used to reorder or select a subset of labels. @@ -218,7 +221,6 @@ def __init__(self, labels: Optional[Sequence] = None, def forward(self, y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray: """ - Args: y_pred: Estimated targets as returned by a classifier. y_true: Ground truth (correct) target values. @@ -251,7 +253,6 @@ def __init__(self, labels: Optional[Sequence] = None, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): """ - Args: labels: Integer array of labels. pos_label: The class to report if ``average='binary'``. @@ -294,13 +295,11 @@ def __init__(self, labels: Optional[Sequence] = None, def forward(self, y_pred: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: """ - Args: y_pred : Estimated targets as returned by a classifier. y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: F1 score of the positive class in binary classification or weighted average of the F1 scores of each class for the multiclass task. @@ -315,12 +314,12 @@ class FBeta(SklearnMetric): favors recall (``beta -> 0`` considers only precision, ``beta -> inf`` only recall). - References: - .. [1] R. Baeza-Yates and B. Ribeiro-Neto (2011). - Modern Information Retrieval. Addison Wesley, pp. 327-328. - .. [2] `Wikipedia entry for the F1-score - `_ - """ + References: + .. [1] R. 
Baeza-Yates and B. Ribeiro-Neto (2011). + Modern Information Retrieval. Addison Wesley, pp. 327-328. + .. [2] `Wikipedia entry for the F1-score + `_ + """ def __init__(self, beta: float, labels: Optional[Sequence] = None, pos_label: Union[str, int] = 1, @@ -328,7 +327,6 @@ def __init__(self, beta: float, labels: Optional[Sequence] = None, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): """ - Args: beta: Weight of precision in harmonic mean. labels: Integer array of labels. @@ -373,13 +371,11 @@ def __init__(self, beta: float, labels: Optional[Sequence] = None, def forward(self, y_pred: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: """ - Args: y_pred : Estimated targets as returned by a classifier. y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: FBeta score of the positive class in binary classification or weighted average of the FBeta scores of each class for the multiclass task. @@ -395,7 +391,6 @@ class Precision(SklearnMetric): intuitively the ability of the classifier not to label as positive a sample that is negative. The best value is 1 and the worst value is 0. - """ def __init__(self, labels: Optional[Sequence] = None, @@ -404,7 +399,6 @@ def __init__(self, labels: Optional[Sequence] = None, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): """ - Args: labels: Integer array of labels. pos_label: The class to report if ``average='binary'``. @@ -447,16 +441,13 @@ def __init__(self, labels: Optional[Sequence] = None, def forward(self, y_pred: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: """ - Args: y_pred : Estimated targets as returned by a classifier. y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: Precision of the positive class in binary classification or weighted average of the precision of each class for the multiclass task. - """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -468,7 +459,6 @@ class Recall(SklearnMetric): true positives and ``fn`` the number of false negatives. The recall is intuitively the ability of the classifier to find all the positive samples. The best value is 1 and the worst value is 0. - """ def __init__(self, labels: Optional[Sequence] = None, @@ -477,7 +467,6 @@ def __init__(self, labels: Optional[Sequence] = None, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): """ - Args: labels: Integer array of labels. pos_label: The class to report if ``average='binary'``. @@ -520,16 +509,13 @@ def __init__(self, labels: Optional[Sequence] = None, def forward(self, y_pred: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: """ - Args: y_pred : Estimated targets as returned by a classifier. y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: Recall of the positive class in binary classification or weighted average of the recall of each class for the multiclass task. - """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -551,7 +537,6 @@ class PrecisionRecallCurve(SklearnMetric): The last precision and recall values are 1. and 0. respectively and do not have a corresponding threshold. This ensures that the graph starts on the x axis. 
- """ def __init__(self, @@ -559,7 +544,6 @@ def __init__(self, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): """ - Args: pos_label: The class to report if ``average='binary'``. reduce_group: the process group for DDP reduces (only needed for DDP training). @@ -575,13 +559,11 @@ def __init__(self, def forward(self, probas_pred: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: """ - Args: probas_pred : Estimated probabilities or decision function. y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: precision: Precision values such that element i is the precision of @@ -606,7 +588,6 @@ class ROC(SklearnMetric): Note: this implementation is restricted to the binary classification task. - """ def __init__(self, @@ -614,7 +595,6 @@ def __init__(self, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM): """ - Args: pos_labels: The class to report if ``average='binary'``. reduce_group: the process group for DDP reduces (only needed for DDP training). @@ -634,7 +614,6 @@ def __init__(self, def forward(self, y_score: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> Union[np.ndarray, float]: """ - Args: y_score : Target scores, can either be probability estimates of the positive class or confidence values. @@ -660,6 +639,7 @@ class or confidence values. class AUROC(SklearnMetric): """ Compute Area Under the Curve (AUC) from prediction scores + Note: this implementation is restricted to the binary classification task or multilabel classification task in label indicator format. @@ -693,12 +673,12 @@ def __init__(self, average: Optional[str] = 'macro', def forward(self, y_score: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> float: """ - Args: y_score: Target scores, can either be probability estimates of the positive class, confidence values, or binary decisions. y_true: True binary labels in binary label indicators. sample_weight: Sample weights. + Returns: Area Under Receiver Operating Characteristic Curve """ From c9908a1a644921348079870fde58eb157cd5e8eb Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 25 May 2020 22:03:36 +0200 Subject: [PATCH 28/44] format --- pytorch_lightning/metrics/sklearn.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index af8cd3c02373b..005c305adaa77 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -160,13 +160,13 @@ def __init__(self, average: Optional[str] = 'macro', Args: average: If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: - * If 'micro': Calculate metrics globally by considering each element - of the label indicator matrix as a label. - * If 'macro': Calculate metrics for each label, and find their unweighted mean. - This does not take label imbalance into account. - * If 'weighted': Calculate metrics for each label, and find their average, - weighted by support (the number of true instances for each label). - * If 'samples': Calculate metrics for each instance, and find their average. + * If 'micro': Calculate metrics globally by considering each element + of the label indicator matrix as a label. + * If 'macro': Calculate metrics for each label, and find their unweighted mean. 
+ This does not take label imbalance into account. + * If 'weighted': Calculate metrics for each label, and find their average, + weighted by support (the number of true instances for each label). + * If 'samples': Calculate metrics for each instance, and find their average. reduce_group: the process group for DDP reduces (only needed for DDP training). Defaults to all processes (world) reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). From 41e59717c1a540d4463d30c15fbf344176beb3c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Tue, 26 May 2020 00:23:29 +0200 Subject: [PATCH 29/44] fix all warnings and formatting problems --- docs/source/conf.py | 1 + pytorch_lightning/metrics/sklearn.py | 259 +++++++++++++++------------ 2 files changed, 145 insertions(+), 115 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 4133571c65635..a084e5e349e39 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -90,6 +90,7 @@ 'sphinx.ext.linkcode', 'sphinx.ext.autosummary', 'sphinx.ext.napoleon', + 'sphinx.ext.imgmath', 'recommonmark', 'sphinx.ext.autosectionlabel', # 'm2r', diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 005c305adaa77..16cfd24cd7f30 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -26,18 +26,18 @@ class SklearnMetric(NumpyMetric): """ Bridge between PyTorch Lightning and scikit-learn metrics - .. warning:: - Every metric call will cause a GPU synchronization, which may slow down your code + Warning: + Every metric call will cause a GPU synchronization, which may slow down your code - .. note:: - The order of targets and predictions may be different from the order typically used in PyTorch + Note: + The order of targets and predictions may be different from the order typically used in PyTorch """ def __init__(self, metric_name: str, reduce_group: Any = torch.distributed.group.WORLD, reduce_op: Any = torch.distributed.ReduceOp.SUM, **kwargs): """ Args: - metric_name: the metric name to import anc compute from scikit-learn.metrics + metric_name: the metric name to import and compute from scikit-learn.metrics reduce_group: the process group for DDP reduces (only needed for DDP training). Defaults to all processes (world) reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). @@ -58,14 +58,15 @@ def metric_fn(self): return getattr(sklearn.metrics, self.name) def forward(self, *args, **kwargs) -> Union[np.ndarray, int, float]: - """ Carries the actual metric computation + """ + Carries the actual metric computation Args: *args: Positional arguments forwarded to metric call (should be already converted to numpy) **kwargs: keyword arguments forwarded to metric call (should be already converted to numpy) Returns: - the metric value (will be converted to tensor by baseclass + the metric value (will be converted to tensor by baseclass) """ return self.metric_fn(*args, **kwargs, **self.metric_kwargs) @@ -97,15 +98,17 @@ def __init__(self, normalize: bool = True, def forward(self, y_pred: np.ndarray, y_true: np.ndarray, sample_weight: Optional[np.ndarray] = None) -> float: - """ Computes the accuracy + """ + Computes the accuracy Args: y_pred: the array containing the predictions (already in categorical form) y_true: the array containing the targets (in categorical form) - sample_weight: + sample_weight: Sample weights. 
Returns: Accuracy Score + """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -114,8 +117,8 @@ class AUC(SklearnMetric): """ Calculates the Area Under the Curve using the trapoezoidal rule - .. warning:: - Every metric call will cause a GPU synchronization, which may slow down your code + Warning: + Every metric call will cause a GPU synchronization, which may slow down your code """ def __init__(self, reduce_group: Any = torch.distributed.group.WORLD, @@ -135,7 +138,8 @@ def __init__(self, ) def forward(self, x: np.ndarray, y: np.ndarray) -> float: - """ Computes the AUC + """ + Computes the AUC Args: x: x coordinates. @@ -158,15 +162,17 @@ def __init__(self, average: Optional[str] = 'macro', ): """ Args: - average: If None, the scores for each class are returned. - Otherwise, this determines the type of averaging performed on the data: - * If 'micro': Calculate metrics globally by considering each element - of the label indicator matrix as a label. + average: If None, the scores for each class are returned. Otherwise, this determines the type of + averaging performed on the data: + + * If 'micro': Calculate metrics globally by considering each element of the label indicator + matrix as a label. * If 'macro': Calculate metrics for each label, and find their unweighted mean. - This does not take label imbalance into account. - * If 'weighted': Calculate metrics for each label, and find their average, - weighted by support (the number of true instances for each label). + This does not take label imbalance into account. + * If 'weighted': Calculate metrics for each label, and find their average, weighted by + support (the number of true instances for each label). * If 'samples': Calculate metrics for each instance, and find their average. + reduce_group: the process group for DDP reduces (only needed for DDP training). Defaults to all processes (world) reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). @@ -185,6 +191,7 @@ def forward(self, y_score: np.ndarray, y_true: np.ndarray, confidence values, or binary decisions. y_true: True binary labels in binary label indicators. sample_weight: Sample weights. + Returns: average precision score """ @@ -225,26 +232,31 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray: y_pred: Estimated targets as returned by a classifier. y_true: Ground truth (correct) target values. - Returns: Confusion matrix (array of shape [n_classes, n_classes]) + Returns: + Confusion matrix (array of shape [n_classes, n_classes]) """ return super().forward(y_pred=y_pred, y_true=y_true) class F1(SklearnMetric): - """ + r""" Compute the F1 score, also known as balanced F-score or F-measure The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. The relative contribution of precision and recall to the F1 score are - equal. The formula for the F1 score is:: - F1 = 2 * (precision * recall) / (precision + recall) + equal. The formula for the F1 score is: + + .. math:: + + F_1 = 2 \cdot \frac{precision \cdot recall}{precision + recall} + In the multi-class and multi-label case, this is the weighted average of the F1 score of each class. - References: - .. 
[1] `Wikipedia entry for the F1-score - `_ + References + - [1] `Wikipedia entry for the F1-score + `_ """ def __init__(self, labels: Optional[Sequence] = None, @@ -259,24 +271,26 @@ def __init__(self, labels: Optional[Sequence] = None, average: This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: - ``'binary'``: - Only report results for the class specified by ``pos_label``. - This is applicable only if targets (``y_{true,pred}``) are binary. - ``'micro'``: - Calculate metrics globally by counting the total true positives, - false negatives and false positives. - ``'macro'``: - Calculate metrics for each label, and find their unweighted - mean. This does not take label imbalance into account. - ``'weighted'``: - Calculate metrics for each label, and find their average, weighted - by support (the number of true instances for each label). This - alters 'macro' to account for label imbalance; it can result in an - F-score that is not between precision and recall. - ``'samples'``: - Calculate metrics for each instance, and find their average (only - meaningful for multilabel classification where this differs from - :func:`accuracy_score`). + + * ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + * ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + * ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + * ``'weighted'``: + Calculate metrics for each label, and find their average, weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + * ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + Note that if ``pos_label`` is given in binary classification with `average != 'binary'`, only that positive class is reported. This behavior is deprecated and will change in version 0.18. @@ -300,7 +314,8 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: F1 score of the positive class in binary classification or weighted + Returns: + F1 score of the positive class in binary classification or weighted average of the F1 scores of each class for the multiclass task. """ @@ -309,16 +324,16 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, class FBeta(SklearnMetric): """ - Compute the F-beta score.The `beta` parameter determines the weight of precision in the combined + Compute the F-beta score. The `beta` parameter determines the weight of precision in the combined score. ``beta < 1`` lends more weight to precision, while ``beta > 1`` favors recall (``beta -> 0`` considers only precision, ``beta -> inf`` only recall). References: - .. [1] R. Baeza-Yates and B. Ribeiro-Neto (2011). - Modern Information Retrieval. Addison Wesley, pp. 327-328. - .. [2] `Wikipedia entry for the F1-score - `_ + - [1] R. Baeza-Yates and B. Ribeiro-Neto (2011). + Modern Information Retrieval. Addison Wesley, pp. 327-328. 
+ - [2] `Wikipedia entry for the F1-score + `_ """ def __init__(self, beta: float, labels: Optional[Sequence] = None, @@ -334,24 +349,26 @@ def __init__(self, beta: float, labels: Optional[Sequence] = None, average: This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: - ``'binary'``: - Only report results for the class specified by ``pos_label``. - This is applicable only if targets (``y_{true,pred}``) are binary. - ``'micro'``: - Calculate metrics globally by counting the total true positives, - false negatives and false positives. - ``'macro'``: - Calculate metrics for each label, and find their unweighted - mean. This does not take label imbalance into account. - ``'weighted'``: - Calculate metrics for each label, and find their average, weighted - by support (the number of true instances for each label). This - alters 'macro' to account for label imbalance; it can result in an - F-score that is not between precision and recall. - ``'samples'``: - Calculate metrics for each instance, and find their average (only - meaningful for multilabel classification where this differs from - :func:`accuracy_score`). + + * ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + * ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + * ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + * ``'weighted'``: + Calculate metrics for each label, and find their average, weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + * ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + Note that if ``pos_label`` is given in binary classification with `average != 'binary'`, only that positive class is reported. This behavior is deprecated and will change in version 0.18. @@ -376,7 +393,9 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: FBeta score of the positive class in binary classification or weighted + + Returns: + FBeta score of the positive class in binary classification or weighted average of the FBeta scores of each class for the multiclass task. """ @@ -405,24 +424,26 @@ def __init__(self, labels: Optional[Sequence] = None, average: This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: - ``'binary'``: - Only report results for the class specified by ``pos_label``. - This is applicable only if targets (``y_{true,pred}``) are binary. - ``'micro'``: - Calculate metrics globally by counting the total true positives, - false negatives and false positives. - ``'macro'``: - Calculate metrics for each label, and find their unweighted - mean. This does not take label imbalance into account. - ``'weighted'``: - Calculate metrics for each label, and find their average, weighted - by support (the number of true instances for each label). 
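Editor's note: to make the beta weighting concrete, with the usual definition F_beta = (1 + beta^2) * precision * recall / (beta^2 * precision + recall), beta=1 reduces to the harmonic-mean F1 given in the docstring above, while beta=0.5 pulls the score towards precision. A small worked check with plain scikit-learn (example values are my own, not from the patch):

    from sklearn.metrics import f1_score, fbeta_score, precision_score, recall_score

    y_true = [1, 1, 1, 1, 0, 0]
    y_pred = [1, 1, 0, 0, 1, 0]

    # precision = 2/3, recall = 2/4
    print(precision_score(y_true, y_pred), recall_score(y_true, y_pred))
    print(f1_score(y_true, y_pred))               # 4/7 ~ 0.571, the harmonic mean of P and R
    print(fbeta_score(y_true, y_pred, beta=0.5))  # 0.625, closer to the precision of 0.667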
This - alters 'macro' to account for label imbalance; it can result in an - F-score that is not between precision and recall. - ``'samples'``: - Calculate metrics for each instance, and find their average (only - meaningful for multilabel classification where this differs from - :func:`accuracy_score`). + + * ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + * ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + * ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. + * ``'weighted'``: + Calculate metrics for each label, and find their average, weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + * ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + Note that if ``pos_label`` is given in binary classification with `average != 'binary'`, only that positive class is reported. This behavior is deprecated and will change in version 0.18. @@ -446,8 +467,10 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: Precision of the positive class in binary classification or weighted - average of the precision of each class for the multiclass task. + Returns: + Precision of the positive class in binary classification or weighted + average of the precision of each class for the multiclass task. + """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -473,24 +496,26 @@ def __init__(self, labels: Optional[Sequence] = None, average: This parameter is required for multiclass/multilabel targets. If ``None``, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: - ``'binary'``: - Only report results for the class specified by ``pos_label``. - This is applicable only if targets (``y_{true,pred}``) are binary. - ``'micro'``: - Calculate metrics globally by counting the total true positives, - false negatives and false positives. - ``'macro'``: - Calculate metrics for each label, and find their unweighted - mean. This does not take label imbalance into account. - ``'weighted'``: - Calculate metrics for each label, and find their average, weighted - by support (the number of true instances for each label). This - alters 'macro' to account for label imbalance; it can result in an - F-score that is not between precision and recall. - ``'samples'``: - Calculate metrics for each instance, and find their average (only - meaningful for multilabel classification where this differs from - :func:`accuracy_score`). + + * ``'binary'``: + Only report results for the class specified by ``pos_label``. + This is applicable only if targets (``y_{true,pred}``) are binary. + * ``'micro'``: + Calculate metrics globally by counting the total true positives, + false negatives and false positives. + * ``'macro'``: + Calculate metrics for each label, and find their unweighted + mean. This does not take label imbalance into account. 
+ * ``'weighted'``: + Calculate metrics for each label, and find their average, weighted + by support (the number of true instances for each label). This + alters 'macro' to account for label imbalance; it can result in an + F-score that is not between precision and recall. + * ``'samples'``: + Calculate metrics for each instance, and find their average (only + meaningful for multilabel classification where this differs from + :func:`accuracy_score`). + Note that if ``pos_label`` is given in binary classification with `average != 'binary'`, only that positive class is reported. This behavior is deprecated and will change in version 0.18. @@ -514,8 +539,10 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: Recall of the positive class in binary classification or weighted - average of the recall of each class for the multiclass task. + Returns: + Recall of the positive class in binary classification or weighted + average of the recall of each class for the multiclass task. + """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -525,7 +552,7 @@ class PrecisionRecallCurve(SklearnMetric): Compute precision-recall pairs for different probability thresholds Note: - this implementation is restricted to the binary classification task. + This implementation is restricted to the binary classification task. The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of true positives and ``fp`` the number of false positives. The precision is @@ -603,8 +630,8 @@ def __init__(self, Defaults to sum. References: - .. [1] `Wikipedia entry for the Receiver operating characteristic - `_ + - [1] `Wikipedia entry for the Receiver operating characteristic + `_ """ super().__init__('roc_curve', reduce_group=reduce_group, @@ -653,13 +680,15 @@ def __init__(self, average: Optional[str] = 'macro', Args: average: If None, the scores for each class are returned. Otherwise, this determines the type of averaging performed on the data: + * If 'micro': Calculate metrics globally by considering each element of the label indicator - matrix as a label. + matrix as a label. * If 'macro': Calculate metrics for each label, and find their unweighted mean. - This does not take label imbalance into account. + This does not take label imbalance into account. * If 'weighted': Calculate metrics for each label, and find their average, weighted by - support (the number of true instances for each label). + support (the number of true instances for each label). * If 'samples': Calculate metrics for each instance, and find their average. + reduce_group: the process group for DDP reduces (only needed for DDP training). Defaults to all processes (world) reduce_op: the operation to perform during reduction within DDP (only needed for DDP training). 
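The averaging semantics documented above mirror scikit-learn's, since these wrapper classes forward their inputs to the corresponding sklearn functions and only handle the tensor/numpy conversion around the call. As a rough usage sketch (class and argument names are taken from the diff above; the exact import path, defaults, and printed values are assumptions, not part of this patch), a multiclass F1/FBeta evaluation might look like:

    import numpy as np
    from pytorch_lightning.metrics.sklearn import F1, FBeta

    # toy 3-class predictions/targets; inputs are converted to numpy before the
    # sklearn call and the result is converted back to a tensor by the base class
    y_pred = np.array([0, 1, 2, 2, 1, 0])
    y_true = np.array([0, 2, 2, 2, 1, 1])

    f1 = F1(average='macro')                   # unweighted mean over the three classes
    fbeta = FBeta(beta=2.0, average='micro')   # global TP/FP/FN counts, recall-favoring beta

    print(f1(y_pred, y_true))      # tensor holding the macro-averaged F1 score
    print(fbeta(y_pred, y_true))   # tensor holding the micro-averaged F-beta score
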
From 82781e56b27186cbd6f2ce1f2031015d1c14fe12 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Tue, 26 May 2020 13:09:05 +0200 Subject: [PATCH 30/44] Update environment.yml --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 98f5fb81e1cdd..d610f0a1379b0 100644 --- a/environment.yml +++ b/environment.yml @@ -27,7 +27,7 @@ dependencies: - check-manifest - twine==1.13.0 - pillow<7.0.0 - - scipy>=0.13.3 + - scipy>=0.13.3,<1.4.0 - scikit-learn>=0.20.0 From 01c3e5714c44756b94cf69bac4f547f1eb3eba21 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Tue, 26 May 2020 13:09:31 +0200 Subject: [PATCH 31/44] Update requirements-extra.txt --- requirements-extra.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-extra.txt b/requirements-extra.txt index 1ce0aa550212f..8598fb45b5029 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -10,5 +10,5 @@ matplotlib>=3.1.1 # no need to install with [pytorch] as pytorch is already installed and torchvision is required only for Horovod examples horovod>=0.19.1 omegaconf>=2.0.0 -scipy>=0.13.3 +scipy>=0.13.3, <1.4.0 scikit-learn>=0.20.0 From 6a674b6fb3e09a39c09359c598bfb6ced3140d46 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Tue, 26 May 2020 15:09:27 +0200 Subject: [PATCH 32/44] Update environment.yml --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index d610f0a1379b0..98f5fb81e1cdd 100644 --- a/environment.yml +++ b/environment.yml @@ -27,7 +27,7 @@ dependencies: - check-manifest - twine==1.13.0 - pillow<7.0.0 - - scipy>=0.13.3,<1.4.0 + - scipy>=0.13.3 - scikit-learn>=0.20.0 From c96e3d778224140fa4455a9e4f8be0ae798aa158 Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Tue, 26 May 2020 15:09:49 +0200 Subject: [PATCH 33/44] Update requirements-extra.txt --- requirements-extra.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-extra.txt b/requirements-extra.txt index 8598fb45b5029..1ce0aa550212f 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -10,5 +10,5 @@ matplotlib>=3.1.1 # no need to install with [pytorch] as pytorch is already installed and torchvision is required only for Horovod examples horovod>=0.19.1 omegaconf>=2.0.0 -scipy>=0.13.3, <1.4.0 +scipy>=0.13.3 scikit-learn>=0.20.0 From b779e3792461d60a88de8161daf797dd3cdc0480 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Mon, 8 Jun 2020 15:23:39 +0200 Subject: [PATCH 34/44] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b615abc47a4d1..ec6157d2bc1ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Added - Remove explicit flush from tensorboard logger ([#2126](https://github.com/PyTorchLightning/pytorch-lightning/pull/2126)) -- Add Metric Base Classes ([#1326](https://github.com/PyTorchLightning/pytorch-lightning/pull/1326), [#1877](https://github.com/PyTorchLightning/pytorch-lightning/pull/1877)) +- Added metric Base classes ([#1326](https://github.com/PyTorchLightning/pytorch-lightning/pull/1326), [#1877](https://github.com/PyTorchLightning/pytorch-lightning/pull/1877)) +- Added Sklearn metrics classes ([#1327](https://github.com/PyTorchLightning/pytorch-lightning/pull/1327)) - Added type hints in `Trainer.fit()` and `Trainer.test()` to reflect that also a list of dataloaders can be passed in ([#1723](https://github.com/PyTorchLightning/pytorch-lightning/pull/1723)) - Allow dataloaders without sampler field present ([#1907](https://github.com/PyTorchLightning/pytorch-lightning/pull/1907)) - Added option `save_last` to save the model at the end of every epoch in `ModelCheckpoint` [(#1908)](https://github.com/PyTorchLightning/pytorch-lightning/pull/1908) From f6e6cecadaecefac46bbe9fd162278cc00a6d83e Mon Sep 17 00:00:00 2001 From: Jirka Date: Mon, 8 Jun 2020 15:28:43 +0200 Subject: [PATCH 35/44] docs --- .circleci/config.yml | 9 ++++++--- pytorch_lightning/metrics/sklearn.py | 28 ++++++++++++++-------------- requirements-extra.txt | 2 +- requirements.txt | 2 +- tests/requirements.txt | 2 +- 5 files changed, 23 insertions(+), 20 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7bd1de8f6c947..fcff1e65feca1 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -64,8 +64,11 @@ references: name: Make Documentation command: | # First run the same pipeline as Read-The-Docs - sudo apt-get update && sudo apt-get install -y cmake - sudo pip install -r docs/requirements.txt + # apt-get update && apt-get install -y cmake + # using: https://hub.docker.com/r/readthedocs/build + pyenv global 3.6.8 + python --version + pip install -r docs/requirements.txt cd docs; make clean; make html --debug --jobs 2 SPHINXOPTS="-W" test_docs: &test_docs @@ -81,7 +84,7 @@ jobs: Build-Docs: docker: - - image: circleci/python:3.7 + - image: readthedocs/build:latest steps: - checkout - *make_docs diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 16cfd24cd7f30..fdc56884db48e 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -65,7 +65,7 @@ def forward(self, *args, **kwargs) -> Union[np.ndarray, int, float]: *args: Positional arguments forwarded to metric call (should be already converted to numpy) **kwargs: keyword arguments forwarded to metric call (should be already converted to numpy) - Returns: + Return: the metric value (will be converted to tensor by baseclass) """ @@ -76,7 +76,7 @@ class Accuracy(SklearnMetric): """ Calculates the Accuracy Score - .. warning:: + Warning: Every metric call will cause a GPU synchronization, which may slow down your code """ def __init__(self, normalize: bool = True, @@ -106,7 +106,7 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, y_true: the array containing the targets (in categorical form) sample_weight: Sample weights. - Returns: + Return: Accuracy Score """ @@ -145,7 +145,7 @@ def forward(self, x: np.ndarray, y: np.ndarray) -> float: x: x coordinates. y: y coordinates. 
- Returns: + Return: AUC calculated with trapezoidal rule """ @@ -192,7 +192,7 @@ def forward(self, y_score: np.ndarray, y_true: np.ndarray, y_true: True binary labels in binary label indicators. sample_weight: Sample weights. - Returns: + Return: average precision score """ return super().forward(y_score=y_score, y_true=y_true, @@ -232,7 +232,7 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray) -> np.ndarray: y_pred: Estimated targets as returned by a classifier. y_true: Ground truth (correct) target values. - Returns: + Return: Confusion matrix (array of shape [n_classes, n_classes]) """ @@ -314,7 +314,7 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: + Return: F1 score of the positive class in binary classification or weighted average of the F1 scores of each class for the multiclass task. @@ -394,9 +394,9 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, sample_weight: Sample weights. - Returns: + Return: FBeta score of the positive class in binary classification or weighted - average of the FBeta scores of each class for the multiclass task. + average of the FBeta scores of each class for the multiclass task. """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -467,9 +467,9 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: + Return: Precision of the positive class in binary classification or weighted - average of the precision of each class for the multiclass task. + average of the precision of each class for the multiclass task. """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -539,9 +539,9 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, y_true: Ground truth (correct) target values. sample_weight: Sample weights. - Returns: + Return: Recall of the positive class in binary classification or weighted - average of the recall of each class for the multiclass task. + average of the recall of each class for the multiclass task. """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -708,7 +708,7 @@ def forward(self, y_score: np.ndarray, y_true: np.ndarray, y_true: True binary labels in binary label indicators. sample_weight: Sample weights. 
- Returns: + Return: Area Under Receiver Operating Characteristic Curve """ return super().forward(y_score=y_score, y_true=y_true, diff --git a/requirements-extra.txt b/requirements-extra.txt index 1ce0aa550212f..ac12429220f82 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -11,4 +11,4 @@ matplotlib>=3.1.1 horovod>=0.19.1 omegaconf>=2.0.0 scipy>=0.13.3 -scikit-learn>=0.20.0 +scikit-learn>=0.19 diff --git a/requirements.txt b/requirements.txt index 0aa44aae24f4c..79899d1b9c71f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # the default package dependencies +numpy>=1.11.0 tqdm>=4.41.0 -numpy>=1.16.4 torch>=1.3 tensorboard>=1.14 future>=0.17.1 # required for builtins in setup.py diff --git a/tests/requirements.txt b/tests/requirements.txt index fdf2e83337acb..2945bc5f968d2 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -8,4 +8,4 @@ flake8-black check-manifest twine==1.13.0 black==19.10b0 -pre-commit>=1.21.0 +pre-commit>=1.0 From 533bb7cd76b2ebfcbd55b791dc753dd28de5be9d Mon Sep 17 00:00:00 2001 From: Jirka Date: Tue, 9 Jun 2020 11:06:07 +0200 Subject: [PATCH 36/44] inherit --- pytorch_lightning/metrics/metric.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/metrics/metric.py b/pytorch_lightning/metrics/metric.py index 5247084498559..bd14655f30fa3 100644 --- a/pytorch_lightning/metrics/metric.py +++ b/pytorch_lightning/metrics/metric.py @@ -1,8 +1,9 @@ from abc import ABC, abstractmethod -from typing import Any, Optional, Union +from typing import Any, Optional import torch import torch.distributed +from torch.nn import Module from pytorch_lightning.metrics.converters import tensor_metric, numpy_metric from pytorch_lightning.utilities.apply_func import apply_to_collection @@ -11,7 +12,7 @@ __all__ = ['Metric', 'TensorMetric', 'NumpyMetric'] -class Metric(DeviceDtypeModuleMixin, torch.nn.Module, ABC): +class Metric(ABC, DeviceDtypeModuleMixin, Module): """ Abstract base class for metric implementation. 
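The base-class reorder in patch 36 changes the MRO but not the effective metaclass: ABC contributes ABCMeta in either ordering, while DeviceDtypeModuleMixin/Module supply the nn.Module machinery. A self-contained sketch (toy class, not the actual Metric, and without the device/dtype mixin) showing how that combination resolves:

    from abc import ABC, abstractmethod
    from torch.nn import Module

    class ToyMetric(ABC, Module):          # ABC listed first, as in this patch
        @abstractmethod
        def forward(self, *args, **kwargs):
            ...

    print(ToyMetric.__mro__)   # ToyMetric -> ABC -> Module -> object
    print(type(ToyMetric))     # abc.ABCMeta, the most derived metaclass of the bases
    # ToyMetric() raises TypeError until a subclass implements forward()
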
From ca117e4207235d823e328f49b5e8c1526dcd67b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Tue, 26 May 2020 00:23:29 +0200 Subject: [PATCH 37/44] fix all warnings and formatting problems --- pytorch_lightning/metrics/sklearn.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index fdc56884db48e..075949fe25988 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -26,11 +26,11 @@ class SklearnMetric(NumpyMetric): """ Bridge between PyTorch Lightning and scikit-learn metrics - Warning: - Every metric call will cause a GPU synchronization, which may slow down your code + Warning: + Every metric call will cause a GPU synchronization, which may slow down your code - Note: - The order of targets and predictions may be different from the order typically used in PyTorch + Note: + The order of targets and predictions may be different from the order typically used in PyTorch """ def __init__(self, metric_name: str, reduce_group: Any = torch.distributed.group.WORLD, @@ -77,7 +77,7 @@ class Accuracy(SklearnMetric): Calculates the Accuracy Score Warning: - Every metric call will cause a GPU synchronization, which may slow down your code + Every metric call will cause a GPU synchronization, which may slow down your code """ def __init__(self, normalize: bool = True, reduce_group: Any = torch.distributed.group.WORLD, @@ -117,8 +117,8 @@ class AUC(SklearnMetric): """ Calculates the Area Under the Curve using the trapoezoidal rule - Warning: - Every metric call will cause a GPU synchronization, which may slow down your code + Warning: + Every metric call will cause a GPU synchronization, which may slow down your code """ def __init__(self, reduce_group: Any = torch.distributed.group.WORLD, @@ -396,7 +396,7 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, Return: FBeta score of the positive class in binary classification or weighted - average of the FBeta scores of each class for the multiclass task. + average of the FBeta scores of each class for the multiclass task. """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -469,7 +469,7 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, Return: Precision of the positive class in binary classification or weighted - average of the precision of each class for the multiclass task. + average of the precision of each class for the multiclass task. """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) @@ -541,7 +541,7 @@ def forward(self, y_pred: np.ndarray, y_true: np.ndarray, Return: Recall of the positive class in binary classification or weighted - average of the recall of each class for the multiclass task. + average of the recall of each class for the multiclass task. """ return super().forward(y_pred=y_pred, y_true=y_true, sample_weight=sample_weight) From 36fa04c100297da4c16e653d44241101f311eb87 Mon Sep 17 00:00:00 2001 From: Jirka Date: Tue, 9 Jun 2020 11:26:34 +0200 Subject: [PATCH 38/44] docs inherit. 
--- .../computer_vision_fine_tuning.py | 12 +++++++----- pytorch_lightning/core/grads.py | 3 ++- pytorch_lightning/core/hooks.py | 3 ++- pytorch_lightning/core/lightning.py | 2 +- pytorch_lightning/metrics/metric.py | 2 +- .../utilities/device_dtype_mixin.py | 17 +++++++++-------- 6 files changed, 22 insertions(+), 17 deletions(-) diff --git a/pl_examples/domain_templates/computer_vision_fine_tuning.py b/pl_examples/domain_templates/computer_vision_fine_tuning.py index e2db1b98fdb09..703f1c9b02419 100644 --- a/pl_examples/domain_templates/computer_vision_fine_tuning.py +++ b/pl_examples/domain_templates/computer_vision_fine_tuning.py @@ -27,6 +27,8 @@ from tempfile import TemporaryDirectory from typing import Optional, Generator, Union +from torch.nn import Module + import pytorch_lightning as pl import torch import torch.nn.functional as F @@ -47,7 +49,7 @@ # --- Utility functions --- -def _make_trainable(module: torch.nn.Module) -> None: +def _make_trainable(module: Module) -> None: """Unfreezes a given module. Args: @@ -58,7 +60,7 @@ def _make_trainable(module: torch.nn.Module) -> None: module.train() -def _recursive_freeze(module: torch.nn.Module, +def _recursive_freeze(module: Module, train_bn: bool = True) -> None: """Freezes the layers of a given module. @@ -80,7 +82,7 @@ def _recursive_freeze(module: torch.nn.Module, _recursive_freeze(module=child, train_bn=train_bn) -def freeze(module: torch.nn.Module, +def freeze(module: Module, n: Optional[int] = None, train_bn: bool = True) -> None: """Freezes the layers up to index n (if n is not None). @@ -101,7 +103,7 @@ def freeze(module: torch.nn.Module, _make_trainable(module=child) -def filter_params(module: torch.nn.Module, +def filter_params(module: Module, train_bn: bool = True) -> Generator: """Yields the trainable parameters of a given module. @@ -124,7 +126,7 @@ def filter_params(module: torch.nn.Module, yield param -def _unfreeze_and_add_param_group(module: torch.nn.Module, +def _unfreeze_and_add_param_group(module: Module, optimizer: Optimizer, lr: Optional[float] = None, train_bn: bool = True): diff --git a/pytorch_lightning/core/grads.py b/pytorch_lightning/core/grads.py index cb2215002c7d8..f58bbdf25ec88 100644 --- a/pytorch_lightning/core/grads.py +++ b/pytorch_lightning/core/grads.py @@ -4,9 +4,10 @@ from typing import Dict, Union import torch +from torch.nn import Module -class GradInformation(torch.nn.Module): +class GradInformation(Module): def grad_norm(self, norm_type: Union[float, int, str]) -> Dict[str, float]: """Compute each parameter's gradient's norm and their overall norm. 
diff --git a/pytorch_lightning/core/hooks.py b/pytorch_lightning/core/hooks.py index 960c7124383b0..d3fea6d446845 100644 --- a/pytorch_lightning/core/hooks.py +++ b/pytorch_lightning/core/hooks.py @@ -2,6 +2,7 @@ import torch from torch import Tensor +from torch.nn import Module from torch.optim.optimizer import Optimizer from pytorch_lightning.utilities import move_data_to_device @@ -14,7 +15,7 @@ APEX_AVAILABLE = True -class ModelHooks(torch.nn.Module): +class ModelHooks(Module): # TODO: remove in v0.9.0 def on_sanity_check_start(self): diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 7632822e462c3..fa21f3da44560 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -32,7 +32,7 @@ XLA_AVAILABLE = True -class LightningModule(ABC, DeviceDtypeModuleMixin, GradInformation, ModelIO, ModelHooks, Module): +class LightningModule(ABC, DeviceDtypeModuleMixin, GradInformation, ModelIO, ModelHooks): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/pytorch_lightning/metrics/metric.py b/pytorch_lightning/metrics/metric.py index bd14655f30fa3..9fb86d4b46154 100644 --- a/pytorch_lightning/metrics/metric.py +++ b/pytorch_lightning/metrics/metric.py @@ -12,7 +12,7 @@ __all__ = ['Metric', 'TensorMetric', 'NumpyMetric'] -class Metric(ABC, DeviceDtypeModuleMixin, Module): +class Metric(ABC, DeviceDtypeModuleMixin): """ Abstract base class for metric implementation. diff --git a/pytorch_lightning/utilities/device_dtype_mixin.py b/pytorch_lightning/utilities/device_dtype_mixin.py index eb3faf54faf6e..48ccad5307552 100644 --- a/pytorch_lightning/utilities/device_dtype_mixin.py +++ b/pytorch_lightning/utilities/device_dtype_mixin.py @@ -1,9 +1,10 @@ from typing import Union, Optional import torch +from torch.nn import Module -class DeviceDtypeModuleMixin(torch.nn.Module): +class DeviceDtypeModuleMixin(Module): _device: ... _dtype: Union[str, torch.dtype] @@ -25,7 +26,7 @@ def device(self, new_device: Union[str, torch.device]): # Necessary to avoid infinite recursion raise RuntimeError('Cannot set the device explicitly. Please use module.to(new_device).') - def to(self, *args, **kwargs) -> torch.nn.Module: + def to(self, *args, **kwargs) -> Module: """Moves and/or casts the parameters and buffers. This can be called as @@ -91,7 +92,7 @@ def to(self, *args, **kwargs) -> torch.nn.Module: return super().to(*args, **kwargs) - def cuda(self, device: Optional[int] = None) -> torch.nn.Module: + def cuda(self, device: Optional[int] = None) -> Module: """Moves all model parameters and buffers to the GPU. This also makes associated parameters and buffers different objects. So it should be called before constructing optimizer if the module will @@ -108,7 +109,7 @@ def cuda(self, device: Optional[int] = None) -> torch.nn.Module: self._device = torch.device('cuda', index=device) return super().cuda(device=device) - def cpu(self) -> torch.nn.Module: + def cpu(self) -> Module: """Moves all model parameters and buffers to the CPU. Returns: Module: self @@ -116,7 +117,7 @@ def cpu(self) -> torch.nn.Module: self._device = torch.device('cpu') return super().cpu() - def type(self, dst_type: Union[str, torch.dtype]) -> torch.nn.Module: + def type(self, dst_type: Union[str, torch.dtype]) -> Module: """Casts all parameters and buffers to :attr:`dst_type`. 
Arguments: @@ -128,7 +129,7 @@ def type(self, dst_type: Union[str, torch.dtype]) -> torch.nn.Module: self._dtype = dst_type return super().type(dst_type=dst_type) - def float(self) -> torch.nn.Module: + def float(self) -> Module: """Casts all floating point parameters and buffers to float datatype. Returns: @@ -137,7 +138,7 @@ def float(self) -> torch.nn.Module: self._dtype = torch.float return super().float() - def double(self) -> torch.nn.Module: + def double(self) -> Module: """Casts all floating point parameters and buffers to ``double`` datatype. Returns: @@ -146,7 +147,7 @@ def double(self) -> torch.nn.Module: self._dtype = torch.double return super().double() - def half(self) -> torch.nn.Module: + def half(self) -> Module: """Casts all floating point parameters and buffers to ``half`` datatype. Returns: From 0a3a31aae1f043646cee318bf008d3671b4ea7b6 Mon Sep 17 00:00:00 2001 From: Jirka Date: Tue, 9 Jun 2020 11:34:27 +0200 Subject: [PATCH 39/44] docs --- .circleci/config.yml | 2 +- pytorch_lightning/core/lightning.py | 2 +- pytorch_lightning/metrics/metric.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index fcff1e65feca1..8a2f5dfb59c8e 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -66,7 +66,7 @@ references: # First run the same pipeline as Read-The-Docs # apt-get update && apt-get install -y cmake # using: https://hub.docker.com/r/readthedocs/build - pyenv global 3.6.8 + pyenv global 3.7.3 python --version pip install -r docs/requirements.txt cd docs; make clean; make html --debug --jobs 2 SPHINXOPTS="-W" diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index fa21f3da44560..7632822e462c3 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -32,7 +32,7 @@ XLA_AVAILABLE = True -class LightningModule(ABC, DeviceDtypeModuleMixin, GradInformation, ModelIO, ModelHooks): +class LightningModule(ABC, DeviceDtypeModuleMixin, GradInformation, ModelIO, ModelHooks, Module): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/pytorch_lightning/metrics/metric.py b/pytorch_lightning/metrics/metric.py index 9fb86d4b46154..bd14655f30fa3 100644 --- a/pytorch_lightning/metrics/metric.py +++ b/pytorch_lightning/metrics/metric.py @@ -12,7 +12,7 @@ __all__ = ['Metric', 'TensorMetric', 'NumpyMetric'] -class Metric(ABC, DeviceDtypeModuleMixin): +class Metric(ABC, DeviceDtypeModuleMixin, Module): """ Abstract base class for metric implementation. 
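Patch 39 above restores torch.nn.Module among the Metric (and LightningModule) bases and moves the docs build to Python 3.7. Keeping Module in the hierarchy is what lets a metric behave like any other submodule: it is registered when assigned to a module attribute, it moves with .to()/.cuda(), and it is invoked through __call__. A minimal illustration with invented toy classes (not part of the PR):

    import torch
    from torch.nn import Module

    class ToyAccuracy(Module):
        def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
            return (pred == target).float().mean()

    class ToyModel(Module):
        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(4, 2)
            self.acc = ToyAccuracy()     # registered as a child module

    model = ToyModel()
    print([name for name, _ in model.named_modules()])     # ['', 'layer', 'acc']
    print(model.acc(torch.tensor([1, 0, 1]), torch.tensor([1, 1, 1])))  # tensor(0.6667)
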
From c8d6cacf4c1b4e7d30f5c098fd6cef85e1935982 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Tue, 9 Jun 2020 11:38:26 +0200 Subject: [PATCH 40/44] Apply suggestions from code review Co-authored-by: Nicki Skafte --- pytorch_lightning/metrics/sklearn.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 075949fe25988..45009b32e53e1 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -49,8 +49,9 @@ def __init__(self, metric_name: str, self.metric_kwargs = kwargs - lightning_logger.debug( - 'Every metric call will cause a GPU synchronization, which may slow down your code') + f'Metric {self.__class__.__name__} is using Sklearn as backend, meaning that' + ' every metric call will cause a GPU synchronization, which may slow down your code' + @property def metric_fn(self): From eeede877c9fe9b50415dd931ace844a7571aef0e Mon Sep 17 00:00:00 2001 From: Jirka Date: Tue, 9 Jun 2020 11:53:30 +0200 Subject: [PATCH 41/44] docs --- .circleci/config.yml | 1 + pytorch_lightning/metrics/sklearn.py | 4 ++-- requirements-extra.txt | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8a2f5dfb59c8e..2b7f2ad578a32 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -66,6 +66,7 @@ references: # First run the same pipeline as Read-The-Docs # apt-get update && apt-get install -y cmake # using: https://hub.docker.com/r/readthedocs/build + # we need to use py3.7 ot higher becase of an issue with metaclass inheritence pyenv global 3.7.3 python --version pip install -r docs/requirements.txt diff --git a/pytorch_lightning/metrics/sklearn.py b/pytorch_lightning/metrics/sklearn.py index 45009b32e53e1..60cc98c2c329f 100644 --- a/pytorch_lightning/metrics/sklearn.py +++ b/pytorch_lightning/metrics/sklearn.py @@ -48,10 +48,10 @@ def __init__(self, metric_name: str, reduce_op=reduce_op) self.metric_kwargs = kwargs - + lightning_logger.debug( f'Metric {self.__class__.__name__} is using Sklearn as backend, meaning that' ' every metric call will cause a GPU synchronization, which may slow down your code' - + ) @property def metric_fn(self): diff --git a/requirements-extra.txt b/requirements-extra.txt index ac12429220f82..1ce0aa550212f 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -11,4 +11,4 @@ matplotlib>=3.1.1 horovod>=0.19.1 omegaconf>=2.0.0 scipy>=0.13.3 -scikit-learn>=0.19 +scikit-learn>=0.20.0 From db8e7243cd1c88ce39ac3174af383a17e1272d5e Mon Sep 17 00:00:00 2001 From: Jirka Date: Tue, 9 Jun 2020 20:27:11 +0200 Subject: [PATCH 42/44] req --- .github/workflows/ci-testing.yml | 6 +++--- requirements-extra.txt | 2 +- requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-testing.yml b/.github/workflows/ci-testing.yml index 29711c0c62295..d905df63dec61 100644 --- a/.github/workflows/ci-testing.yml +++ b/.github/workflows/ci-testing.yml @@ -68,9 +68,9 @@ jobs: - name: Set min. 
dependencies if: matrix.requires == 'minimal' run: | - python -c "req = open('requirements.txt').read().replace('>', '=') ; open('requirements.txt', 'w').write(req)" - python -c "req = open('requirements-extra.txt').read().replace('>', '=') ; open('requirements-extra.txt', 'w').write(req)" - python -c "req = open('tests/requirements-devel.txt').read().replace('>', '=') ; open('tests/requirements-devel.txt', 'w').write(req)" + python -c "req = open('requirements.txt').read().replace('>=', '==') ; open('requirements.txt', 'w').write(req)" + python -c "req = open('requirements-extra.txt').read().replace('>=', '==') ; open('requirements-extra.txt', 'w').write(req)" + python -c "req = open('tests/requirements-devel.txt').read().replace('>=', '==') ; open('tests/requirements-devel.txt', 'w').write(req)" # Note: This uses an internal pip API and may not always work # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow diff --git a/requirements-extra.txt b/requirements-extra.txt index 1ce0aa550212f..0fcd2f8a1bd92 100644 --- a/requirements-extra.txt +++ b/requirements-extra.txt @@ -10,5 +10,5 @@ matplotlib>=3.1.1 # no need to install with [pytorch] as pytorch is already installed and torchvision is required only for Horovod examples horovod>=0.19.1 omegaconf>=2.0.0 -scipy>=0.13.3 +# scipy>=0.13.3 scikit-learn>=0.20.0 diff --git a/requirements.txt b/requirements.txt index 79899d1b9c71f..8a3923894393b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # the default package dependencies -numpy>=1.11.0 +numpy>=1.13.3 tqdm>=4.41.0 torch>=1.3 tensorboard>=1.14 From d49298aa948d9f70eea8636a4b0ca7c3d080b092 Mon Sep 17 00:00:00 2001 From: Jirka Date: Tue, 9 Jun 2020 21:25:14 +0200 Subject: [PATCH 43/44] min --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8a3923894393b..62e723574a9bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ # the default package dependencies -numpy>=1.13.3 +numpy>=1.15 # because some BLAS compilation issues tqdm>=4.41.0 torch>=1.3 tensorboard>=1.14 From 363bd640207fc5a8352fe13cf282bc2989154ed4 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Wed, 10 Jun 2020 00:46:28 +0200 Subject: [PATCH 44/44] Apply suggestions from code review Co-authored-by: Tullie Murrell --- pytorch_lightning/metrics/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/metrics/utils.py b/pytorch_lightning/metrics/utils.py index e284b9494d8b8..0829c9711cb44 100644 --- a/pytorch_lightning/metrics/utils.py +++ b/pytorch_lightning/metrics/utils.py @@ -44,7 +44,6 @@ def _convert_to_tensor(data: Any) -> Any: """ if isinstance(data, numbers.Number): return torch.tensor([data]) - else: return default_convert(data)
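As a closing note on the CI change in patch 42: narrowing the replacement from '>' to '>=' pins lower-bound specifiers to exact versions, presumably so that a strict '>' specifier cannot be mangled into an invalid '=' form. A standalone sketch of the same transformation on invented sample contents (not the real requirements files):

    # mirrors the ci-testing.yml one-liner, applied to an in-memory string
    req = "numpy>=1.15\ntorch>=1.3\npillow<7.0.0\n"

    pinned = req.replace('>=', '==')   # only '>=' specifiers become exact pins
    print(pinned)
    # numpy==1.15
    # torch==1.3
    # pillow<7.0.0   (other comparators are left untouched)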