diff --git a/docs/source/metrics.rst b/docs/source/metrics.rst
index 6f70a3c73f2d0..8d7322d4702b9 100644
--- a/docs/source/metrics.rst
+++ b/docs/source/metrics.rst
@@ -1,4 +1,318 @@
-.. automodule:: pytorch_lightning.metrics
-    :members:
-    :noindex:
-    :exclude-members:
+.. testsetup:: *
+
+    import torch
+    import numpy as np
+    from torch.nn import Module
+    from pytorch_lightning.core.lightning import LightningModule
+    from pytorch_lightning.metrics import TensorMetric, NumpyMetric
+
+Metrics
+=======
+This is a general package for PyTorch metrics. They can also be used with plain, non-Lightning PyTorch code.
+Metrics are used to monitor model performance.
+
+This package provides two major pieces of functionality:
+
+1. A Metric class you can use to implement metrics with built-in distributed (DDP) support and device-agnostic behavior.
+2. A collection of popular metrics already implemented for you.
+
+Example::
+
+    from pytorch_lightning.metrics.functional import accuracy
+
+    pred = torch.tensor([0, 1, 2, 3])
+    target = torch.tensor([0, 1, 2, 2])
+
+    # calculates accuracy for the given predictions and targets
+    accuracy(pred, target)
+
+Out::
+
+    tensor(0.7500)
+
+--------------
+
+Implement a metric
+------------------
+You can implement metrics as either a PyTorch metric or a NumPy metric. NumPy metrics
+will slow down training; use PyTorch metrics when possible.
+
+Use :class:`TensorMetric` to implement native PyTorch metrics. This class
+handles automated DDP syncing and converts all inputs and outputs to tensors.
+
+Use :class:`NumpyMetric` to implement NumPy metrics. This class
+handles automated DDP syncing, converts all inputs to numpy arrays, and converts
+all outputs back to tensors.
+
+.. warning::
+    NumPy metrics might slow down your training substantially,
+    since every metric computation requires a GPU sync to convert tensors to numpy.
+
+TensorMetric
+^^^^^^^^^^^^
+Here's an example showing how to implement a TensorMetric:
+
+.. testcode::
+
+    class RMSE(TensorMetric):
+        def forward(self, x, y):
+            return torch.sqrt(torch.mean(torch.pow(x - y, 2.0)))
+
+.. autoclass:: pytorch_lightning.metrics.metric.TensorMetric
+    :noindex:
+
+NumpyMetric
+^^^^^^^^^^^
+Here's an example showing how to implement a NumpyMetric:
+
+.. testcode::
+
+    class RMSE(NumpyMetric):
+        def forward(self, x, y):
+            return np.sqrt(np.mean(np.power(x - y, 2.0)))
+
+
+.. autoclass:: pytorch_lightning.metrics.metric.NumpyMetric
+    :noindex:
+
+--------------
+
+Class Metrics
+-------------
+The following metrics can be instantiated as part of a module definition (even with just
+plain PyTorch).
+
+.. testcode::
+
+    from pytorch_lightning.metrics import Accuracy
+
+    # Plain PyTorch
+    class MyModule(Module):
+        def __init__(self):
+            super().__init__()
+            self.metric = Accuracy()
+
+        def forward(self, x, y):
+            y_hat = ...
+            acc = self.metric(y_hat, y)
+
+    # PyTorch Lightning
+    class MyModule(LightningModule):
+        def __init__(self):
+            super().__init__()
+            self.metric = Accuracy()
+
+        def training_step(self, batch, batch_idx):
+            x, y = batch
+            y_hat = ...
+            acc = self.metric(y_hat, y)
+
+These metrics even work when using distributed training:
+
+.. code-block:: python
+
+    model = MyModule()
+    trainer = Trainer(gpus=8, num_nodes=2)
+
+    # any metric automatically reduces across GPUs (even the ones you implement using Lightning)
+    trainer.fit(model)
+
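+Class metrics can also be used standalone, outside of any module. A minimal sketch, reusing
+the tensors from the functional example above (the expected output is taken from that example)::
+
+    from pytorch_lightning.metrics import Accuracy
+
+    pred = torch.tensor([0, 1, 2, 3])
+    target = torch.tensor([0, 1, 2, 2])
+
+    metric = Accuracy()
+    metric(pred, target)
+
+Out::
+
+    tensor(0.7500)
+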
+Accuracy
+^^^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.Accuracy
+    :noindex:
+
+AveragePrecision
+^^^^^^^^^^^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.AveragePrecision
+    :noindex:
+
+AUROC
+^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.AUROC
+    :noindex:
+
+ConfusionMatrix
+^^^^^^^^^^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.ConfusionMatrix
+    :noindex:
+
+DiceCoefficient
+^^^^^^^^^^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.DiceCoefficient
+    :noindex:
+
+F1
+^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.F1
+    :noindex:
+
+FBeta
+^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.FBeta
+    :noindex:
+
+PrecisionRecall
+^^^^^^^^^^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.PrecisionRecall
+    :noindex:
+
+Precision
+^^^^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.Precision
+    :noindex:
+
+Recall
+^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.Recall
+    :noindex:
+
+ROC
+^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.ROC
+    :noindex:
+
+MulticlassROC
+^^^^^^^^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.MulticlassROC
+    :noindex:
+
+MulticlassPrecisionRecall
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autoclass:: pytorch_lightning.metrics.classification.MulticlassPrecisionRecall
+    :noindex:
+
+--------------
+
+Functional Metrics
+------------------
+
+accuracy (F)
+^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.accuracy
+    :noindex:
+
+auc (F)
+^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.auc
+    :noindex:
+
+auroc (F)
+^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.auroc
+    :noindex:
+
+average_precision (F)
+^^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.average_precision
+    :noindex:
+
+confusion_matrix (F)
+^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.confusion_matrix
+    :noindex:
+
+dice_score (F)
+^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.dice_score
+    :noindex:
+
+f1_score (F)
+^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.f1_score
+    :noindex:
+
+fbeta_score (F)
+^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.fbeta_score
+    :noindex:
+
+multiclass_precision_recall_curve (F)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.multiclass_precision_recall_curve
+    :noindex:
+
+multiclass_roc (F)
+^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.multiclass_roc
+    :noindex:
+
+precision (F)
+^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.precision
+    :noindex:
+
+precision_recall (F)
+^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.precision_recall
+    :noindex:
+
+precision_recall_curve (F)
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.precision_recall_curve
+    :noindex:
+
+recall (F)
+^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.recall
+    :noindex:
+
+roc (F)
+^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.roc
+    :noindex:
+
+stat_scores (F)
+^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.stat_scores
+    :noindex:
+
+stat_scores_multiple_classes (F)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.stat_scores_multiple_classes
+    :noindex:
+
+----------------
+
+Metric pre-processing
+---------------------
+These utilities convert predictions and targets into the formats that the metrics above expect.
+
+to_categorical (F)
+^^^^^^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.to_categorical
+    :noindex:
+
+to_onehot (F)
+^^^^^^^^^^^^^
+
+.. autofunction:: pytorch_lightning.metrics.functional.to_onehot
+    :noindex:
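+
+For example, ``to_categorical`` collapses per-class probabilities into label indices, which most
+classification metrics expect. A minimal sketch (values taken from the ``to_categorical``
+doctest added in this change)::
+
+    from pytorch_lightning.metrics.functional import to_categorical
+
+    x = torch.tensor([[0.2, 0.5], [0.9, 0.1]])
+    to_categorical(x)
+
+Out::
+
+    tensor([1, 0])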
diff --git a/pytorch_lightning/metrics/__init__.py b/pytorch_lightning/metrics/__init__.py
index 64ca41729d971..ac026c3a74375 100644
--- a/pytorch_lightning/metrics/__init__.py
+++ b/pytorch_lightning/metrics/__init__.py
@@ -1,30 +1,15 @@
-"""
-Metrics
-=======
-
-Metrics are generally used to monitor model performance.
-
-The following package aims to provide the most convenient ones as well
-as a structure to implement your custom metrics for all the fancy research
-you want to do.
-
-For native PyTorch implementations of metrics, it is recommended to use
-the :class:`TensorMetric` which handles automated DDP syncing and conversions
-to tensors for all inputs and outputs.
-
-If your metrics implementation works on numpy, just use the
-:class:`NumpyMetric`, which handles the automated conversion of
-inputs to and outputs from numpy as well as automated ddp syncing.
-
-.. warning:: Employing numpy in your metric calculation might slow
-    down your training substantially, since every metric computation
-    requires a GPU sync to convert tensors to numpy.
-
-
-"""
-
 from pytorch_lightning.metrics.converters import numpy_metric, tensor_metric
 from pytorch_lightning.metrics.metric import Metric, TensorMetric, NumpyMetric
 from pytorch_lightning.metrics.sklearn import (
-    SklearnMetric, Accuracy, AveragePrecision, AUC, ConfusionMatrix, F1, FBeta,
-    Precision, Recall, PrecisionRecallCurve, ROC, AUROC)
+    SklearnMetric,
+    Accuracy,
+    AveragePrecision,
+    AUC,
+    ConfusionMatrix,
+    F1,
+    FBeta,
+    Precision,
+    Recall,
+    PrecisionRecallCurve,
+    ROC,
+    AUROC)
diff --git a/pytorch_lightning/metrics/classification.py b/pytorch_lightning/metrics/classification.py
index 3e02a8735b7a2..db4318ed88d5b 100644
--- a/pytorch_lightning/metrics/classification.py
+++ b/pytorch_lightning/metrics/classification.py
@@ -60,6 +60,14 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 3])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = Accuracy()
+            >>> metric(pred, target)
+            tensor(0.7500)
+
         """
         super().__init__(name='accuracy',
                          reduce_group=reduce_group,
@@ -100,6 +108,17 @@ def __init__(
             normalize: whether to compute a normalized confusion matrix
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
+
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 2])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = ConfusionMatrix()
+            >>> metric(pred, target)
+            tensor([[1., 0., 0.],
+                    [0., 1., 0.],
+                    [0., 0., 2.]])
+
         """
         super().__init__(name='confusion_matrix',
                          reduce_group=reduce_group,
@@ -138,6 +157,19 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 3])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = PrecisionRecall()
+            >>> prec, recall, thr = metric(pred, target)
+            >>> prec
+            tensor([0.3333, 0.0000, 0.0000, 1.0000])
+            >>> recall
+            tensor([1., 0., 0., 0.])
+            >>> thr
+            tensor([1., 2., 3.])
+
         """
         super().__init__(name='precision_recall_curve',
                          reduce_group=reduce_group,
                          reduce_op=reduce_op)
@@ -192,11 +224,18 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 3])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = Precision()
+            >>> metric(pred, target)
+            tensor(1.)
+
         """
         super().__init__(name='precision',
                          reduce_group=reduce_group,
                          reduce_op=reduce_op)
-
         self.num_classes = num_classes
         self.reduction = reduction
@@ -239,6 +278,14 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 3])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = Recall()
+            >>> metric(pred, target)
+            tensor(0.8333)
+
         """
         super().__init__(name='recall',
                          reduce_group=reduce_group,
@@ -281,6 +328,14 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 3])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = AveragePrecision()
+            >>> metric(pred, target)
+            tensor(0.3333)
+
         """
         super().__init__(name='AP',
                          reduce_group=reduce_group,
@@ -327,6 +382,14 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 3])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = AUROC()
+            >>> metric(pred, target)
+            tensor(0.3333)
+
         """
         super().__init__(name='auroc',
                          reduce_group=reduce_group,
@@ -379,6 +442,14 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 3])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = FBeta(0.25)
+            >>> metric(pred, target)
+            tensor(0.9815)
+
         """
         super().__init__(name='fbeta',
                          reduce_group=reduce_group,
@@ -425,6 +496,14 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 3])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = F1()
+            >>> metric(pred, target)
+            tensor(0.8889)
+
         """
         super().__init__(name='f1',
                          reduce_group=reduce_group,
@@ -466,6 +545,19 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([0, 1, 2, 3])
+            >>> target = torch.tensor([0, 1, 2, 2])
+            >>> metric = ROC()
+            >>> fp, tp, thresholds = metric(pred, target)
+            >>> fp
+            tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000])
+            >>> tp
+            tensor([0., 0., 0., 1., 1.])
+            >>> thresholds
+            tensor([4., 3., 2., 1., 0.])
+
         """
         super().__init__(name='roc',
                          reduce_group=reduce_group,
@@ -519,6 +611,20 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([[0.85, 0.05, 0.05, 0.05],
+            ...                      [0.05, 0.85, 0.05, 0.05],
+            ...                      [0.05, 0.05, 0.85, 0.05],
+            ...                      [0.05, 0.05, 0.05, 0.85]])
+            >>> target = torch.tensor([0, 1, 3, 2])
+            >>> metric = MulticlassROC()
+            >>> classes_roc = metric(pred, target)
+            >>> classes_roc   # doctest: +NORMALIZE_WHITESPACE
+            ((tensor([0., 0., 1.]), tensor([0., 1., 1.]), tensor([1.8500, 0.8500, 0.0500])),
+             (tensor([0., 0., 1.]), tensor([0., 1., 1.]), tensor([1.8500, 0.8500, 0.0500])),
+             (tensor([0.0000, 0.3333, 1.0000]), tensor([0., 0., 1.]), tensor([1.8500, 0.8500, 0.0500])),
+             (tensor([0.0000, 0.3333, 1.0000]), tensor([0., 0., 1.]), tensor([1.8500, 0.8500, 0.0500])))
         """
         super().__init__(name='multiclass_roc',
                          reduce_group=reduce_group,
@@ -535,7 +641,7 @@ def forward(
         Actual metric computation
 
         Args:
-            pred: predicted labels
+            pred: predicted probability for each label
             target: groundtruth labels
             sample_weight: Weights for each sample defining the sample's impact on the score
 
@@ -569,6 +675,21 @@ def __init__(
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
 
+        Example:
+
+            >>> pred = torch.tensor([[0.85, 0.05, 0.05, 0.05],
+            ...                      [0.05, 0.85, 0.05, 0.05],
+            ...                      [0.05, 0.05, 0.85, 0.05],
+            ...                      [0.05, 0.05, 0.05, 0.85]])
+            >>> target = torch.tensor([0, 1, 3, 2])
+            >>> metric = MulticlassPrecisionRecall()
+            >>> classes_pr = metric(pred, target)
+            >>> classes_pr   # doctest: +NORMALIZE_WHITESPACE
+            ((tensor([1., 1.]), tensor([1., 0.]), tensor([0.8500])),
+             (tensor([1., 1.]), tensor([1., 0.]), tensor([0.8500])),
+             (tensor([0.2500, 0.0000, 1.0000]), tensor([1., 0., 0.]), tensor([0.0500, 0.8500])),
+             (tensor([0.2500, 0.0000, 1.0000]), tensor([1., 0., 0.]), tensor([0.0500, 0.8500])))
+
         """
         super().__init__(name='multiclass_precision_recall_curve',
                          reduce_group=reduce_group,
@@ -586,7 +707,7 @@ def forward(
         Actual metric computation
 
         Args:
-            pred: predicted labels
+            pred: predicted probability for each label
             target: groundtruth labels
             sample_weight: Weights for each sample defining the sample's impact on the score
 
@@ -623,6 +744,18 @@ def __init__(
             - sum: add elements
             reduce_group: the process group to reduce metric results from DDP
             reduce_op: the operation to perform for ddp reduction
+
+        Example:
+
+            >>> pred = torch.tensor([[0.85, 0.05, 0.05, 0.05],
+            ...                      [0.05, 0.85, 0.05, 0.05],
+            ...                      [0.05, 0.05, 0.85, 0.05],
+            ...                      [0.05, 0.05, 0.05, 0.85]])
+            >>> target = torch.tensor([0, 1, 3, 2])
+            >>> metric = DiceCoefficient()
+            >>> score = metric(pred, target)
+            >>> score
+            tensor(0.3333)
         """
         super().__init__(name='dice',
                          reduce_group=reduce_group,
@@ -638,7 +773,7 @@ def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
         Actual metric computation
 
         Args:
-            pred: predicted labels
+            pred: predicted probability for each label
             target: groundtruth labels
 
         Return:
diff --git a/pytorch_lightning/metrics/functional/__init__.py b/pytorch_lightning/metrics/functional/__init__.py
index e69de29bb2d1d..2c8b8a85a92c3 100644
--- a/pytorch_lightning/metrics/functional/__init__.py
+++ b/pytorch_lightning/metrics/functional/__init__.py
@@ -0,0 +1,21 @@
+from pytorch_lightning.metrics.functional.classification import (
+    accuracy,
+    auc,
+    auroc,
+    average_precision,
+    confusion_matrix,
+    dice_score,
+    f1_score,
+    fbeta_score,
+    multiclass_precision_recall_curve,
+    multiclass_roc,
+    precision,
+    precision_recall,
+    precision_recall_curve,
+    recall,
+    roc,
+    stat_scores,
+    stat_scores_multiple_classes,
+    to_categorical,
+    to_onehot
+)
diff --git a/pytorch_lightning/metrics/functional/classification.py b/pytorch_lightning/metrics/functional/classification.py
index 64d821fbb3f21..ddb774631304e 100644
--- a/pytorch_lightning/metrics/functional/classification.py
+++ b/pytorch_lightning/metrics/functional/classification.py
@@ -21,6 +21,15 @@ def to_onehot(
     Output:
         A sparse label tensor with shape [N, C, d1, d2, ...]
+
+    Example:
+
+        >>> x = torch.tensor([1, 2, 3])
+        >>> to_onehot(x)
+        tensor([[0, 1, 0, 0],
+                [0, 0, 1, 0],
+                [0, 0, 0, 1]])
+
     """
     if n_classes is None:
         n_classes = int(tensor.max().detach().item() + 1)
@@ -41,6 +50,13 @@ def to_categorical(tensor: torch.Tensor, argmax_dim: int = 1) -> torch.Tensor:
 
     Return:
         A tensor with categorical labels [N, d2, ...]
+
+    Example:
+
+        >>> x = torch.tensor([[0.2, 0.5], [0.9, 0.1]])
+        >>> to_categorical(x)
+        tensor([1, 0])
+
     """
     return torch.argmax(tensor, dim=argmax_dim)
@@ -65,7 +81,8 @@ def get_num_classes(
     if pred.ndim > target.ndim:
         num_classes = pred.size(1)
     else:
-        num_classes = int(target.max().detach().item() + 1)
+        num_target_classes = int(target.max().detach().item() + 1)
+        num_classes = num_target_classes
 
     return num_classes
@@ -88,6 +105,18 @@ def stat_scores(
     Return:
-        Tensors in the following order: True Positive, False Positive, True Negative, False Negative
+        Tensors in the following order: True Positive, False Positive, True Negative, False Negative, Support
 
+    Example:
+
+        >>> x = torch.tensor([1, 2, 3])
+        >>> y = torch.tensor([0, 2, 3])
+        >>> tp, fp, tn, fn, sup = stat_scores(x, y, class_index=1)
+        >>> (tp, fp, tn, fn, sup)   # doctest: +NORMALIZE_WHITESPACE
+        (tensor(0),
+         tensor(1),
+         tensor(2),
+         tensor(0),
+         tensor(0))
+
     """
     if pred.ndim == target.ndim + 1:
         pred = to_categorical(pred, argmax_dim=argmax_dim)
@@ -122,6 +151,17 @@ def stat_scores_multiple_classes(
     Return:
-        Returns tensors for: tp, fp, tn, fn
+        Returns tensors for: tp, fp, tn, fn, sups
 
+    Example:
+
+        >>> x = torch.tensor([1, 2, 3])
+        >>> y = torch.tensor([0, 2, 3])
+        >>> tps, fps, tns, fns, sups = stat_scores_multiple_classes(x, y)
+        >>> (tps, fps, tns, fns, sups)   # doctest: +NORMALIZE_WHITESPACE
+        (tensor([0., 0., 1., 1.]),
+         tensor([0., 1., 0., 0.]),
+         tensor([2., 2., 2., 2.]),
+         tensor([1., 0., 0., 0.]),
+         tensor([1., 0., 1., 1.]))
     """
     num_classes = get_num_classes(pred=pred, target=target,
                                   num_classes=num_classes)
@@ -135,9 +175,7 @@ def stat_scores_multiple_classes(
     fns = torch.zeros((num_classes,), device=pred.device)
     sups = torch.zeros((num_classes,), device=pred.device)
     for c in range(num_classes):
-        tps[c], fps[c], tns[c], fns[c], sups[c] = stat_scores(pred=pred,
-                                                              target=target,
-                                                              class_index=c)
+        tps[c], fps[c], tns[c], fns[c], sups[c] = stat_scores(pred=pred, target=target, class_index=c)
 
     return tps, fps, tns, fns, sups
@@ -164,6 +202,14 @@ def accuracy(
 
     Return:
         A Tensor with the classification score.
+
+    Example:
+
+        >>> x = torch.tensor([1, 2, 3])
+        >>> y = torch.tensor([0, 2, 3])
+        >>> accuracy(x, y)
+        tensor(0.6667)
+
     """
     tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred=pred, target=target,
                                                             num_classes=num_classes)
@@ -193,6 +239,16 @@ def confusion_matrix(
 
     Return:
         Tensor, confusion matrix C [num_classes, num_classes ]
+
+    Example:
+
+        >>> x = torch.tensor([1, 2, 3])
+        >>> y = torch.tensor([0, 2, 3])
+        >>> confusion_matrix(x, y)
+        tensor([[0., 1., 0., 0.],
+                [0., 0., 0., 0.],
+                [0., 0., 1., 0.],
+                [0., 0., 0., 1.]])
     """
     num_classes = get_num_classes(pred, target, None)
@@ -229,10 +285,16 @@ def precision_recall(
 
     Return:
         Tensor with precision and recall
+
+    Example:
+
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
+        >>> precision_recall(x, y)
+        (tensor(1.), tensor(0.8333))
+
     """
-    tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred=pred,
-                                                            target=target,
-                                                            num_classes=num_classes)
+    tps, fps, tns, fns, sups = stat_scores_multiple_classes(pred=pred, target=target, num_classes=num_classes)
 
     tps = tps.to(torch.float)
     fps = fps.to(torch.float)
@@ -268,6 +330,14 @@ def precision(
 
     Return:
         Tensor with precision.
+
+    Example:
+
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
+        >>> precision(x, y)
+        tensor(1.)
+
     """
     return precision_recall(pred=pred, target=target,
                             num_classes=num_classes, reduction=reduction)[0]
@@ -295,6 +365,13 @@ def recall(
 
     Return:
         Tensor with recall.
+
+    Example:
+
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
+        >>> recall(x, y)
+        tensor(0.8333)
     """
     return precision_recall(pred=pred, target=target,
                             num_classes=num_classes, reduction=reduction)[1]
@@ -329,6 +406,13 @@ def fbeta_score(
 
     Return:
         Tensor with the value of F-score. It is a value between 0-1.
+
+    Example:
+
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
+        >>> fbeta_score(x, y, 0.2)
+        tensor(0.9877)
     """
     prec, rec = precision_recall(pred=pred, target=target,
                                  num_classes=num_classes,
@@ -363,6 +447,13 @@ def f1_score(
 
     Return:
         Tensor containing F1-score
+
+    Example:
+
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
+        >>> f1_score(x, y)
+        tensor(0.8889)
     """
     return fbeta_score(pred=pred, target=target, beta=1.,
                        num_classes=num_classes, reduction=reduction)
@@ -431,6 +522,19 @@ def roc(
 
     Return:
         [Tensor, Tensor, Tensor]: false-positive rate (fpr), true-positive rate (tpr), thresholds
+
+    Example:
+
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
+        >>> fpr, tpr, thresholds = roc(x, y)
+        >>> fpr
+        tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000])
+        >>> tpr
+        tensor([0., 0., 0., 1., 1.])
+        >>> thresholds
+        tensor([4, 3, 2, 1, 0])
+
    """
    fps, tps, thresholds = _binary_clf_curve(pred=pred, target=target,
                                             sample_weight=sample_weight,
@@ -473,6 +577,19 @@ def multiclass_roc(
     Return:
         [num_classes, Tensor, Tensor, Tensor]: returns roc for each class.
         number of classes, false-positive rate (fpr), true-positive rate (tpr), thresholds
+
+    Example:
+
+        >>> pred = torch.tensor([[0.85, 0.05, 0.05, 0.05],
+        ...                      [0.05, 0.85, 0.05, 0.05],
+        ...                      [0.05, 0.05, 0.85, 0.05],
+        ...                      [0.05, 0.05, 0.05, 0.85]])
+        >>> target = torch.tensor([0, 1, 3, 2])
+        >>> multiclass_roc(pred, target)   # doctest: +NORMALIZE_WHITESPACE
+        ((tensor([0., 0., 1.]), tensor([0., 1., 1.]), tensor([1.8500, 0.8500, 0.0500])),
+         (tensor([0., 0., 1.]), tensor([0., 1., 1.]), tensor([1.8500, 0.8500, 0.0500])),
+         (tensor([0.0000, 0.3333, 1.0000]), tensor([0., 0., 1.]), tensor([1.8500, 0.8500, 0.0500])),
+         (tensor([0.0000, 0.3333, 1.0000]), tensor([0., 0., 1.]), tensor([1.8500, 0.8500, 0.0500])))
     """
     num_classes = get_num_classes(pred, target, num_classes)
@@ -503,6 +620,19 @@ def precision_recall_curve(
 
     Return:
         [Tensor, Tensor, Tensor]: precision, recall, thresholds
+
+    Example:
+
+        >>> pred = torch.tensor([0, 1, 2, 3])
+        >>> target = torch.tensor([0, 1, 2, 2])
+        >>> precision, recall, thresholds = precision_recall_curve(pred, target)
+        >>> precision
+        tensor([0.3333, 0.0000, 0.0000, 1.0000])
+        >>> recall
+        tensor([1., 0., 0., 0.])
+        >>> thresholds
+        tensor([1, 2, 3])
+
     """
     fps, tps, thresholds = _binary_clf_curve(pred=pred, target=target,
                                              sample_weight=sample_weight,
@@ -547,7 +677,24 @@ def multiclass_precision_recall_curve(
         num_classes: number of classes
 
     Return:
-    [num_classes, Tensor, Tensor, Tensor]: number of classes, precision, recall, thresholds
+        A tuple holding one (precision, recall, thresholds) tuple per class
+
+    Example:
+
+        >>> pred = torch.tensor([[0.85, 0.05, 0.05, 0.05],
+        ...                      [0.05, 0.85, 0.05, 0.05],
+        ...                      [0.05, 0.05, 0.85, 0.05],
+        ...                      [0.05, 0.05, 0.05, 0.85]])
+        >>> target = torch.tensor([0, 1, 3, 2])
+        >>> cls0, cls1, cls2, cls3 = multiclass_precision_recall_curve(pred, target)
+        >>> cls0   # (precision, recall, thresholds) for class 0
+        (tensor([1., 1.]), tensor([1., 0.]), tensor([0.8500]))
+        >>> cls1
+        (tensor([1., 1.]), tensor([1., 0.]), tensor([0.8500]))
+        >>> cls2
+        (tensor([0.2500, 0.0000, 1.0000]), tensor([1., 0., 0.]), tensor([0.0500, 0.8500]))
+        >>> cls3   # doctest: +NORMALIZE_WHITESPACE
+        (tensor([0.2500, 0.0000, 1.0000]), tensor([1., 0., 0.]), tensor([0.0500, 0.8500]))
     """
     num_classes = get_num_classes(pred, target, num_classes)
@@ -574,6 +721,13 @@ def auc(x: torch.Tensor, y: torch.Tensor, reorder: bool = True):
 
     Return:
         AUC score (float)
+
+    Example:
+
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
+        >>> auc(x, y)
+        tensor(4.)
     """
 
     direction = 1.
@@ -635,6 +789,13 @@ def auroc(
         target: ground-truth labels
         sample_weight: sample weights
         pos_label: the label for the positive class (default: 1.)
+
+    Example:
+
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
+        >>> auroc(x, y)
+        tensor(0.3333)
     """
 
     @auc_decorator(reorder=True)
@@ -650,6 +811,21 @@ def average_precision(
     sample_weight: Optional[Sequence] = None,
     pos_label: int = 1.,
 ) -> torch.Tensor:
+    """
+
+    Args:
+        pred: estimated probabilities
+        target: ground-truth labels
+        sample_weight: sample weights
+        pos_label: the label for the positive class (default: 1.)
+
+    Example:
+
+        >>> x = torch.tensor([0, 1, 2, 3])
+        >>> y = torch.tensor([0, 1, 2, 2])
+        >>> average_precision(x, y)
+        tensor(0.3333)
+    """
     precision, recall, _ = precision_recall_curve(pred=pred, target=target,
                                                   sample_weight=sample_weight,
                                                   pos_label=pos_label)
@@ -667,6 +843,26 @@ def dice_score(
     no_fg_score: float = 0.0,
     reduction: str = 'elementwise_mean',
 ) -> torch.Tensor:
+    """
+    Args:
+        pred: estimated probabilities
+        target: ground-truth labels
+        bg: whether to also compute dice for the background
+        nan_score: score to return, if a NaN occurs during computation
+        no_fg_score: score to return, if no foreground pixel was found in target
+        reduction: a method for reducing dice scores over labels
+
+    Example:
+
+        >>> pred = torch.tensor([[0.85, 0.05, 0.05, 0.05],
+        ...                      [0.05, 0.85, 0.05, 0.05],
+        ...                      [0.05, 0.05, 0.85, 0.05],
+        ...                      [0.05, 0.05, 0.05, 0.85]])
+        >>> target = torch.tensor([0, 1, 3, 2])
+        >>> dice_score(pred, target)
+        tensor(0.3333)
+
+    """
     n_classes = pred.shape[1]
     bg = (1 - int(bool(bg)))
     scores = torch.zeros(n_classes - bg, device=pred.device, dtype=torch.float32)
diff --git a/tests/metrics/functional/test_classification.py b/tests/metrics/functional/test_classification.py
index e4e6a5112ee69..e9bf9be1b5006 100644
--- a/tests/metrics/functional/test_classification.py
+++ b/tests/metrics/functional/test_classification.py
@@ -351,5 +351,6 @@ def test_dice_score(pred, target, expected):
     score = dice_score(torch.tensor(pred), torch.tensor(target))
     assert score == expected
 
+
 # example data taken from
 # https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/metrics/tests/test_ranking.py
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index e3701a1f9418f..fbb8035647a7e 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -27,7 +27,7 @@ def test_no_val_module(monkeypatch, tmpdir, tmpdir_server, url_ckpt):
     """Tests use case where trainer saves the model, and user loads it from tags independently."""
     # set $TORCH_HOME, which determines torch hub's cache path, to tmpdir
-    monkeypatch.setenv('TORCH_HOME', tmpdir)
+    monkeypatch.setenv('TORCH_HOME', str(tmpdir))
     model = EvalModelTemplate()