diff --git a/README.md b/README.md index 3e6b705..71a8aa0 100644 --- a/README.md +++ b/README.md @@ -85,9 +85,9 @@ The library implemented many "reasons" for doubt. - `WrongPredictionReason`: assign doubt when a model cannot predict the listed label - `ShortConfidenceReason`: assign doubt when the correct label gains too little confidence - `LongConfidenceReason`: assign doubt when a wrong label gains too much confidence -- `MarginConfidenceReason`: assign doubt when there's a large difference between the top two classes - `DisagreeReason`: assign doubt when two models disagree on a prediction - `CleanlabReason`: assign doubt according to [cleanlab](https://github.com/cleanlab/cleanlab) +- `MarginConfidenceReason`: assign doubt when there's a small difference between the top two class confidences ### Regression Reasons diff --git a/docs/quickstart/index.md b/docs/quickstart/index.md index e8aea7b..e40eb41 100644 --- a/docs/quickstart/index.md +++ b/docs/quickstart/index.md @@ -162,7 +162,7 @@ of reasons that this library supports. 
- `WrongPredictionReason`: assign doubt when a model cannot predict the listed label - `ShortConfidenceReason`: assign doubt when the correct label gains too little confidence - `LongConfidenceReason`: assign doubt when a wrong label gains too much confidence -- `MarginConfidenceReason`: assign doubt when there's a large difference between the top two classes +- `MarginConfidenceReason`: assign doubt when there's a small difference between the top two class confidences - `DisagreeReason`: assign doubt when two models disagree on a prediction - `CleanlabReason`: assign doubt according to [cleanlab](https://github.com/cleanlab/cleanlab) diff --git a/doubtlab/reason.py b/doubtlab/reason.py index 3f9ab80..ba16ef9 100644 --- a/doubtlab/reason.py +++ b/doubtlab/reason.py @@ -93,12 +93,13 @@ def __call__(self, X, y=None): class ShannonEntropyReason: """ Assign doubt when the normalized Shannon entropy is too high, see - https://math.stackexchange.com/questions/395121/how-entropy-scales-with-sample-size + [here](https://math.stackexchange.com/questions/395121/how-entropy-scales-with-sample-size) for a discussion. 
Arguments: model: scikit-learn classifier threshold: confidence threshold for doubt assignment + smoothing: constant value added to probas to prevent taking the log of zero Usage: @@ -119,21 +120,36 @@ class ShannonEntropyReason: ``` """ - def __init__(self, model, threshold=0.5): + def __init__(self, model, threshold=0.5, smoothing=1e-5): self.model = model self.threshold = threshold + self.smoothing = smoothing def __call__(self, X, y): probas = self.model.predict_proba(X) - log_probas = self.model.predict_log_proba(X) / np.log(len(self.model.classes_)) - entropies = -(probas * log_probas).sum(axis=1) - return np.where(entropies > self.threshold, entropies, 0) + return self.from_proba( + probas, threshold=self.threshold, smoothing=self.smoothing + ) @staticmethod - def from_proba(proba, n_classes, threshold=0.5): - """Outputs a reason array from a prediction array, skipping the need for a model.""" - entropies = -(proba * np.log(proba) / np.log(n_classes)).sum(axis=1) - return np.where(entropies > threshold, entropies, 0) + def from_proba(proba, threshold=0.5, smoothing=1e-5): + """ + Outputs a reason array from a prediction array, skipping the need for a model. + + Usage: + + ```python + import numpy as np + from doubtlab.reason import ShannonEntropyReason + + probas = np.array([[0.9, 0.1, 0.0], [0.5, 0.4, 0.1]]) + predicate = ShannonEntropyReason.from_proba(probas, threshold=0.8) + assert np.all(predicate == np.array([0.0, 1.0])) + ``` + """ + probas = proba + smoothing + entropies = -(probas * np.log(probas) / np.log(probas.shape[1])).sum(axis=1) + return (entropies > threshold).astype(np.float16) class WrongPredictionReason: @@ -170,7 +186,7 @@ def __call__(self, X, y): return self.from_predict(preds, y) @staticmethod - def from_predict(preds, y): + def from_predict(pred, y): """ Outputs a reason array from a prediction array, skipping the need for a model. 
@@ -186,7 +202,7 @@ def from_predict(preds, y): assert np.all(predicate == np.array([0.0, 1.0])) ``` """ - return (preds != y).astype(np.float16) + return (pred != y).astype(np.float16) class LongConfidenceReason: @@ -221,7 +237,7 @@ def __init__(self, model, threshold=0.2): self.threshold = threshold @staticmethod - def from_probas(probas, y, classes, threshold): + def from_proba(proba, y, classes, threshold): """ Outputs a reason array from a proba array, skipping the need for a model. @@ -231,16 +247,16 @@ def from_probas(probas, y, classes, threshold): import numpy as np from doubtlab.reason import LongConfidenceReason - probas = np.array([[0.9, 0.1], [0.5, 0.5]]) - y = np.array([1, 0]) + probas = np.array([[0.9, 0.1], [0.5, 0.5], [0.2, 0.8]]) + y = np.array([0, 1, 0]) classes = np.array([0, 1]) threshold = 0.4 - predicate = LongConfidenceReason.from_probas(preds, y, classes, threshold) - assert np.all(predicate == np.array([0.0, 1.0])) + predicate = LongConfidenceReason.from_proba(probas, y, classes, threshold) + assert np.all(predicate == np.array([0.0, 1.0, 1.0])) ``` """ values = [] - for i, proba in enumerate(probas): + for i, proba in enumerate(proba): proba_dict = {classes[j]: v for j, v in enumerate(proba) if j != y[i]} values.append(max(proba_dict.values())) confidences = np.array(values) @@ -248,7 +264,7 @@ def from_probas(probas, y, classes, threshold): def __call__(self, X, y): probas = self.model.predict_proba(X) - return self.from_probas(probas, y, self.model.classes_, self.threshold) + return self.from_proba(probas, y, self.model.classes_, self.threshold) class MarginConfidenceReason: @@ -285,7 +301,7 @@ def __init__(self, model, threshold=0.2): self.threshold = threshold @staticmethod - def from_probas(probas, threshold=0.2): + def from_proba(proba, threshold=0.2): """ Outputs a reason array from a proba array, skipping the need for a model. 
@@ -296,17 +312,17 @@ def from_probas(probas, threshold=0.2): from doubtlab.reason import MarginConfidenceReason probas = np.array([[0.9, 0.1, 0.0], [0.5, 0.4, 0.1]]) - predicate = MarginConfidenceReason.from_probas(probas, threshold=0.3) + predicate = MarginConfidenceReason.from_proba(probas, threshold=0.3) assert np.all(predicate == np.array([0.0, 1.0])) ``` """ - sorted = np.sort(probas, axis=1) + sorted = np.sort(proba, axis=1) margin = sorted[:, -1] - sorted[:, -2] return (margin < threshold).astype(np.float16) def __call__(self, X, y): probas = self.model.predict_proba(X) - return self.from_probas(probas, self.threshold) + return self.from_proba(probas, self.threshold) class ShortConfidenceReason: @@ -341,7 +357,7 @@ def __init__(self, model, threshold=0.2): self.threshold = threshold @staticmethod - def from_probas(probas, y, classes, threshold=0.2): + def from_proba(proba, y, classes, threshold=0.2): """ Outputs a reason array from a proba array, skipping the need for a model. @@ -351,15 +367,16 @@ def from_probas(probas, y, classes, threshold=0.2): import numpy as np from doubtlab.reason import ShortConfidenceReason - probas = np.array([[0.9, 0.1], [0.5, 0.5]]) - y = np.array([0, 1]) + probas = np.array([[0.9, 0.1], [0.5, 0.5], [0.3, 0.7]]) + y = np.array([0, 1, 0]) classes = np.array([0, 1]) - threshold = 0.6 - predicate = ShortConfidenceReason.from_probas(probas, y, classes, threshold) - assert np.all(predicate == np.array([0.0, 1.0])) + threshold = 0.4 + predicate = ShortConfidenceReason.from_proba(probas, y, classes, threshold) + assert np.all(predicate == np.array([0.0, 0.0, 1.0])) + ``` """ values = [] - for i, p in enumerate(probas): + for i, p in enumerate(proba): proba_dict = {classes[j]: v for j, v in enumerate(p)} values.append(proba_dict[y[i]]) confidences = np.array(values) @@ -367,7 +384,7 @@ def from_probas(probas, y, classes, threshold=0.2): def __call__(self, X, y): probas = self.model.predict_proba(X) - return self.from_probas(probas, y, 
self.model.classes_, self.threshold) + return self.from_proba(probas, y, self.model.classes_, self.threshold) class DisagreeReason: @@ -405,13 +422,14 @@ def __init__(self, model1, model2): self.model2 = model2 @staticmethod - def from_pred(preds1, preds2): + def from_pred(pred1, pred2): """ Outputs a reason array from two pred arrays, skipping the need for a model. Usage: ```python + import numpy as np from doubtlab.reason import DisagreeReason pred1 = [0, 1, 2] @@ -420,7 +438,7 @@ def from_pred(preds1, preds2): assert np.all(predicate == np.array([0.0, 0.0, 1.0])) ``` """ - return (np.array(preds1) != np.array(preds2)).astype(np.float16) + return (np.array(pred1) != np.array(pred2)).astype(np.float16) def __call__(self, X, y): pred1 = self.model1.predict(X) @@ -568,7 +586,7 @@ def __init__(self, model, sorted_index_method="normalized_margin", min_doubt=0.5 self.min_doubt = min_doubt @staticmethod - def from_probas(probas, y, min_doubt=0.5, sorted_index_method="normalized_margin"): + def from_proba(proba, y, min_doubt=0.5, sorted_index_method="normalized_margin"): """ Outputs a reason array from a proba array, skipping the need for a model. 
@@ -580,12 +598,10 @@ def from_probas(probas, y, min_doubt=0.5, sorted_index_method="normalized_margin probas = np.array([[0.9, 0.1], [0.5, 0.5]]) y = np.array([0, 1]) - classes = np.array([0, 1]) - threshold = 0.4 - predicate = CleanlabReason.from_probas(probas, y, classes, threshold) + predicate = CleanlabReason.from_proba(probas, y) ``` """ - ordered_label_errors = get_noise_indices(y, probas, sorted_index_method) + ordered_label_errors = get_noise_indices(y, proba, sorted_index_method) result = np.zeros_like(y) conf_arr = np.linspace(1, min_doubt, result.shape[0]) for idx, _ in zip(ordered_label_errors, conf_arr): @@ -594,4 +610,4 @@ def from_probas(probas, y, min_doubt=0.5, sorted_index_method="normalized_margin def __call__(self, X, y): probas = self.model.predict_proba(X) - return self.from_probas(probas, y, self.min_doubt, self.sorted_index_method) + return self.from_proba(probas, y, self.min_doubt, self.sorted_index_method) diff --git a/setup.py b/setup.py index a3df21a..00915e1 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ setup( name="doubtlab", - version="0.1.3", + version="0.1.4", author="Vincent D. 
Warmerdam", packages=find_packages(exclude=["notebooks", "docs"]), description="Don't Blindly Trust Your Labels", diff --git a/tests/test_docs.py b/tests/test_docs.py index 11d9221..d630359 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,5 +1,5 @@ import pytest -from mktestdocs import check_docstring, check_md_file +from mktestdocs import check_docstring, check_md_file, get_codeblock_members from doubtlab.reason import ( ProbaReason, @@ -13,10 +13,11 @@ AbsoluteDifferenceReason, RelativeDifferenceReason, CleanlabReason, + ShannonEntropyReason, ) from doubtlab.ensemble import DoubtEnsemble -all_reasons = [ +all_objects = [ ProbaReason, RandomReason, OutlierReason, @@ -28,14 +29,24 @@ AbsoluteDifferenceReason, RelativeDifferenceReason, CleanlabReason, + ShannonEntropyReason, + DoubtEnsemble, ] -@pytest.mark.parametrize( - "func", all_reasons + [DoubtEnsemble], ids=lambda d: d.__name__ -) +def flatten(items): + """Flattens a list""" + return [item for sublist in items for item in sublist] + + +# This way we ensure that each item in `all_members` points to a method +# that could have a docstring. 
+all_members = flatten([get_codeblock_members(o) for o in all_objects]) + + +@pytest.mark.parametrize("func", all_members, ids=lambda d: d.__qualname__) def test_function_docstrings(func): - """Test the docstring code of some functions.""" + """Test the python example in each method in each object.""" check_docstring(obj=func) diff --git a/tests/test_reason/test_cleanlab.py b/tests/test_reason/test_cleanlab.py index c04216f..32d7684 100644 --- a/tests/test_reason/test_cleanlab.py +++ b/tests/test_reason/test_cleanlab.py @@ -6,7 +6,5 @@ def test_longconf_proba(): """Test from_probas on a obvious example.""" probas = np.array([[0.9, 0.1], [0.5, 0.5]]) y = np.array([0, 1]) - classes = np.array([0, 1]) - threshold = 0.4 - predicate = CleanlabReason.from_probas(probas, y, classes, threshold) + predicate = CleanlabReason.from_proba(proba=probas, y=y) assert predicate.dtype == np.float16 diff --git a/tests/test_reason/test_disagree.py b/tests/test_reason/test_disagree.py index 0017f64..98d4f67 100644 --- a/tests/test_reason/test_disagree.py +++ b/tests/test_reason/test_disagree.py @@ -8,5 +8,5 @@ def test_short_conf_probas(): """ pred1 = [0, 1, 2] pred2 = [0, 1, 1] - predicate = DisagreeReason.from_pred(pred1, pred2) + predicate = DisagreeReason.from_pred(pred1=pred1, pred2=pred2) assert np.all(predicate == np.array([0.0, 0.0, 1.0])) diff --git a/tests/test_reason/test_entropy.py b/tests/test_reason/test_entropy.py new file mode 100644 index 0000000..5a03dc1 --- /dev/null +++ b/tests/test_reason/test_entropy.py @@ -0,0 +1,11 @@ +import numpy as np +from doubtlab.reason import ShannonEntropyReason + + +def test_short_conf_probas(): + """ + Test `from_proba` on an obvious example. 
+ """ + probas = np.array([[0.9, 0.1, 0.0], [0.5, 0.4, 0.1]]) + predicate = ShannonEntropyReason.from_proba(probas, threshold=0.8) + assert np.all(predicate == np.array([0.0, 1.0])) diff --git a/tests/test_reason/test_longconfreason.py b/tests/test_reason/test_longconfreason.py index e7e4c3c..011981e 100644 --- a/tests/test_reason/test_longconfreason.py +++ b/tests/test_reason/test_longconfreason.py @@ -8,5 +8,7 @@ def test_longconf_proba(): y = np.array([0, 1]) classes = np.array([0, 1]) threshold = 0.4 - predicate = LongConfidenceReason.from_probas(probas, y, classes, threshold) + predicate = LongConfidenceReason.from_proba( + proba=probas, y=y, classes=classes, threshold=threshold + ) assert np.all(predicate == np.array([0.0, 1.0])) diff --git a/tests/test_reason/test_margin.py b/tests/test_reason/test_margin.py index 59a321d..db6affa 100644 --- a/tests/test_reason/test_margin.py +++ b/tests/test_reason/test_margin.py @@ -12,12 +12,12 @@ def test_margin_confidence_margin(): model.fit(X, y) probas = np.eye(3) - reason = MarginConfidenceReason.from_probas(probas) + reason = MarginConfidenceReason.from_proba(proba=probas) assert all([r == 0.0 for r in reason]) def test_margin_simple_example(): """Test on a obvious example.""" probas = np.array([[0.9, 0.1, 0.0], [0.5, 0.4, 0.1]]) - predicate = MarginConfidenceReason.from_probas(probas, threshold=0.3) + predicate = MarginConfidenceReason.from_proba(proba=probas, threshold=0.3) assert np.all(predicate == np.array([0.0, 1.0])) diff --git a/tests/test_reason/test_probareason.py b/tests/test_reason/test_probareason.py index edb1bc8..c657a03 100644 --- a/tests/test_reason/test_probareason.py +++ b/tests/test_reason/test_probareason.py @@ -5,12 +5,12 @@ def test_from_proba(): """Ensure internal `from_proba` method handles obvious example""" probas = np.array([[0.9, 0.1], [0.5, 0.5]]) - predicate = ProbaReason.from_proba(probas, max_proba=0.5) + predicate = ProbaReason.from_proba(proba=probas, max_proba=0.5) assert 
np.all(predicate == np.array([0.0, 1.0])) def test_from_proba_max_proba(): """Ensure internal `from_proba` method handles another obvious example""" probas = np.array([[0.9, 0.1], [0.5, 0.5]]) - predicate = ProbaReason.from_proba(probas, max_proba=0.3) + predicate = ProbaReason.from_proba(proba=probas, max_proba=0.3) assert np.all(predicate == np.array([0.0, 0.0])) diff --git a/tests/test_reason/test_shortconfreason.py b/tests/test_reason/test_shortconfreason.py index 8352d3f..961081c 100644 --- a/tests/test_reason/test_shortconfreason.py +++ b/tests/test_reason/test_shortconfreason.py @@ -10,5 +10,7 @@ def test_short_conf_probas(): y = np.array([0, 1]) classes = np.array([0, 1]) threshold = 0.6 - predicate = ShortConfidenceReason.from_probas(probas, y, classes, threshold) + predicate = ShortConfidenceReason.from_proba( + proba=probas, y=y, classes=classes, threshold=threshold + ) assert np.all(predicate == np.array([0.0, 1.0])) diff --git a/tests/test_reason/test_wrongpred.py b/tests/test_reason/test_wrongpred.py index b67cca0..66f2478 100644 --- a/tests/test_reason/test_wrongpred.py +++ b/tests/test_reason/test_wrongpred.py @@ -6,5 +6,5 @@ def test_from_predict(): """Test `from_predict` on an obvious example""" preds = np.array(["positive", "negative"]) y = np.array(["positive", "neutral"]) - predicate = WrongPredictionReason.from_predict(preds, y) + predicate = WrongPredictionReason.from_predict(pred=preds, y=y) assert np.all(predicate == np.array([0.0, 1.0]))