From c26379db5a1acce755fcabc604052e9f47dc81d5 Mon Sep 17 00:00:00 2001 From: lilacheden Date: Fri, 8 Mar 2024 16:21:32 +0200 Subject: [PATCH 1/9] add processor predictions_yes_1_else_0 Signed-off-by: lilacheden --- prepare/processors/processors.py | 11 +++++++++++ .../processors/predictions_yes_1_else_0.json | 10 ++++++++++ src/unitxt/processors.py | 7 +++++++ tests/library/test_postprocessors.py | 16 ++++++++++++++++ 4 files changed, 44 insertions(+) create mode 100644 src/unitxt/catalog/processors/predictions_yes_1_else_0.json diff --git a/prepare/processors/processors.py b/prepare/processors/processors.py index baec959ad..643d2fd41 100644 --- a/prepare/processors/processors.py +++ b/prepare/processors/processors.py @@ -16,6 +16,7 @@ TakeFirstWord, ToYesOrNone, YesNoToInt, + YesToOneElseZero, ) logger = get_logger() @@ -160,6 +161,16 @@ overwrite=True, ) +add_to_catalog( + SequentialOperator( + steps=[ + YesToOneElseZero(field="prediction", process_every_value=False), + ] + ), + "processors.predictions_yes_1_else_0", + overwrite=True, +) + add_to_catalog( SequentialOperator( steps=[ diff --git a/src/unitxt/catalog/processors/predictions_yes_1_else_0.json b/src/unitxt/catalog/processors/predictions_yes_1_else_0.json new file mode 100644 index 000000000..0eef86eef --- /dev/null +++ b/src/unitxt/catalog/processors/predictions_yes_1_else_0.json @@ -0,0 +1,10 @@ +{ + "type": "sequential_operator", + "steps": [ + { + "type": "yes_to_one_else_zero", + "field": "prediction", + "process_every_value": false + } + ] +} diff --git a/src/unitxt/processors.py b/src/unitxt/processors.py index a45ab83f1..ddcd4a2e6 100644 --- a/src/unitxt/processors.py +++ b/src/unitxt/processors.py @@ -152,6 +152,13 @@ def process_value(self, text: Any) -> Any: return text +class YesToOneElseZero(FieldOperator): + def process_value(self, text: Any) -> Any: + if text == "yes": + return "1" + return "0" + + class StrToFloatFormat(FieldOperator): def process_value(self, text: Any) -> Any: try: diff --git a/tests/library/test_postprocessors.py b/tests/library/test_postprocessors.py index 7966467ab..26da90e6c 100644 --- a/tests/library/test_postprocessors.py +++ b/tests/library/test_postprocessors.py @@ -170,6 +170,22 @@ def test_to_yes_or_none(self): tester=self, ) + def test_predictions_yes_1_else_0(self): + parser, _ = fetch_artifact("processors.predictions_yes_1_else_0") + inputs = ["yes", "no", "yaa"] + targets = [ + {"references": ["yes"], "prediction": "1"}, + {"references": ["no"], "prediction": "0"}, + {"references": ["yaa"], "prediction": "0"}, + ] + + check_operator( + operator=parser, + inputs=list_to_stream_with_prediction_and_references(inputs), + targets=targets, + tester=self, + ) + def test_str_to_float_format(self): parser, _ = fetch_artifact("processors.str_to_float_format") inputs = ["-2.4", "5", "5a"] From fd2c24becd43923a2b2e4a538a52c7b135770d0a Mon Sep 17 00:00:00 2001 From: lilacheden Date: Tue, 12 Mar 2024 15:30:21 +0200 Subject: [PATCH 2/9] binary metrics accept real values predictions (with threshold 0.5) Signed-off-by: lilacheden --- src/unitxt/metrics.py | 34 +++++++++++++--------------------- tests/library/test_metrics.py | 23 ++--------------------- 2 files changed, 15 insertions(+), 42 deletions(-) diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py index 631426e20..2d3234643 100644 --- a/src/unitxt/metrics.py +++ b/src/unitxt/metrics.py @@ -1098,11 +1098,6 @@ def get_str_id(self, str): self.id_to_str[id] = str return self.str_to_id[str] - def _labels_match_average_format( - self, 
references: List[List[str]], predictions: List[str] - ): - return True - def compute( self, references: List[List[str]], @@ -1112,8 +1107,6 @@ def compute( assert all( len(reference) == 1 for reference in references ), "Only a single reference per prediction is allowed in F1 metric" - if not self._labels_match_average_format(references, predictions): - return {self.main_score: np.nan} self.str_to_id = {} self.id_to_str = {} @@ -1155,21 +1148,20 @@ class F1Binary(F1): pos_classes = {"1", "1.0", "yes", "true"} def get_str_id(self, str): - if str.lower() in self.pos_classes: - return 1 - return 0 + return int(str) - # References and predictions must include up to 2 unique values, one of them in pos_classes - def _labels_match_average_format( - self, references: List[List[str]], predictions: List[str] - ): - classes = set(predictions + list(itertools.chain(*references))) - n_classes = len(classes) - if n_classes > 2: - return False - if n_classes == 2 and len(set(classes).difference(self.pos_classes)) == 0: - return False - return True + def compute( + self, + references: List[List[str]], + predictions: List[str], + task_data: List[Dict], + ) -> dict: + predictions_floats = [to_float_or_default(p) for p in predictions] + predictions = [str(int(p > 0.5)) for p in predictions_floats] + references = [ + ["1"] if r[0].lower() in self.pos_classes else "0" for r in references + ] + return super().compute(references, predictions, task_data) class RecallBinary(F1Binary): diff --git a/tests/library/test_metrics.py b/tests/library/test_metrics.py index 1ff4352a6..282515ae7 100644 --- a/tests/library/test_metrics.py +++ b/tests/library/test_metrics.py @@ -169,8 +169,8 @@ def test_f1_micro(self): def test_f1_binary(self): metric = F1Binary() - references = [["1"], ["0"], ["0"], ["0"], ["1"], ["1"]] - predictions = ["1", "1", "0", "0", "1", "1"] + references = [["1"], ["0"], ["0"], ["0"], ["Yes"], ["1"]] + predictions = ["0.8", "1", "0.2", "0", "0.6", "1"] global_target = 0.8571428571428 outputs = apply_metric( @@ -213,25 +213,6 @@ def test_recall_binary(self): self.assertEqual("recall_binary", outputs[0]["score"]["global"]["score_name"]) self.assertEqual("recall_binary", outputs[0]["score"]["instance"]["score_name"]) - def test_f1_binary_non_binary(self): - metric = F1Binary() - references = [["1"], ["0"], ["yes"], ["0"], ["1"], ["1"]] - predictions = ["1", "1", "0", "0", "1", "1"] - - outputs = apply_metric( - metric=metric, predictions=predictions, references=references - ) - self.assertTrue(isnan(outputs[0]["score"]["global"]["score"])) - - metric = F1Binary() - references = [["1"], ["yes"], ["1"], ["1"]] - predictions = ["1", "1", "1", "1"] - - outputs = apply_metric( - metric=metric, predictions=predictions, references=references - ) - self.assertTrue(isnan(outputs[0]["score"]["global"]["score"])) - def test_max_f1(self): metric = BinaryMaxF1() references = [["1"], ["0"], ["0"]] From 0e739be065ac275b45875fd5a890f00b4bc6bfa7 Mon Sep 17 00:00:00 2001 From: lilacheden Date: Tue, 12 Mar 2024 15:41:58 +0200 Subject: [PATCH 3/9] add [] Signed-off-by: lilacheden --- src/unitxt/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py index 2d3234643..0cfa48fbb 100644 --- a/src/unitxt/metrics.py +++ b/src/unitxt/metrics.py @@ -1159,7 +1159,7 @@ def compute( predictions_floats = [to_float_or_default(p) for p in predictions] predictions = [str(int(p > 0.5)) for p in predictions_floats] references = [ - ["1"] if r[0].lower() in self.pos_classes 
else "0" for r in references + ["1"] if r[0].lower() in self.pos_classes else ["0"] for r in references ] return super().compute(references, predictions, task_data) From f0917a2c316592560ca2bfadb5b75dd4ba0e3ff9 Mon Sep 17 00:00:00 2001 From: lilacheden Date: Tue, 12 Mar 2024 17:59:16 +0200 Subject: [PATCH 4/9] update BinaryMaxF1 Signed-off-by: lilacheden --- src/unitxt/metrics.py | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py index 0cfa48fbb..18dfc3ee6 100644 --- a/src/unitxt/metrics.py +++ b/src/unitxt/metrics.py @@ -1,4 +1,3 @@ -import itertools import re import string import uuid @@ -3091,31 +3090,14 @@ def compute( assert all( len(reference) == 1 for reference in references ), "Only a single reference per prediction is allowed in F1 metric" - classes = set(itertools.chain(*references)) - n_clases = len(classes) - assert len(classes) <= 2, "References of BinaryMaxF1 must be binary" - pos_classes = classes.intersection(self.pos_classes) - neg_classes = classes.difference(self.pos_classes) - n_pos_classes = len(pos_classes) - if n_clases == 2: - assert ( - n_pos_classes == 1 - ), "Only one positive class is allowed in BinaryMaxF1" - pos_class = next(iter(pos_classes)) if n_pos_classes > 0 else "1.0" - neg_class = next(iter(neg_classes)) if len(neg_classes) > 0 else "0.0" - float_predictions = [] - for prediction in predictions: - try: - float_predictions.append(float(prediction)) - except Exception: - float_predictions.append(0) + float_predictions = [to_float_or_default(p) for p in predictions] best_thr = -1 best_f1 = -1 for thr in set(float_predictions): new_predictions = [ - pos_class if float_prediction >= thr else neg_class + "1" if float_prediction >= thr else "0" for float_prediction in float_predictions ] f1 = super().compute(references, new_predictions, task_data)[ From cc2af9ffed6b42982e05121cf3831f0b8740fcba Mon Sep 17 00:00:00 2001 From: Ariel Gera Date: Tue, 12 Mar 2024 23:13:30 +0200 Subject: [PATCH 5/9] Add BinaryMaxAccuracy metric Signed-off-by: Ariel Gera --- prepare/metrics/accuracy.py | 5 ++- .../catalog/metrics/max_accuracy_binary.json | 3 ++ src/unitxt/metrics.py | 40 +++++++++++++++++++ tests/library/test_metrics.py | 19 +++++++++ 4 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 src/unitxt/catalog/metrics/max_accuracy_binary.json diff --git a/prepare/metrics/accuracy.py b/prepare/metrics/accuracy.py index 8892cea7e..787711c8e 100644 --- a/prepare/metrics/accuracy.py +++ b/prepare/metrics/accuracy.py @@ -1,5 +1,5 @@ from src.unitxt import add_to_catalog -from src.unitxt.metrics import Accuracy +from src.unitxt.metrics import Accuracy, BinaryMaxAccuracy from src.unitxt.test_utils.metrics import test_metric metric = Accuracy() @@ -32,3 +32,6 @@ ) add_to_catalog(metric, "metrics.accuracy", overwrite=True) + +metric = BinaryMaxAccuracy() +add_to_catalog(metric, "metrics.max_accuracy_binary", overwrite=True) diff --git a/src/unitxt/catalog/metrics/max_accuracy_binary.json b/src/unitxt/catalog/metrics/max_accuracy_binary.json new file mode 100644 index 000000000..15097cf78 --- /dev/null +++ b/src/unitxt/catalog/metrics/max_accuracy_binary.json @@ -0,0 +1,3 @@ +{ + "type": "binary_max_accuracy" +} diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py index 18dfc3ee6..538d65acb 100644 --- a/src/unitxt/metrics.py +++ b/src/unitxt/metrics.py @@ -3108,3 +3108,43 @@ def compute( best_thr = thr return {self.main_score: best_f1, "best_thr_maxf1": best_thr} + + +class 
BinaryMaxAccuracy(GlobalMetric): + process_single_instances = False + main_score = "max_accuracy_binary" + pos_classes = {"1", "1.0", "yes", "true"} + + def compute( + self, + references: List[List[str]], + predictions: List[List[str]], + task_data: List[Dict], + ) -> dict: + assert all( + len(reference) == 1 for reference in references + ), "Only a single reference per prediction is allowed in BinaryMaxAccuracy metric" + + float_predictions = [to_float_or_default(p) for p in predictions] + references = [ + ["1"] if r[0].lower() in self.pos_classes else ["0"] for r in references + ] + + best_thr = -1 + best_acc = -1 + for thr in set(float_predictions): + new_predictions = [ + "1" if float_prediction >= thr else "0" + for float_prediction in float_predictions + ] + acc = np.mean( + [ + [prediction] == reference + for prediction, reference in zip(new_predictions, references) + ] + ) + if acc > best_acc: + best_acc = acc + best_thr = thr + + return {self.main_score: best_acc, "best_thr_max_acc": best_thr} diff --git a/tests/library/test_metrics.py b/tests/library/test_metrics.py index 282515ae7..a11b9f22a 100644 --- a/tests/library/test_metrics.py +++ b/tests/library/test_metrics.py @@ -3,6 +3,7 @@ from src.unitxt.logging_utils import get_logger from src.unitxt.metrics import ( Accuracy, + BinaryMaxAccuracy, BinaryMaxF1, F1Binary, F1Macro, @@ -227,6 +228,24 @@ def test_max_f1(self): self.assertEqual("max_f1_binary", outputs[0]["score"]["global"]["score_name"]) self.assertEqual("max_f1_binary", outputs[0]["score"]["instance"]["score_name"]) + def test_binary_max_accuracy(self): + metric = BinaryMaxAccuracy() + references = [["1"], ["0"], ["0"], ["1"], ["0"]] + predictions = ["0.3", "0", "0.7", "1.0", "0.2"] + + global_target = 0.8 + outputs = apply_metric( + metric=metric, predictions=predictions, references=references + ) + + self.assertAlmostEqual(global_target, outputs[0]["score"]["global"]["score"]) + self.assertEqual( + "max_accuracy_binary", outputs[0]["score"]["global"]["score_name"] + ) + self.assertEqual( + "max_accuracy_binary", outputs[0]["score"]["instance"]["score_name"] + ) + def test_f1_macro(self): metric = F1Macro() references = [["cat"], ["dog"], ["dog"], ["dog"], ["cat"], ["cat"]] From c67e5f6017b47ac1ae7174b9308915defb6a1e78 Mon Sep 17 00:00:00 2001 From: Ariel Gera Date: Wed, 13 Mar 2024 00:31:17 +0200 Subject: [PATCH 6/9] Add Binary Accuracy metric (threshold 0.5) Signed-off-by: Ariel Gera --- prepare/metrics/accuracy.py | 5 +++- .../catalog/metrics/accuracy_binary.json | 3 +++ src/unitxt/metrics.py | 25 +++++++++++++++++++ tests/library/test_metrics.py | 22 ++++++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 src/unitxt/catalog/metrics/accuracy_binary.json diff --git a/prepare/metrics/accuracy.py b/prepare/metrics/accuracy.py index 787711c8e..374532432 100644 --- a/prepare/metrics/accuracy.py +++ b/prepare/metrics/accuracy.py @@ -1,5 +1,5 @@ from src.unitxt import add_to_catalog -from src.unitxt.metrics import Accuracy, BinaryMaxAccuracy +from src.unitxt.metrics import Accuracy, BinaryAccuracy, BinaryMaxAccuracy from src.unitxt.test_utils.metrics import test_metric metric = Accuracy() @@ -33,5 +33,8 @@ add_to_catalog(metric, "metrics.accuracy", overwrite=True) +metric = BinaryAccuracy() +add_to_catalog(metric, "metrics.accuracy_binary") + metric = BinaryMaxAccuracy() add_to_catalog(metric, "metrics.max_accuracy_binary", overwrite=True) diff --git a/src/unitxt/catalog/metrics/accuracy_binary.json 
b/src/unitxt/catalog/metrics/accuracy_binary.json new file mode 100644 index 000000000..7141514c6 --- /dev/null +++ b/src/unitxt/catalog/metrics/accuracy_binary.json @@ -0,0 +1,3 @@ +{ + "type": "binary_accuracy" +} diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py index 538d65acb..d871709ba 100644 --- a/src/unitxt/metrics.py +++ b/src/unitxt/metrics.py @@ -3110,6 +3110,31 @@ def compute( return {self.main_score: best_f1, "best_thr_maxf1": best_thr} +class BinaryAccuracy(InstanceMetric): + """Calculate accuracy for a binary task, using 0.5 as the threshold in the case of float predictions.""" + + reduction_map = {"mean": ["accuracy_binary"]} + main_score = "accuracy_binary" + ci_scores = ["accuracy_binary"] + pos_classes = {"1", "1.0", "yes", "true"} + + def compute( + self, references: List[Any], prediction: Any, task_data: List[Dict] + ) -> dict: + assert ( + len(references) == 1 + ), "Only a single reference per prediction is allowed in Binary Accuracy metric" + + float_prediction = to_float_or_default(prediction) + prediction = str(int(float_prediction > 0.5)) + references = ["1"] if references[0].lower() in self.pos_classes else ["0"] + + result = {self.main_score: float([prediction] == references)} + result["score"] = result[self.main_score] + result["score_name"] = self.main_score + return result + + class BinaryMaxAccuracy(GlobalMetric): process_single_instances = False main_score = "max_accuracy_binary" diff --git a/tests/library/test_metrics.py b/tests/library/test_metrics.py index a11b9f22a..d35cd293c 100644 --- a/tests/library/test_metrics.py +++ b/tests/library/test_metrics.py @@ -3,6 +3,7 @@ from src.unitxt.logging_utils import get_logger from src.unitxt.metrics import ( Accuracy, + BinaryAccuracy, BinaryMaxAccuracy, BinaryMaxF1, F1Binary, @@ -228,6 +229,27 @@ def test_max_f1(self): self.assertEqual("max_f1_binary", outputs[0]["score"]["global"]["score_name"]) self.assertEqual("max_f1_binary", outputs[0]["score"]["instance"]["score_name"]) + def test_accuracy_binary(self): + metric = BinaryAccuracy() + references = [["1"], ["0"], ["0"], ["1"], ["0"]] + predictions = ["0.3", "0", "0.7", "1.0", "0.2"] + + expected_global_result = { + "accuracy_binary": 3 / 5, + "score": 3 / 5, + "score_name": "accuracy_binary", + } + + outputs = apply_metric( + metric=metric, predictions=predictions, references=references + ) + global_result = { + k: v + for k, v in outputs[0]["score"]["global"].items() + if k in expected_global_result + } + self.assertDictEqual(expected_global_result, global_result) + def test_binary_max_accuracy(self): metric = BinaryMaxAccuracy() references = [["1"], ["0"], ["0"], ["1"], ["0"]] From 681034642197e8d9fe3024c80820b8a7113e64eb Mon Sep 17 00:00:00 2001 From: lilacheden Date: Wed, 13 Mar 2024 09:51:16 +0200 Subject: [PATCH 7/9] threshold to metric parameter Signed-off-by: lilacheden --- src/unitxt/metrics.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py index d871709ba..f28ea550c 100644 --- a/src/unitxt/metrics.py +++ b/src/unitxt/metrics.py @@ -1141,10 +1141,13 @@ class F1Micro(F1): class F1Binary(F1): + """Calculate f1 for a binary task, using 0.5 as the threshold in the case of float predictions.""" + process_single_instances = False main_score = "f1_binary" average = "binary" pos_classes = {"1", "1.0", "yes", "true"} + threshold = 0.5 def get_str_id(self, str): return int(str) @@ -1156,7 +1159,7 @@ def compute( task_data: List[Dict], ) -> dict: predictions_floats = 
[to_float_or_default(p) for p in predictions] - predictions = [str(int(p > 0.5)) for p in predictions_floats] + predictions = [str(int(p > self.threshold)) for p in predictions_floats] references = [ ["1"] if r[0].lower() in self.pos_classes else ["0"] for r in references ] @@ -3117,6 +3120,7 @@ class BinaryAccuracy(InstanceMetric): main_score = "accuracy_binary" ci_scores = ["accuracy_binary"] pos_classes = {"1", "1.0", "yes", "true"} + threshold = 0.5 def compute( self, references: List[Any], prediction: Any, task_data: List[Dict] @@ -3126,7 +3130,7 @@ def compute( ), "Only a single reference per prediction is allowed in Binary Accuracy metric" float_prediction = to_float_or_default(prediction) - prediction = str(int(float_prediction > 0.5)) + prediction = str(int(float_prediction > self.threshold)) references = ["1"] if references[0].lower() in self.pos_classes else ["0"] result = {self.main_score: float([prediction] == references)} From ad5294db8f3d63714a742bcea883059a016307c2 Mon Sep 17 00:00:00 2001 From: lilacheden Date: Wed, 13 Mar 2024 11:49:57 +0200 Subject: [PATCH 8/9] add overwrite Signed-off-by: lilacheden --- prepare/metrics/accuracy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prepare/metrics/accuracy.py b/prepare/metrics/accuracy.py index 374532432..7528e4adb 100644 --- a/prepare/metrics/accuracy.py +++ b/prepare/metrics/accuracy.py @@ -34,7 +34,7 @@ add_to_catalog(metric, "metrics.accuracy", overwrite=True) metric = BinaryAccuracy() -add_to_catalog(metric, "metrics.accuracy_binary") +add_to_catalog(metric, "metrics.accuracy_binary", overwrite=True) metric = BinaryMaxAccuracy() add_to_catalog(metric, "metrics.max_accuracy_binary", overwrite=True) From 3a9d93bd5a198b99fb22f5a8b65c19bc5a38d3e5 Mon Sep 17 00:00:00 2001 From: lilacheden Date: Wed, 13 Mar 2024 16:14:02 +0200 Subject: [PATCH 9/9] add doc Signed-off-by: lilacheden --- src/unitxt/metrics.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/unitxt/metrics.py b/src/unitxt/metrics.py index f28ea550c..51eb66f2c 100644 --- a/src/unitxt/metrics.py +++ b/src/unitxt/metrics.py @@ -3082,6 +3082,8 @@ class FixedGroupAbsvalNormHedgesGParaphraseStringContainment(StringContainment): class BinaryMaxF1(F1Binary): + """Calculate the maximal F1 and the decision threshold that achieves it for a binary task with float predictions.""" + main_score = "max_f1_binary" def compute( @@ -3140,6 +3142,8 @@ def compute( class BinaryMaxAccuracy(GlobalMetric): + """Calculate the maximal accuracy and the decision threshold that achieves it for a binary task with float predictions.""" + process_single_instances = False main_score = "max_accuracy_binary" pos_classes = {"1", "1.0", "yes", "true"}
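
A minimal usage sketch of the binary metrics introduced in this patch series, mirroring the style of the tests above. It assumes the `apply_metric` helper already used in `tests/library/test_metrics.py` (its import is not shown in this excerpt), and the scores and labels below are illustrative only:

    from src.unitxt.metrics import BinaryAccuracy, BinaryMaxAccuracy, F1Binary

    # Gold labels (any value in pos_classes, e.g. "1" or "yes", counts as positive)
    # and float model scores given as strings, one per instance.
    references = [["1"], ["0"], ["no"], ["yes"], ["0"]]
    predictions = ["0.9", "0.4", "0.7", "1.0", "0.2"]

    # F1Binary and BinaryAccuracy binarize each score at the default threshold of 0.5,
    # so these predictions are treated as "1", "0", "1", "1", "0".
    # apply_metric is assumed to be the same test helper used in the tests above.
    for metric in (F1Binary(), BinaryAccuracy()):
        outputs = apply_metric(
            metric=metric, predictions=predictions, references=references
        )
        print(
            outputs[0]["score"]["global"]["score_name"],
            outputs[0]["score"]["global"]["score"],
        )

    # BinaryMaxAccuracy instead sweeps every observed score as a candidate threshold
    # and reports the best achievable accuracy; its compute() also returns the chosen
    # threshold under "best_thr_max_acc".
    outputs = apply_metric(
        metric=BinaryMaxAccuracy(), predictions=predictions, references=references
    )
    print(
        outputs[0]["score"]["global"]["score"],
        outputs[0]["score"]["global"].get("best_thr_max_acc"),
    )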