Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add 5 legalbench tasks (the 5 existing in HELM) #827

Merged
merged 8 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions prepare/cards/legalbench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from unitxt.blocks import InputOutputTemplate, LoadHF, TaskCard, TemplatesDict
from unitxt.catalog import add_to_catalog
from unitxt.operators import AddFields, RenameFields
from unitxt.string_operators import FormatText
from unitxt.test_utils.card import test_card

# Per-task configuration for the five legalbench tasks ported from HELM.
# Each entry drives the card-building loop below:
#   - "non_task_entries": dataset-specific wiring consumed only by the
#     preprocessing steps — the raw field names to rename to "text"/"label",
#     and an optional "text_verbalizer" format string that assembles the
#     "text" field from several raw dataset columns (corporate_lobbying).
#   - "classes", "classes_descriptions", "type_of_class", "text_type":
#     values added as task fields via AddFields.
#   - "instruction", "input_format", and the optional "target_prefix" /
#     "title_fields": settings for the card's default InputOutputTemplate.
# The commented line above each task reproduces the original legalbench
# base prompt that the instruction/input_format strings are derived from.
task_cfgs = {
    # "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word. Label the type of mark for the following products."
    "abercrombie": {
        "non_task_entries": {
            "label_field_name": "answer",
            "text_field_name": "text",
        },
        "classes_descriptions": "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word.",
        "type_of_class": "type of mark",
        "text_type": "products",
        "classes": ["generic", "descriptive", "suggestive", "arbitrary", "fanciful"],
        "instruction": "{classes_descriptions}\n\nLabel the {type_of_class} for the following {text_type}:\n",
        "input_format": "Q: {text} What is the {type_of_class}?",
        "target_prefix": "A: ",
    },
    # A private right of action is when a regular person, a private citizen, is legally entitled to enforce their rights under a given statute. Does the clause specify a private right of action? Answer Yes or No.
    # https://github.com/HazyResearch/legalbench/blob/main/tasks/proa/base_prompt.txt
    "proa": {
        "non_task_entries": {
            "label_field_name": "answer",
            "text_field_name": "text",
        },
        "classes_descriptions": "A private right of action is when a regular person, a private citizen, is legally entitled to enforce their rights under a given statute",
        "type_of_class": "a private right of action",
        "text_type": "clause",
        "instruction": "{classes_descriptions}. Does the {text_type} specify {type_of_class}? Answer from one of {classes}",
        "classes": ["Yes", "No"],
        # NOTE(review): "text_type" is listed so the template title-cases it
        # when rendered — confirm against unitxt's title_fields semantics.
        "title_fields": ["text_type"],
        "input_format": "{text_type}: {text}",
        "target_prefix": "A: ",
    },
    # Classify the following text using the following definitions.\n\n- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute."
    # https://github.com/HazyResearch/legalbench/blob/main/tasks/function_of_decision_section/base_prompt.txt
    "function_of_decision_section": {
        "non_task_entries": {
            "label_field_name": "answer",
            # This dataset names its input column "Paragraph" (capitalized).
            "text_field_name": "Paragraph",
        },
        "classes_descriptions": "- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute",
        "type_of_class": "",
        "text_type": "text",
        "classes": [
            "Facts",
            "Procedural History",
            "Issue",
            "Rule",
            "Analysis",
            "Conclusion",
            "Decree",
        ],
        "instruction": "Classify the following {text_type} using the following definitions.\n\n{classes_descriptions}.\n\n",
        "title_fields": ["text_type"],
        "input_format": "{text_type}: {text}",
        "target_prefix": "Label: ",
    },
    # "Answer the following questions considering the state of international law on January 1st, 2020. Answer Yes or No."
    # https://github.com/HazyResearch/legalbench/blob/main/tasks/international_citizenship_questions/base_prompt.txt
    "international_citizenship_questions": {
        "non_task_entries": {
            "label_field_name": "answer",
            "text_field_name": "question",
        },
        "classes_descriptions": "considering the state of international law on January 1st, 2020",
        "type_of_class": "",
        "text_type": "question",
        "title_fields": ["text_type"],
        "instruction": "Answer the following {text_type} {classes_descriptions}.\n",
        "classes": ["Yes", "No"],
        "input_format": "{text_type}: {text} Answer from one of {classes}.",
        "target_prefix": "Answer: ",
    },
    # You are a lobbyist analyzing Congressional bills for their impacts on companies. Given the title and summary of the bill, plus information on the company from its 10K SEC filing, it is your job to determine if a bill is at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted (by saying Yes or No).
    # https://github.com/HazyResearch/legalbench/blob/main/tasks/corporate_lobbying/base_prompt.txt
    "corporate_lobbying": {
        "non_task_entries": {
            "label_field_name": "answer",
            "text_field_name": "text",
            # Assembles the "text" field from four raw dataset columns
            # before the generic rename step runs.
            "text_verbalizer": "Official title of bill: {bill_title}\nOfficial summary of bill: {bill_summary}\nCompany name: {company_name}\nCompany business description: {company_description}",
        },
        "classes_descriptions": "You are a lobbyist analyzing Congressional bills for their impacts on companies.\nGiven the title and summary of the bill, plus information on the company from its 10K SEC filing, is a bill at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted",
        "type_of_class": "",
        "text_type": "",
        "instruction": "{classes_descriptions}, it is your job to determine {type_of_class} (by saying Yes or No).",
        "classes": ["Yes", "No"],
        "input_format": "{text}\nIs this bill potentially relevant to the company? FINAL ANSWER:",
    },
}

# Build, sanity-check, and register one Unitxt card per configured task.
for task_name, task_cfg in task_cfgs.items():
    non_task = task_cfg["non_task_entries"]

    preprocess_steps = []

    # Optional verbalization: some tasks (corporate_lobbying) assemble the
    # "text" field from several raw dataset columns before renaming.
    verbalizer = non_task.get("text_verbalizer", False)
    if verbalizer:
        preprocess_steps.append(FormatText(text=verbalizer, to_field="text"))

    # Normalize dataset-specific column names to the task's "text"/"label".
    preprocess_steps.append(
        RenameFields(
            field_to_field={
                non_task["text_field_name"]: "text",
                non_task["label_field_name"]: "label",
            }
        )
    )

    # Inject the constant per-task fields the classification task expects.
    preprocess_steps.append(
        AddFields(
            fields={
                "text_type": task_cfg["text_type"],
                "classes": task_cfg["classes"],
                "type_of_class": task_cfg["type_of_class"],
                "classes_descriptions": task_cfg["classes_descriptions"],
            }
        )
    )

    # Single default template built from the task's prompt configuration.
    default_template = InputOutputTemplate(
        input_format=task_cfg["input_format"],
        output_format="{label}",
        instruction=task_cfg["instruction"],
        target_prefix=task_cfg.get("target_prefix", ""),
        title_fields=task_cfg.get("title_fields", []),
        postprocessors=[
            "processors.take_first_non_empty_line",
            "processors.lower_case_till_punc",
        ],
    )

    card = TaskCard(
        loader=LoadHF(path="nguha/legalbench", name=task_name),
        preprocess_steps=preprocess_steps,
        task="tasks.classification.multi_class.with_classes_descriptions",
        templates=TemplatesDict({"default": default_template}),
    )

    test_card(card, format="formats.textual_assistant")
    add_to_catalog(card, f"cards.legalbench.{task_name}", overwrite=True)
19 changes: 19 additions & 0 deletions prepare/tasks/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,22 @@
"tasks.classification.multi_class.relation",
overwrite=True,
)


# Multi-class classification task whose inputs also carry free-text
# descriptions of the label set, so templates can verbalize the classes
# (used by the legalbench cards in prepare/cards/legalbench.py).
add_to_catalog(
    FormTask(
        # Input fields every card must supply (via renaming/AddFields):
        # the text to classify, a human-readable name for that text,
        # the candidate labels, a name for the label dimension, and a
        # free-text description of each class.
        inputs={
            "text": "str",
            "text_type": "str",
            "classes": "List[str]",
            "type_of_class": "str",
            "classes_descriptions": "str",
        },
        outputs={"label": "str"},
        # Predictions are the raw generated string, scored against "label".
        prediction_type="str",
        metrics=["metrics.f1_micro", "metrics.accuracy", "metrics.f1_macro"],
        # Only the free text is eligible for augmentation operators.
        augmentable_inputs=["text"],
    ),
    "tasks.classification.multi_class.with_classes_descriptions",
    overwrite=True,
)
47 changes: 47 additions & 0 deletions src/unitxt/catalog/cards/legalbench/abercrombie.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"type": "task_card",
"loader": {
"type": "load_hf",
"path": "nguha/legalbench",
"name": "abercrombie"
},
"preprocess_steps": [
{
"type": "rename_fields",
"field_to_field": {
"text": "text",
"answer": "label"
}
},
{
"type": "add_fields",
"fields": {
"text_type": "products",
"classes": [
"generic",
"descriptive",
"suggestive",
"arbitrary",
"fanciful"
],
"type_of_class": "type of mark",
"classes_descriptions": "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word."
}
}
],
"task": "tasks.classification.multi_class.with_classes_descriptions",
"templates": {
"default": {
"type": "input_output_template",
"input_format": "Q: {text} What is the {type_of_class}?",
"output_format": "{label}",
"instruction": "{classes_descriptions}\n\nLabel the {type_of_class} for the following {text_type}:\n",
"target_prefix": "A: ",
"title_fields": [],
"postprocessors": [
"processors.take_first_non_empty_line",
"processors.lower_case_till_punc"
]
}
}
}
49 changes: 49 additions & 0 deletions src/unitxt/catalog/cards/legalbench/corporate_lobbying.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"type": "task_card",
"loader": {
"type": "load_hf",
"path": "nguha/legalbench",
"name": "corporate_lobbying"
},
"preprocess_steps": [
{
"type": "format_text",
"text": "Official title of bill: {bill_title}\nOfficial summary of bill: {bill_summary}\nCompany name: {company_name}\nCompany business description: {company_description}",
"to_field": "text"
},
{
"type": "rename_fields",
"field_to_field": {
"text": "text",
"answer": "label"
}
},
{
"type": "add_fields",
"fields": {
"text_type": "",
"classes": [
"Yes",
"No"
],
"type_of_class": "",
"classes_descriptions": "You are a lobbyist analyzing Congressional bills for their impacts on companies.\nGiven the title and summary of the bill, plus information on the company from its 10K SEC filing, is a bill at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted"
}
}
],
"task": "tasks.classification.multi_class.with_classes_descriptions",
"templates": {
"default": {
"type": "input_output_template",
"input_format": "{text}\nIs this bill potentially relevant to the company? FINAL ANSWER:",
"output_format": "{label}",
"instruction": "{classes_descriptions}, it is your job to determine {type_of_class} (by saying Yes or No).",
"target_prefix": "",
"title_fields": [],
"postprocessors": [
"processors.take_first_non_empty_line",
"processors.lower_case_till_punc"
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"type": "task_card",
"loader": {
"type": "load_hf",
"path": "nguha/legalbench",
"name": "function_of_decision_section"
},
"preprocess_steps": [
{
"type": "rename_fields",
"field_to_field": {
"Paragraph": "text",
"answer": "label"
}
},
{
"type": "add_fields",
"fields": {
"text_type": "text",
"classes": [
"Facts",
"Procedural History",
"Issue",
"Rule",
"Analysis",
"Conclusion",
"Decree"
],
"type_of_class": "",
"classes_descriptions": "- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute"
}
}
],
"task": "tasks.classification.multi_class.with_classes_descriptions",
"templates": {
"default": {
"type": "input_output_template",
"input_format": "{text_type}: {text}",
"output_format": "{label}",
"instruction": "Classify the following {text_type} using the following definitions.\n\n{classes_descriptions}.\n\n",
"target_prefix": "Label: ",
"title_fields": [
"text_type"
],
"postprocessors": [
"processors.take_first_non_empty_line",
"processors.lower_case_till_punc"
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"type": "task_card",
"loader": {
"type": "load_hf",
"path": "nguha/legalbench",
"name": "international_citizenship_questions"
},
"preprocess_steps": [
{
"type": "rename_fields",
"field_to_field": {
"question": "text",
"answer": "label"
}
},
{
"type": "add_fields",
"fields": {
"text_type": "question",
"classes": [
"Yes",
"No"
],
"type_of_class": "",
"classes_descriptions": "considering the state of international law on January 1st, 2020"
}
}
],
"task": "tasks.classification.multi_class.with_classes_descriptions",
"templates": {
"default": {
"type": "input_output_template",
"input_format": "{text_type}: {text} Answer from one of {classes}.",
"output_format": "{label}",
"instruction": "Answer the following {text_type} {classes_descriptions}.\n",
"target_prefix": "Answer: ",
"title_fields": [
"text_type"
],
"postprocessors": [
"processors.take_first_non_empty_line",
"processors.lower_case_till_punc"
]
}
}
}
Loading
Loading