From 3040fcebe3b609e075075d81f0926ad7e8421f87 Mon Sep 17 00:00:00 2001 From: Yotam Perlitz Date: Thu, 9 May 2024 19:42:55 +0300 Subject: [PATCH 1/6] start adding card Signed-off-by: Yotam Perlitz --- prepare/cards/legalbench.py | 77 +++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 prepare/cards/legalbench.py diff --git a/prepare/cards/legalbench.py b/prepare/cards/legalbench.py new file mode 100644 index 000000000..b397db738 --- /dev/null +++ b/prepare/cards/legalbench.py @@ -0,0 +1,77 @@ +from unitxt.blocks import LoadHF, TaskCard +from unitxt.catalog import add_to_catalog +from unitxt.operators import AddFields, RenameFields +from unitxt.test_utils.card import test_card + +# from https://raw.githubusercontent.com/HazyResearch/legalbench/main/helm_prompt_settings.jsonl +field_ordering, instructions, label_keys, output_nouns, _ = [ + { + "abercrombie": [["Description", "text"]], + # "corporate_lobbying": [ + # ["Official title of bill", "bill_title"], + # ["Official summary of bill", "bill_summary"], + # ["Company name", "company_name"], + # ["Company business description", "company_description"], + # ], + "international_citizenship_questions": [["Question", "question"]], + "function_of_decision_section": [["Text", "Paragraph"]], + "proa": [["Statute", "text"]], + }, + { + "abercrombie": "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word. Label the type of mark for the following products.", + # "corporate_lobbying": "You are a lobbyist analyzing Congressional bills for their impacts on companies. Given the title and summary of the bill, plus information on the company from its 10K SEC filing, it is your job to determine if a bill is at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted (by saying Yes or No).", + "international_citizenship_questions": "Answer the following questions considering the state of international law on January 1st, 2020. Answer Yes or No.", + "function_of_decision_section": "Classify the following text using the following definitions.\n\n- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute.", + "proa": "A private right of action is when a regular person, a private citizen, is legally entitled to enforce their rights under a given statute. Does the clause specify a private right of action? 
Answer Yes or No.", + }, + { + "abercrombie": "answer", + # "corporate_lobbying": "answer", + "international_citizenship_questions": "answer", + "function_of_decision_section": "answer", + "proa": "answer", + }, + { + "abercrombie": "Type", + # "corporate_lobbying": "Label", + "international_citizenship_questions": "Answer", + "function_of_decision_section": "Function", + "proa": "Answer", + }, + { + "abercrombie": 5, + # "corporate_lobbying": 0, + "international_citizenship_questions": 5, + "function_of_decision_section": 5, + "proa": 5, + }, +] + + +# 'Classify the following {text_type} on whether it is related to {class}. Answer "Yes" or "No".\n{text}\n' + +sub_tasks = list(field_ordering.keys()) +sub_task = sub_tasks[4] + +card = TaskCard( + loader=LoadHF(path="nguha/legalbench", name=sub_task), + preprocess_steps=[ + AddFields( + fields={ + # "we have the text field" + # "class": label_keys[sub_task], + "text_type": field_ordering[sub_task][0][1], + # "label": + } + ), + RenameFields(field_to_field={"labels": "class"}), + # This is still needed for YesNoTemplates, which does not handle empty labels + # MapInstanceValues(mappers={"labels": {"[]": ["none"]}}, strict=False), + # RenameFields(field_to_field={"labels": "label"}), + ], + task="tasks.classification.binary", + templates="", +) + +test_card(card, debug=True) +add_to_catalog(card, "cards.sst2", overwrite=True) From cb7ac0b4b54abf1c50d815f56851906c13497262 Mon Sep 17 00:00:00 2001 From: Yotam Perlitz Date: Wed, 15 May 2024 16:10:27 +0300 Subject: [PATCH 2/6] add the classification.multi_class.with_classes_descriptions task Signed-off-by: Yotam Perlitz --- prepare/tasks/classification.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/prepare/tasks/classification.py b/prepare/tasks/classification.py index 100694695..c63f8e4ae 100644 --- a/prepare/tasks/classification.py +++ b/prepare/tasks/classification.py @@ -88,3 +88,22 @@ "tasks.classification.multi_class.relation", overwrite=True, ) + + +add_to_catalog( + FormTask( + inputs={ + "text": "str", + "text_type": "str", + "classes": "List[str]", + "type_of_class": "str", + "classes_descriptions": "str", + }, + outputs={"label": "str"}, + prediction_type="str", + metrics=["metrics.f1_micro", "metrics.accuracy", "metrics.f1_macro"], + augmentable_inputs=["text"], + ), + "tasks.classification.multi_class.with_classes_descriptions", + overwrite=True, +) From 8d56b1cf39462eff53200ce76b56e5131f96d618 Mon Sep 17 00:00:00 2001 From: Yotam Perlitz Date: Wed, 15 May 2024 16:11:13 +0300 Subject: [PATCH 3/6] add format task operator Signed-off-by: Yotam Perlitz --- src/unitxt/string_operators.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/unitxt/string_operators.py b/src/unitxt/string_operators.py index 4900a21d8..6c9ccdd5e 100644 --- a/src/unitxt/string_operators.py +++ b/src/unitxt/string_operators.py @@ -1,7 +1,12 @@ import re -from typing import List +from typing import ( + Any, + Dict, + List, + Optional, +) -from .operators import FieldOperator +from .operators import FieldOperator, StreamInstanceOperator class Split(FieldOperator): @@ -39,6 +44,17 @@ def process_value(self, value: List[str]) -> str: return self.by.join(value) +class FormatText(StreamInstanceOperator): + to_field: str + text: str + + def process( + self, instance: Dict[str, Any], stream_name: Optional[str] = None + ) -> Dict[str, Any]: + instance[self.to_field] = self.text.format(**instance) + return instance + + class Strip(FieldOperator): def 
process_value(self, value: str) -> str: return value.strip() From 621021244bbf1f0b9de43e3b4e197281732cd555 Mon Sep 17 00:00:00 2001 From: Yotam Perlitz Date: Wed, 15 May 2024 16:12:10 +0300 Subject: [PATCH 4/6] add legalbench prepare file Signed-off-by: Yotam Perlitz --- prepare/cards/legalbench.py | 194 ++++++++++++++++++++++++------------ 1 file changed, 130 insertions(+), 64 deletions(-) diff --git a/prepare/cards/legalbench.py b/prepare/cards/legalbench.py index b397db738..9faf4ea04 100644 --- a/prepare/cards/legalbench.py +++ b/prepare/cards/legalbench.py @@ -1,77 +1,143 @@ -from unitxt.blocks import LoadHF, TaskCard +from unitxt.blocks import InputOutputTemplate, LoadHF, TaskCard, TemplatesDict from unitxt.catalog import add_to_catalog from unitxt.operators import AddFields, RenameFields +from unitxt.string_operators import FormatText from unitxt.test_utils.card import test_card -# from https://raw.githubusercontent.com/HazyResearch/legalbench/main/helm_prompt_settings.jsonl -field_ordering, instructions, label_keys, output_nouns, _ = [ - { - "abercrombie": [["Description", "text"]], - # "corporate_lobbying": [ - # ["Official title of bill", "bill_title"], - # ["Official summary of bill", "bill_summary"], - # ["Company name", "company_name"], - # ["Company business description", "company_description"], - # ], - "international_citizenship_questions": [["Question", "question"]], - "function_of_decision_section": [["Text", "Paragraph"]], - "proa": [["Statute", "text"]], +task_cfgs = { + # "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word. Label the type of mark for the following products." + "abercrombie": { + "non_task_entries": { + "label_field_name": "answer", + "text_field_name": "text", + }, + "classes_descriptions": "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word.", + "type_of_class": "type of mark", + "text_type": "products", + "classes": ["generic", "descriptive", "suggestive", "arbitrary", "fanciful"], + "instruction": "{classes_descriptions}\n\nLabel the {type_of_class} for the following {text_type}:\n", + "input_format": "Q: {text} What is the {type_of_class}?", + "target_prefix": "A: ", }, - { - "abercrombie": "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word. Label the type of mark for the following products.", - # "corporate_lobbying": "You are a lobbyist analyzing Congressional bills for their impacts on companies. 
Given the title and summary of the bill, plus information on the company from its 10K SEC filing, it is your job to determine if a bill is at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted (by saying Yes or No).", - "international_citizenship_questions": "Answer the following questions considering the state of international law on January 1st, 2020. Answer Yes or No.", - "function_of_decision_section": "Classify the following text using the following definitions.\n\n- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute.", - "proa": "A private right of action is when a regular person, a private citizen, is legally entitled to enforce their rights under a given statute. Does the clause specify a private right of action? Answer Yes or No.", + # A private right of action is when a regular person, a private citizen, is legally entitled to enforce their rights under a given statute. Does the clause specify a private right of action? Answer Yes or No. + # https://github.com/HazyResearch/legalbench/blob/main/tasks/proa/base_prompt.txt + "proa": { + "non_task_entries": { + "label_field_name": "answer", + "text_field_name": "text", + }, + "classes_descriptions": "A private right of action is when a regular person, a private citizen, is legally entitled to enforce their rights under a given statute", + "type_of_class": "a private right of action", + "text_type": "clause", + "instruction": "{classes_descriptions}. Does the {text_type} specify {type_of_class}? Answer from one of {classes}", + "classes": ["Yes", "No"], + "title_fields": ["text_type"], + "input_format": "{text_type}: {text}", + "target_prefix": "A: ", }, - { - "abercrombie": "answer", - # "corporate_lobbying": "answer", - "international_citizenship_questions": "answer", - "function_of_decision_section": "answer", - "proa": "answer", + # Classify the following text using the following definitions.\n\n- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute." 
+ # https://github.com/HazyResearch/legalbench/blob/main/tasks/function_of_decision_section/base_prompt.txt + "function_of_decision_section": { + "non_task_entries": { + "label_field_name": "answer", + "text_field_name": "Paragraph", + }, + "classes_descriptions": "- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute", + "type_of_class": "", + "text_type": "text", + "classes": [ + "Facts", + "Procedural History", + "Issue", + "Rule", + "Analysis", + "Conclusion", + "Decree", + ], + "instruction": "Classify the following {text_type} using the following definitions.\n\n{classes_descriptions}.\n\n", + "title_fields": ["text_type"], + "input_format": "{text_type}: {text}", + "target_prefix": "Label: ", }, - { - "abercrombie": "Type", - # "corporate_lobbying": "Label", - "international_citizenship_questions": "Answer", - "function_of_decision_section": "Function", - "proa": "Answer", + # "Answer the following questions considering the state of international law on January 1st, 2020. Answer Yes or No." + # https://github.com/HazyResearch/legalbench/blob/main/tasks/international_citizenship_questions/base_prompt.txt + "international_citizenship_questions": { + "non_task_entries": { + "label_field_name": "answer", + "text_field_name": "question", + }, + "classes_descriptions": "considering the state of international law on January 1st, 2020", + "type_of_class": "", + "text_type": "question", + "title_fields": ["text_type"], + "instruction": "Answer the following {text_type} {classes_descriptions}.\n", + "classes": ["Yes", "No"], + "input_format": "{text_type}: {text} Answer from one of {classes}.", + "target_prefix": "Answer: ", }, - { - "abercrombie": 5, - # "corporate_lobbying": 0, - "international_citizenship_questions": 5, - "function_of_decision_section": 5, - "proa": 5, + # You are a lobbyist analyzing Congressional bills for their impacts on companies. Given the title and summary of the bill, plus information on the company from its 10K SEC filing, it is your job to determine if a bill is at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted (by saying Yes or No). 
+ # https://github.com/HazyResearch/legalbench/blob/main/tasks/corporate_lobbying/base_prompt.txt + "corporate_lobbying": { + "non_task_entries": { + "label_field_name": "answer", + "text_field_name": "text", + "text_verbalizer": "Official title of bill: {bill_title}\nOfficial summary of bill: {bill_summary}\nCompany name: {company_name}\nCompany business description: {company_description}", + }, + "classes_descriptions": "You are a lobbyist analyzing Congressional bills for their impacts on companies.\nGiven the title and summary of the bill, plus information on the company from its 10K SEC filing, is a bill at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted", + "type_of_class": "", + "text_type": "", + "instruction": "{classes_descriptions}, it is your job to determine {type_of_class} (by saying Yes or No).", + "classes": ["Yes", "No"], + "input_format": "{text}\nIs this bill potentially relevant to the company? FINAL ANSWER:", }, -] +} - -# 'Classify the following {text_type} on whether it is related to {class}. Answer "Yes" or "No".\n{text}\n' - -sub_tasks = list(field_ordering.keys()) -sub_task = sub_tasks[4] - -card = TaskCard( - loader=LoadHF(path="nguha/legalbench", name=sub_task), - preprocess_steps=[ - AddFields( - fields={ - # "we have the text field" - # "class": label_keys[sub_task], - "text_type": field_ordering[sub_task][0][1], - # "label": +for task_name, task_cfg in task_cfgs.items(): + card = TaskCard( + loader=LoadHF(path="nguha/legalbench", name=task_name), + preprocess_steps=( + [ + FormatText( + text=task_cfg["non_task_entries"]["text_verbalizer"], + to_field="text", + ) + ] + if task_cfg["non_task_entries"].get("text_verbalizer", False) + else [] + ) + + [ + RenameFields( + field_to_field={ + task_cfg["non_task_entries"]["text_field_name"]: "text", + task_cfg["non_task_entries"]["label_field_name"]: "label", + } + ), + AddFields( + fields={ + "text_type": task_cfg["text_type"], + "classes": task_cfg["classes"], + "type_of_class": task_cfg["type_of_class"], + "classes_descriptions": task_cfg["classes_descriptions"], + } + ), + ], + task="tasks.classification.multi_class.with_classes_descriptions", + templates=TemplatesDict( + { + "default": InputOutputTemplate( + input_format=task_cfg["input_format"], + output_format="{label}", + instruction=task_cfg["instruction"], + target_prefix=task_cfg.get("target_prefix", ""), + title_fields=task_cfg.get("title_fields", []), + postprocessors=[ + "processors.take_first_non_empty_line", + "processors.lower_case_till_punc", + ], + ), } ), - RenameFields(field_to_field={"labels": "class"}), - # This is still needed for YesNoTemplates, which does not handle empty labels - # MapInstanceValues(mappers={"labels": {"[]": ["none"]}}, strict=False), - # RenameFields(field_to_field={"labels": "label"}), - ], - task="tasks.classification.binary", - templates="", -) + ) -test_card(card, debug=True) -add_to_catalog(card, "cards.sst2", overwrite=True) + test_card(card, format="formats.textual_assistant") + add_to_catalog(card, f"cards.legalbench.{task_name}", overwrite=True) From b3e6a29a813c873e0f2dc6cfa69221aac4208806 Mon Sep 17 00:00:00 2001 From: Yotam Perlitz Date: Wed, 15 May 2024 16:12:39 +0300 Subject: [PATCH 5/6] add the classification.multi_class.with_classes_descriptions task task to the catalog Signed-off-by: Yotam Perlitz --- .../with_classes_descriptions.json | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 
src/unitxt/catalog/tasks/classification/multi_class/with_classes_descriptions.json diff --git a/src/unitxt/catalog/tasks/classification/multi_class/with_classes_descriptions.json b/src/unitxt/catalog/tasks/classification/multi_class/with_classes_descriptions.json new file mode 100644 index 000000000..37add5a4d --- /dev/null +++ b/src/unitxt/catalog/tasks/classification/multi_class/with_classes_descriptions.json @@ -0,0 +1,22 @@ +{ + "type": "form_task", + "inputs": { + "text": "str", + "text_type": "str", + "classes": "List[str]", + "type_of_class": "str", + "classes_descriptions": "str" + }, + "outputs": { + "label": "str" + }, + "prediction_type": "str", + "metrics": [ + "metrics.f1_micro", + "metrics.accuracy", + "metrics.f1_macro" + ], + "augmentable_inputs": [ + "text" + ] +} From 50f562937d365e527d945c120001f230258aa517 Mon Sep 17 00:00:00 2001 From: Yotam Perlitz Date: Wed, 15 May 2024 16:13:03 +0300 Subject: [PATCH 6/6] add legalbench cards to the catalog Signed-off-by: Yotam Perlitz --- .../catalog/cards/legalbench/abercrombie.json | 47 +++++++++++++++++ .../cards/legalbench/corporate_lobbying.json | 49 ++++++++++++++++++ .../function_of_decision_section.json | 51 +++++++++++++++++++ .../international_citizenship_questions.json | 46 +++++++++++++++++ src/unitxt/catalog/cards/legalbench/proa.json | 46 +++++++++++++++++ 5 files changed, 239 insertions(+) create mode 100644 src/unitxt/catalog/cards/legalbench/abercrombie.json create mode 100644 src/unitxt/catalog/cards/legalbench/corporate_lobbying.json create mode 100644 src/unitxt/catalog/cards/legalbench/function_of_decision_section.json create mode 100644 src/unitxt/catalog/cards/legalbench/international_citizenship_questions.json create mode 100644 src/unitxt/catalog/cards/legalbench/proa.json diff --git a/src/unitxt/catalog/cards/legalbench/abercrombie.json b/src/unitxt/catalog/cards/legalbench/abercrombie.json new file mode 100644 index 000000000..94d30def6 --- /dev/null +++ b/src/unitxt/catalog/cards/legalbench/abercrombie.json @@ -0,0 +1,47 @@ +{ + "type": "task_card", + "loader": { + "type": "load_hf", + "path": "nguha/legalbench", + "name": "abercrombie" + }, + "preprocess_steps": [ + { + "type": "rename_fields", + "field_to_field": { + "text": "text", + "answer": "label" + } + }, + { + "type": "add_fields", + "fields": { + "text_type": "products", + "classes": [ + "generic", + "descriptive", + "suggestive", + "arbitrary", + "fanciful" + ], + "type_of_class": "type of mark", + "classes_descriptions": "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word." 
+ } + } + ], + "task": "tasks.classification.multi_class.with_classes_descriptions", + "templates": { + "default": { + "type": "input_output_template", + "input_format": "Q: {text} What is the {type_of_class}?", + "output_format": "{label}", + "instruction": "{classes_descriptions}\n\nLabel the {type_of_class} for the following {text_type}:\n", + "target_prefix": "A: ", + "title_fields": [], + "postprocessors": [ + "processors.take_first_non_empty_line", + "processors.lower_case_till_punc" + ] + } + } +} diff --git a/src/unitxt/catalog/cards/legalbench/corporate_lobbying.json b/src/unitxt/catalog/cards/legalbench/corporate_lobbying.json new file mode 100644 index 000000000..e21f5b24c --- /dev/null +++ b/src/unitxt/catalog/cards/legalbench/corporate_lobbying.json @@ -0,0 +1,49 @@ +{ + "type": "task_card", + "loader": { + "type": "load_hf", + "path": "nguha/legalbench", + "name": "corporate_lobbying" + }, + "preprocess_steps": [ + { + "type": "format_text", + "text": "Official title of bill: {bill_title}\nOfficial summary of bill: {bill_summary}\nCompany name: {company_name}\nCompany business description: {company_description}", + "to_field": "text" + }, + { + "type": "rename_fields", + "field_to_field": { + "text": "text", + "answer": "label" + } + }, + { + "type": "add_fields", + "fields": { + "text_type": "", + "classes": [ + "Yes", + "No" + ], + "type_of_class": "", + "classes_descriptions": "You are a lobbyist analyzing Congressional bills for their impacts on companies.\nGiven the title and summary of the bill, plus information on the company from its 10K SEC filing, is a bill at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted" + } + } + ], + "task": "tasks.classification.multi_class.with_classes_descriptions", + "templates": { + "default": { + "type": "input_output_template", + "input_format": "{text}\nIs this bill potentially relevant to the company? 
FINAL ANSWER:", + "output_format": "{label}", + "instruction": "{classes_descriptions}, it is your job to determine {type_of_class} (by saying Yes or No).", + "target_prefix": "", + "title_fields": [], + "postprocessors": [ + "processors.take_first_non_empty_line", + "processors.lower_case_till_punc" + ] + } + } +} diff --git a/src/unitxt/catalog/cards/legalbench/function_of_decision_section.json b/src/unitxt/catalog/cards/legalbench/function_of_decision_section.json new file mode 100644 index 000000000..72a0ae93d --- /dev/null +++ b/src/unitxt/catalog/cards/legalbench/function_of_decision_section.json @@ -0,0 +1,51 @@ +{ + "type": "task_card", + "loader": { + "type": "load_hf", + "path": "nguha/legalbench", + "name": "function_of_decision_section" + }, + "preprocess_steps": [ + { + "type": "rename_fields", + "field_to_field": { + "Paragraph": "text", + "answer": "label" + } + }, + { + "type": "add_fields", + "fields": { + "text_type": "text", + "classes": [ + "Facts", + "Procedural History", + "Issue", + "Rule", + "Analysis", + "Conclusion", + "Decree" + ], + "type_of_class": "", + "classes_descriptions": "- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute" + } + } + ], + "task": "tasks.classification.multi_class.with_classes_descriptions", + "templates": { + "default": { + "type": "input_output_template", + "input_format": "{text_type}: {text}", + "output_format": "{label}", + "instruction": "Classify the following {text_type} using the following definitions.\n\n{classes_descriptions}.\n\n", + "target_prefix": "Label: ", + "title_fields": [ + "text_type" + ], + "postprocessors": [ + "processors.take_first_non_empty_line", + "processors.lower_case_till_punc" + ] + } + } +} diff --git a/src/unitxt/catalog/cards/legalbench/international_citizenship_questions.json b/src/unitxt/catalog/cards/legalbench/international_citizenship_questions.json new file mode 100644 index 000000000..b58647da2 --- /dev/null +++ b/src/unitxt/catalog/cards/legalbench/international_citizenship_questions.json @@ -0,0 +1,46 @@ +{ + "type": "task_card", + "loader": { + "type": "load_hf", + "path": "nguha/legalbench", + "name": "international_citizenship_questions" + }, + "preprocess_steps": [ + { + "type": "rename_fields", + "field_to_field": { + "question": "text", + "answer": "label" + } + }, + { + "type": "add_fields", + "fields": { + "text_type": "question", + "classes": [ + "Yes", + "No" + ], + "type_of_class": "", + "classes_descriptions": "considering the state of international law on January 1st, 2020" + } + } + ], + "task": "tasks.classification.multi_class.with_classes_descriptions", + "templates": { + "default": { + "type": "input_output_template", + "input_format": "{text_type}: {text} Answer from one of {classes}.", + "output_format": "{label}", + "instruction": "Answer the following {text_type} {classes_descriptions}.\n", + "target_prefix": "Answer: ", + "title_fields": [ + "text_type" + ], + "postprocessors": 
[ + "processors.take_first_non_empty_line", + "processors.lower_case_till_punc" + ] + } + } +} diff --git a/src/unitxt/catalog/cards/legalbench/proa.json b/src/unitxt/catalog/cards/legalbench/proa.json new file mode 100644 index 000000000..e66be7c32 --- /dev/null +++ b/src/unitxt/catalog/cards/legalbench/proa.json @@ -0,0 +1,46 @@ +{ + "type": "task_card", + "loader": { + "type": "load_hf", + "path": "nguha/legalbench", + "name": "proa" + }, + "preprocess_steps": [ + { + "type": "rename_fields", + "field_to_field": { + "text": "text", + "answer": "label" + } + }, + { + "type": "add_fields", + "fields": { + "text_type": "clause", + "classes": [ + "Yes", + "No" + ], + "type_of_class": "a private right of action", + "classes_descriptions": "A private right of action is when a regular person, a private citizen, is legally entitled to enforce their rights under a given statute" + } + } + ], + "task": "tasks.classification.multi_class.with_classes_descriptions", + "templates": { + "default": { + "type": "input_output_template", + "input_format": "{text_type}: {text}", + "output_format": "{label}", + "instruction": "{classes_descriptions}. Does the {text_type} specify {type_of_class}? Answer from one of {classes}", + "target_prefix": "A: ", + "title_fields": [ + "text_type" + ], + "postprocessors": [ + "processors.take_first_non_empty_line", + "processors.lower_case_till_punc" + ] + } + } +}
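
Patch 2 introduces the task these cards plug into, tasks.classification.multi_class.with_classes_descriptions, which extends the plain multi-class classification inputs with classes_descriptions and type_of_class fields. For reference, an instance satisfying this task looks roughly like the sketch below; the field names come from the FormTask definition in patch 2, while the values are invented (loosely mirroring the proa card) purely for illustration.

```python
# Hypothetical instance for tasks.classification.multi_class.with_classes_descriptions.
# Field names follow the FormTask definition in patch 2; the values are invented.
task_instance = {
    # inputs
    "text": "Any person aggrieved by a violation may bring a civil action.",
    "text_type": "clause",
    "classes": ["Yes", "No"],
    "type_of_class": "a private right of action",
    "classes_descriptions": (
        "A private right of action is when a regular person, a private citizen, "
        "is legally entitled to enforce their rights under a given statute"
    ),
    # outputs
    "label": "Yes",
}
```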
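
Patch 3 adds the FormatText operator, which fills a Python format string from an instance's fields and writes the result to to_field; the corporate_lobbying card relies on it to verbalize the four bill/company columns into a single text field. A minimal sketch of its behavior on one instance dict follows; the instance values are invented, and calling process directly (outside a stream pipeline) is assumed here only as a way to exercise the operator in isolation.

```python
from unitxt.string_operators import FormatText

# Invented instance carrying the fields the corporate_lobbying card verbalizes.
instance = {
    "bill_title": "Example Bill Title",
    "bill_summary": "A short summary of the bill.",
    "company_name": "Example Corp",
    "company_description": "A fictional company used for illustration.",
}

# Same pattern as the card's text_verbalizer, shortened for readability.
op = FormatText(
    text="Official title of bill: {bill_title}\nCompany name: {company_name}",
    to_field="text",
)

# process() formats the template from the instance fields and stores the
# result under to_field, leaving the other fields untouched.
result = op.process(instance)
print(result["text"])
```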
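
Once merged, the new cards should be loadable through unitxt's standard recipe interface. The sketch below is an assumption about that API rather than something this PR adds: load_dataset with a recipe string, template_card_index (used here to select the inline "default" template each card defines), and loader_limit are existing unitxt names, but this exact combination has not been verified against the new cards.

```python
from unitxt import load_dataset

# Assumed recipe string: the card name comes from this PR; template_card_index
# is assumed to accept the "default" key of the card's inline TemplatesDict.
# formats.textual_assistant is the format the PR itself passes to test_card().
dataset = load_dataset(
    "card=cards.legalbench.proa,"
    "template_card_index=default,"
    "format=formats.textual_assistant,"
    "loader_limit=100"
)

# Each split holds rendered source/target pairs ready for evaluation.
print(dataset["test"][0]["source"])
```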