Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add 5 legalbench tasks (the 5 existing in HELM) #827

Merged
merged 8 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions prepare/cards/legalbench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from unitxt.blocks import InputOutputTemplate, LoadHF, TaskCard, TemplatesDict
from unitxt.catalog import add_to_catalog
from unitxt.operators import AddFields, RenameFields
from unitxt.string_operators import FormatText
from unitxt.test_utils.card import test_card

# Per-task configuration for the five legalbench tasks ported from HELM.
# Each entry drives the card-building loop below:
#   - "non_task_entries": dataset-specific wiring consumed only by the
#     preprocessing steps — the raw field names to rename to "text"/"label",
#     and an optional "text_verbalizer" format string that assembles the
#     "text" field from several raw dataset columns (corporate_lobbying).
#   - "classes", "classes_descriptions", "type_of_class", "text_type":
#     values added as task fields via AddFields.
#   - "instruction", "input_format", and the optional "target_prefix" /
#     "title_fields": settings for the card's default InputOutputTemplate.
# The commented line above each task reproduces the original legalbench
# base prompt that the instruction/input_format strings are derived from.
task_cfgs = {
    # "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word. Label the type of mark for the following products."
    "abercrombie": {
        "non_task_entries": {
            "label_field_name": "answer",
            "text_field_name": "text",
        },
        "classes_descriptions": "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word.",
        "type_of_class": "type of mark",
        "text_type": "products",
        "classes": ["generic", "descriptive", "suggestive", "arbitrary", "fanciful"],
        "instruction": "{classes_descriptions}\n\nLabel the {type_of_class} for the following {text_type}:\n",
        "input_format": "Q: {text} What is the {type_of_class}?",
        "target_prefix": "A: ",
    },
    # A private right of action is when a regular person, a private citizen, is legally entitled to enforce their rights under a given statute. Does the clause specify a private right of action? Answer Yes or No.
    # https://github.com/HazyResearch/legalbench/blob/main/tasks/proa/base_prompt.txt
    "proa": {
        "non_task_entries": {
            "label_field_name": "answer",
            "text_field_name": "text",
        },
        "classes_descriptions": "A private right of action is when a regular person, a private citizen, is legally entitled to enforce their rights under a given statute",
        "type_of_class": "a private right of action",
        "text_type": "clause",
        "instruction": "{classes_descriptions}. Does the {text_type} specify {type_of_class}? Answer from one of {classes}",
        "classes": ["Yes", "No"],
        # NOTE(review): "text_type" is listed so the template title-cases it
        # when rendered — confirm against unitxt's title_fields semantics.
        "title_fields": ["text_type"],
        "input_format": "{text_type}: {text}",
        "target_prefix": "A: ",
    },
    # Classify the following text using the following definitions.\n\n- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute."
    # https://github.com/HazyResearch/legalbench/blob/main/tasks/function_of_decision_section/base_prompt.txt
    "function_of_decision_section": {
        "non_task_entries": {
            "label_field_name": "answer",
            # This dataset names its input column "Paragraph" (capitalized).
            "text_field_name": "Paragraph",
        },
        "classes_descriptions": "- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute",
        "type_of_class": "",
        "text_type": "text",
        "classes": [
            "Facts",
            "Procedural History",
            "Issue",
            "Rule",
            "Analysis",
            "Conclusion",
            "Decree",
        ],
        "instruction": "Classify the following {text_type} using the following definitions.\n\n{classes_descriptions}.\n\n",
        "title_fields": ["text_type"],
        "input_format": "{text_type}: {text}",
        "target_prefix": "Label: ",
    },
    # "Answer the following questions considering the state of international law on January 1st, 2020. Answer Yes or No."
    # https://github.com/HazyResearch/legalbench/blob/main/tasks/international_citizenship_questions/base_prompt.txt
    "international_citizenship_questions": {
        "non_task_entries": {
            "label_field_name": "answer",
            "text_field_name": "question",
        },
        "classes_descriptions": "considering the state of international law on January 1st, 2020",
        "type_of_class": "",
        "text_type": "question",
        "title_fields": ["text_type"],
        "instruction": "Answer the following {text_type} {classes_descriptions}.\n",
        "classes": ["Yes", "No"],
        "input_format": "{text_type}: {text} Answer from one of {classes}.",
        "target_prefix": "Answer: ",
    },
    # You are a lobbyist analyzing Congressional bills for their impacts on companies. Given the title and summary of the bill, plus information on the company from its 10K SEC filing, it is your job to determine if a bill is at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted (by saying Yes or No).
    # https://github.com/HazyResearch/legalbench/blob/main/tasks/corporate_lobbying/base_prompt.txt
    "corporate_lobbying": {
        "non_task_entries": {
            "label_field_name": "answer",
            "text_field_name": "text",
            # Assembles the "text" field from four raw dataset columns
            # before the generic rename step runs.
            "text_verbalizer": "Official title of bill: {bill_title}\nOfficial summary of bill: {bill_summary}\nCompany name: {company_name}\nCompany business description: {company_description}",
        },
        "classes_descriptions": "You are a lobbyist analyzing Congressional bills for their impacts on companies.\nGiven the title and summary of the bill, plus information on the company from its 10K SEC filing, is a bill at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted",
        "type_of_class": "",
        "text_type": "",
        "instruction": "{classes_descriptions}, it is your job to determine {type_of_class} (by saying Yes or No).",
        "classes": ["Yes", "No"],
        "input_format": "{text}\nIs this bill potentially relevant to the company? FINAL ANSWER:",
    },
}

# Build, sanity-check, and register one Unitxt card per configured task.
for task_name, task_cfg in task_cfgs.items():
    non_task = task_cfg["non_task_entries"]

    preprocess_steps = []

    # Optional verbalization: some tasks (corporate_lobbying) assemble the
    # "text" field from several raw dataset columns before renaming.
    verbalizer = non_task.get("text_verbalizer", False)
    if verbalizer:
        preprocess_steps.append(FormatText(text=verbalizer, to_field="text"))

    # Normalize dataset-specific column names to the task's "text"/"label".
    preprocess_steps.append(
        RenameFields(
            field_to_field={
                non_task["text_field_name"]: "text",
                non_task["label_field_name"]: "label",
            }
        )
    )

    # Inject the constant per-task fields the classification task expects.
    preprocess_steps.append(
        AddFields(
            fields={
                "text_type": task_cfg["text_type"],
                "classes": task_cfg["classes"],
                "type_of_class": task_cfg["type_of_class"],
                "classes_descriptions": task_cfg["classes_descriptions"],
            }
        )
    )

    # Single default template built from the task's prompt configuration.
    default_template = InputOutputTemplate(
        input_format=task_cfg["input_format"],
        output_format="{label}",
        instruction=task_cfg["instruction"],
        target_prefix=task_cfg.get("target_prefix", ""),
        title_fields=task_cfg.get("title_fields", []),
        postprocessors=[
            "processors.take_first_non_empty_line",
            "processors.lower_case_till_punc",
        ],
    )

    card = TaskCard(
        loader=LoadHF(path="nguha/legalbench", name=task_name),
        preprocess_steps=preprocess_steps,
        task="tasks.classification.multi_class.with_classes_descriptions",
        templates=TemplatesDict({"default": default_template}),
    )

    test_card(card, format="formats.textual_assistant")
    add_to_catalog(card, f"cards.legalbench.{task_name}", overwrite=True)
19 changes: 19 additions & 0 deletions prepare/tasks/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,22 @@
"tasks.classification.multi_class.relation",
overwrite=True,
)


# Multi-class classification task whose inputs also carry free-text
# descriptions of the label set, so templates can verbalize the classes
# (used by the legalbench cards in prepare/cards/legalbench.py).
add_to_catalog(
    FormTask(
        # Input fields every card must supply (via renaming/AddFields):
        # the text to classify, a human-readable name for that text,
        # the candidate labels, a name for the label dimension, and a
        # free-text description of each class.
        inputs={
            "text": "str",
            "text_type": "str",
            "classes": "List[str]",
            "type_of_class": "str",
            "classes_descriptions": "str",
        },
        outputs={"label": "str"},
        # Predictions are the raw generated string, scored against "label".
        prediction_type="str",
        metrics=["metrics.f1_micro", "metrics.accuracy", "metrics.f1_macro"],
        # Only the free text is eligible for augmentation operators.
        augmentable_inputs=["text"],
    ),
    "tasks.classification.multi_class.with_classes_descriptions",
    overwrite=True,
)
47 changes: 47 additions & 0 deletions src/unitxt/catalog/cards/legalbench/abercrombie.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"type": "task_card",
"loader": {
"type": "load_hf",
"path": "nguha/legalbench",
"name": "abercrombie"
},
"preprocess_steps": [
{
"type": "rename_fields",
"field_to_field": {
"text": "text",
"answer": "label"
}
},
{
"type": "add_fields",
"fields": {
"text_type": "products",
"classes": [
"generic",
"descriptive",
"suggestive",
"arbitrary",
"fanciful"
],
"type_of_class": "type of mark",
"classes_descriptions": "A mark is generic if it is the common name for the product. A mark is descriptive if it describes a purpose, nature, or attribute of the product. A mark is suggestive if it suggests or implies a quality or characteristic of the product. A mark is arbitrary if it is a real English word that has no relation to the product. A mark is fanciful if it is an invented word."
}
}
],
"task": "tasks.classification.multi_class.with_classes_descriptions",
"templates": {
"default": {
"type": "input_output_template",
"input_format": "Q: {text} What is the {type_of_class}?",
"output_format": "{label}",
"instruction": "{classes_descriptions}\n\nLabel the {type_of_class} for the following {text_type}:\n",
"target_prefix": "A: ",
"title_fields": [],
"postprocessors": [
"processors.take_first_non_empty_line",
"processors.lower_case_till_punc"
]
}
}
}
49 changes: 49 additions & 0 deletions src/unitxt/catalog/cards/legalbench/corporate_lobbying.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"type": "task_card",
"loader": {
"type": "load_hf",
"path": "nguha/legalbench",
"name": "corporate_lobbying"
},
"preprocess_steps": [
{
"type": "format_text",
"text": "Official title of bill: {bill_title}\nOfficial summary of bill: {bill_summary}\nCompany name: {company_name}\nCompany business description: {company_description}",
"to_field": "text"
},
{
"type": "rename_fields",
"field_to_field": {
"text": "text",
"answer": "label"
}
},
{
"type": "add_fields",
"fields": {
"text_type": "",
"classes": [
"Yes",
"No"
],
"type_of_class": "",
"classes_descriptions": "You are a lobbyist analyzing Congressional bills for their impacts on companies.\nGiven the title and summary of the bill, plus information on the company from its 10K SEC filing, is a bill at least somewhat relevant to a company in terms of whether it could impact the company's bottom-line if it was enacted"
}
}
],
"task": "tasks.classification.multi_class.with_classes_descriptions",
"templates": {
"default": {
"type": "input_output_template",
"input_format": "{text}\nIs this bill potentially relevant to the company? FINAL ANSWER:",
"output_format": "{label}",
"instruction": "{classes_descriptions}, it is your job to determine {type_of_class} (by saying Yes or No).",
"target_prefix": "",
"title_fields": [],
"postprocessors": [
"processors.take_first_non_empty_line",
"processors.lower_case_till_punc"
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"type": "task_card",
"loader": {
"type": "load_hf",
"path": "nguha/legalbench",
"name": "function_of_decision_section"
},
"preprocess_steps": [
{
"type": "rename_fields",
"field_to_field": {
"Paragraph": "text",
"answer": "label"
}
},
{
"type": "add_fields",
"fields": {
"text_type": "text",
"classes": [
"Facts",
"Procedural History",
"Issue",
"Rule",
"Analysis",
"Conclusion",
"Decree"
],
"type_of_class": "",
"classes_descriptions": "- Facts: The paragraph describes the factual background that led up to the present lawsuit.\n- Procedural History: The paragraph describes the course of litigation that led to the current proceeding before the court.\n- Issue: The paragraph describes the legal or factual issue that must be resolved by the court.\n- Rule: The paragraph describes a rule of law relevant to resolving the issue.\n- Analysis: The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.\n- Conclusion: The paragraph presents a conclusion of the court.\n- Decree: The paragraph constitutes a decree resolving the dispute"
}
}
],
"task": "tasks.classification.multi_class.with_classes_descriptions",
"templates": {
"default": {
"type": "input_output_template",
"input_format": "{text_type}: {text}",
"output_format": "{label}",
"instruction": "Classify the following {text_type} using the following definitions.\n\n{classes_descriptions}.\n\n",
"target_prefix": "Label: ",
"title_fields": [
"text_type"
],
"postprocessors": [
"processors.take_first_non_empty_line",
"processors.lower_case_till_punc"
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"type": "task_card",
"loader": {
"type": "load_hf",
"path": "nguha/legalbench",
"name": "international_citizenship_questions"
},
"preprocess_steps": [
{
"type": "rename_fields",
"field_to_field": {
"question": "text",
"answer": "label"
}
},
{
"type": "add_fields",
"fields": {
"text_type": "question",
"classes": [
"Yes",
"No"
],
"type_of_class": "",
"classes_descriptions": "considering the state of international law on January 1st, 2020"
}
}
],
"task": "tasks.classification.multi_class.with_classes_descriptions",
"templates": {
"default": {
"type": "input_output_template",
"input_format": "{text_type}: {text} Answer from one of {classes}.",
"output_format": "{label}",
"instruction": "Answer the following {text_type} {classes_descriptions}.\n",
"target_prefix": "Answer: ",
"title_fields": [
"text_type"
],
"postprocessors": [
"processors.take_first_non_empty_line",
"processors.lower_case_till_punc"
]
}
}
}
Loading
Loading