Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added defaults to tasks #921

Merged
merged 7 commits (target and source branch names missing from this capture) on
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions prepare/cards/20_newsgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,9 @@
),
RenameFields(field_to_field={"label_text": "label"}),
MapInstanceValues(mappers={"label": map_labels}),
Set(
fields={
"classes": list(map_labels.values()),
"text_type": "text",
"type_of_class": "topic",
}
),
Set(fields={"classes": list(map_labels.values())}),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
__tags__={"region": "us"},
__description__=(
Expand Down
10 changes: 2 additions & 8 deletions prepare/cards/20newsgroups_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,9 @@
),
RenameFields(field_to_field={"data": "text", "target": "label"}),
MapInstanceValues(mappers={"label": map_labels}),
Set(
fields={
"classes": list(map_labels.values()),
"text_type": "text",
"type_of_class": "topic",
# [review thread on this hunk: pawelknes marked this conversation as resolved]
}
),
Set(fields={"classes": list(map_labels.values())}),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
)

Expand Down
10 changes: 2 additions & 8 deletions prepare/cards/CFPB_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,9 @@
),
RenameFields(field_to_field=field_to_field[subset]),
MapInstanceValues(mappers={"label": mappers[subset]}),
Set(
fields={
"classes": list(mappers[subset].values()),
"text_type": "text",
"type_of_class": "topic",
}
),
Set(fields={"classes": list(mappers[subset].values())}),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
)
test_card(card, debug=False)
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/ag_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@
fields={
"classes": classlabels.names,
"text_type": "sentence",
"type_of_class": "topic",
}
),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
__tags__={
"annotations_creators": "found",
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/argument_topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,10 @@
fields={
"classes": class_names,
"text_type": "argument", # TODO maybe text?
"type_of_class": "topic",
}
),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
__tags__={
"arxiv": "1911.11408",
Expand Down
8 changes: 1 addition & 7 deletions prepare/cards/atis.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,7 @@
get_default=[],
not_exist_ok=True,
),
Set(
fields={
"text_type": "text",
"class_type": "entity type",
"classes": classes,
}
),
Set(fields={"classes": classes}),
],
task="tasks.span_labeling.extraction",
templates="templates.span_labeling.extraction.all",
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/billsum.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from unitxt import add_to_catalog
from unitxt.blocks import Set, TaskCard
from unitxt.blocks import TaskCard
from unitxt.loaders import LoadHF
from unitxt.operators import RenameFields
from unitxt.splitters import SplitRandomMix
Expand All @@ -13,7 +13,6 @@
{"train": "train[87.5%]", "validation": "train[12.5%]", "test": "test"}
),
RenameFields(field_to_field={"text": "document"}),
Set(fields={"document_type": "document"}),
],
task="tasks.summarization.abstractive",
templates="templates.summarization.abstractive.all",
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/claim_stance_topic.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,10 @@
fields={
"classes": class_names,
"text_type": "argument", # TODO maybe text?
"type_of_class": "topic",
}
),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
__tags__={
"language": "en",
Expand Down
1 change: 0 additions & 1 deletion prepare/cards/coedit.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@
Set(
fields={
"class": "Grammatically incorrect",
"text_type": "text",
}
),
Shuffle(page_size=sys.maxsize),
Expand Down
1 change: 0 additions & 1 deletion prepare/cards/cola.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
Set(
fields={
"classes": ["unacceptable", "acceptable"],
"text_type": "text",
"type_of_class": "grammatical acceptability",
}
),
Expand Down
2 changes: 1 addition & 1 deletion prepare/cards/dart.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
SerializeTriples(field_to_field=[["tripleset", "serialized_triples"]]),
RenameFields(field_to_field={"serialized_triples": "input"}),
Copy(field="annotations/text/0", to_field="output"),
Set(fields={"type_of_input": "Triples", "type_of_output": "Text"}),
Set(fields={"type_of_input": "Triples"}),
],
task="tasks.generation",
templates="templates.generation.all",
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/dbpedia_14.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,10 @@
fields={
"classes": classes,
"text_type": "paragraph",
"type_of_class": "topic",
}
),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
__tags__={
"annotations_creators": "machine-generated",
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/financial_tweets.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,10 @@
fields={
"classes": list(mappers.values()),
"text_type": "tweet",
"type_of_class": "topic",
}
),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
__tags__={
"annotations_creators": "other",
Expand Down
1 change: 0 additions & 1 deletion prepare/cards/go_emotions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
Set(
fields={
"classes": classes,
"text_type": "text",
"type_of_classes": "emotions",
}
),
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/head_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,10 @@
fields={
"classes": categories,
"text_type": "question",
"type_of_class": "topic",
}
),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
__description__=(
"HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio de Sanidad, Consumo y Bienestar Social. The dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology… See the full description on the dataset page: https://huggingface.co/datasets/head_qa"
Expand Down
10 changes: 2 additions & 8 deletions prepare/cards/law_stack_exchange.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,9 @@
RenameFields(field_to_field={"text_label": "label"}),
ListFieldValues(fields=["title", "body"], to_field="text"),
JoinStr(separator=". ", field="text", to_field="text"),
Set(
fields={
"classes": classlabels,
"text_type": "text",
"type_of_class": "topic",
}
),
Set(fields={"classes": classlabels}),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
__tags__={
"flags": ["law", "stackexchange"],
Expand Down
1 change: 0 additions & 1 deletion prepare/cards/ledgar.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
Set(
fields={
"classes": classlabels.names,
"text_type": "text",
"type_of_class": "contractual clauses",
}
),
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/medical_abstracts.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,10 @@
fields={
"classes": list(mappers.values()),
"text_type": "abstract",
"type_of_class": "topic",
}
),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
)
test_card(card, debug=False)
Expand Down
2 changes: 0 additions & 2 deletions prepare/cards/mlsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from unitxt.blocks import (
LoadHF,
RenameFields,
Set,
TaskCard,
)
from unitxt.catalog import add_to_catalog
Expand All @@ -21,7 +20,6 @@
loader=LoadHF(path="mlsum", name=lang),
preprocess_steps=[
RenameFields(field_to_field={"text": "document"}),
Set(fields={"document_type": "document"}),
],
task="tasks.summarization.abstractive",
templates="templates.summarization.abstractive.all",
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/news_category_classification_headline.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,10 @@
fields={
"classes": classlabels,
"text_type": "sentence",
"type_of_class": "topic",
}
),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
)
test_card(card, debug=False)
Expand Down
8 changes: 1 addition & 7 deletions prepare/cards/reuters21578.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,7 @@
{"train": "train[85%]", "validation": "train[15%]", "test": "test"}
),
RenameFields(field_to_field={"topics": "labels"}),
Set(
fields={
"classes": classlabels[subset],
"text_type": "text",
"type_of_classes": "topics",
}
),
Set(fields={"classes": classlabels[subset], "type_of_classes": "topics"}),
],
task="tasks.classification.multi_label",
templates="templates.classification.multi_label.all",
Expand Down
6 changes: 2 additions & 4 deletions prepare/cards/stsb.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,9 @@
"label": "attribute_value",
}
),
Set(
fields={"attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0}
),
Set(fields={"min_value": 1.0, "max_value": 5.0}),
],
task="tasks.regression.two_texts",
task="tasks.regression.two_texts.similarity",
templates="templates.regression.two_texts.all",
__tags__={
"annotations_creators": "other",
Expand Down
3 changes: 1 addition & 2 deletions prepare/cards/tldr.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from unitxt import add_to_catalog
from unitxt.blocks import Set, SplitRandomMix, TaskCard
from unitxt.blocks import SplitRandomMix, TaskCard
from unitxt.loaders import LoadHF
from unitxt.operators import RenameFields
from unitxt.test_utils.card import test_card
Expand All @@ -9,7 +9,6 @@
preprocess_steps=[
SplitRandomMix({"train": "train[50%]", "test": "train[50%]"}),
RenameFields(field_to_field={"content": "document"}),
Set(fields={"document_type": "document"}),
],
task="tasks.summarization.abstractive",
templates="templates.summarization.abstractive.all",
Expand Down
1 change: 0 additions & 1 deletion prepare/cards/toxigen.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
Set(
fields={
"classes": ["not toxic", "toxic"],
"text_type": "text",
"type_of_class": "toxicity",
}
),
Expand Down
1 change: 0 additions & 1 deletion prepare/cards/unfair_tos.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
Set(
fields={
"classes": classlabels.feature.names,
"text_type": "text",
"type_of_classes": "contractual clauses",
}
),
Expand Down
2 changes: 0 additions & 2 deletions prepare/cards/universal_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,6 @@
),
Set(
fields={
"text_type": "text",
"class_type": "entity type",
"classes": ["Person", "Organization", "Location"],
}
),
Expand Down
2 changes: 1 addition & 1 deletion prepare/cards/wiki_bio.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
),
SerializeKeyValPairs(field_to_field=[["kvpairs", "input"]]),
RenameFields(field_to_field={"target_text": "output"}),
Set(fields={"type_of_input": "Key-Value pairs", "type_of_output": "Text"}),
Set(fields={"type_of_input": "Key-Value pairs"}),
],
task="tasks.generation",
templates="templates.generation.all",
Expand Down
2 changes: 0 additions & 2 deletions prepare/cards/xlsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from unitxt.blocks import (
LoadHF,
RenameFields,
Set,
TaskCard,
)
from unitxt.catalog import add_to_catalog
Expand All @@ -19,7 +18,6 @@
loader=LoadHF(path="GEM/xlsum", name=lang),
preprocess_steps=[
RenameFields(field_to_field={"text": "document", "target": "summary"}),
Set(fields={"document_type": "document"}),
],
task="tasks.summarization.abstractive",
templates="templates.summarization.abstractive.all",
Expand Down
4 changes: 0 additions & 4 deletions prepare/cards/xsum.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
from unitxt.blocks import (
LoadHF,
Set,
TaskCard,
)
from unitxt.catalog import add_to_catalog
from unitxt.test_utils.card import test_card

card = TaskCard(
loader=LoadHF(path="EdinburghNLP/xsum"),
preprocess_steps=[
Set(fields={"document_type": "document"}),
],
task="tasks.summarization.abstractive",
templates="templates.summarization.abstractive.all",
__tags__={
Expand Down
10 changes: 2 additions & 8 deletions prepare/cards/yahoo_answers_topics.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,9 @@
to_field="text",
),
JoinStr(separator=" ", field="text", to_field="text"),
Set(
fields={
"classes": classes,
"text_type": "text",
"type_of_class": "topic",
}
),
Set(fields={"classes": classes}),
],
task="tasks.classification.multi_class",
task="tasks.classification.multi_class.topic_classification",
templates="templates.classification.multi_class.all",
__tags__={
"annotations_creators": "found",
Expand Down
Loading
Loading