IBM · pawelknes · Jun 24, 2024 · Jun 19, 2024 · Jun 21, 2024 · Jun 21, 2024
diff --git a/prepare/cards/20_newsgroups.py b/prepare/cards/20_newsgroups.py
@@ -44,15 +44,9 @@
         ),
         RenameFields(field_to_field={"label_text": "label"}),
         MapInstanceValues(mappers={"label": map_labels}),
-        Set(
-            fields={
-                "classes": list(map_labels.values()),
-                "text_type": "text",
-                "type_of_class": "topic",
-            }
-        ),
+        Set(fields={"classes": list(map_labels.values())}),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
     __tags__={"region": "us"},
     __description__=(

diff --git a/prepare/cards/20newsgroups_sklearn.py b/prepare/cards/20newsgroups_sklearn.py
@@ -42,15 +42,9 @@
         ),
         RenameFields(field_to_field={"data": "text", "target": "label"}),
         MapInstanceValues(mappers={"label": map_labels}),
-        Set(
-            fields={
-                "classes": list(map_labels.values()),
-                "text_type": "text",
-                "type_of_class": "topic",
-            }
-        ),
+        Set(fields={"classes": list(map_labels.values())}),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
 )
 

diff --git a/prepare/cards/CFPB_product.py b/prepare/cards/CFPB_product.py
@@ -51,15 +51,9 @@
             ),
             RenameFields(field_to_field=field_to_field[subset]),
             MapInstanceValues(mappers={"label": mappers[subset]}),
-            Set(
-                fields={
-                    "classes": list(mappers[subset].values()),
-                    "text_type": "text",
-                    "type_of_class": "topic",
-                }
-            ),
+            Set(fields={"classes": list(mappers[subset].values())}),
         ],
-        task="tasks.classification.multi_class",
+        task="tasks.classification.multi_class.topic_classification",
         templates="templates.classification.multi_class.all",
     )
     test_card(card, debug=False)

diff --git a/prepare/cards/ag_news.py b/prepare/cards/ag_news.py
@@ -30,11 +30,10 @@
             fields={
                 "classes": classlabels.names,
                 "text_type": "sentence",
-                "type_of_class": "topic",
             }
         ),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
     __tags__={
         "annotations_creators": "found",

diff --git a/prepare/cards/argument_topic.py b/prepare/cards/argument_topic.py
@@ -85,11 +85,10 @@
             fields={
                 "classes": class_names,
                 "text_type": "argument",  # TODO maybe text?
-                "type_of_class": "topic",
             }
         ),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
     __tags__={
         "arxiv": "1911.11408",

diff --git a/prepare/cards/atis.py b/prepare/cards/atis.py
@@ -117,13 +117,7 @@
             get_default=[],
             not_exist_ok=True,
         ),
-        Set(
-            fields={
-                "text_type": "text",
-                "class_type": "entity type",
-                "classes": classes,
-            }
-        ),
+        Set(fields={"classes": classes}),
     ],
     task="tasks.span_labeling.extraction",
     templates="templates.span_labeling.extraction.all",

diff --git a/prepare/cards/billsum.py b/prepare/cards/billsum.py
@@ -1,5 +1,5 @@
 from unitxt import add_to_catalog
-from unitxt.blocks import Set, TaskCard
+from unitxt.blocks import TaskCard
 from unitxt.loaders import LoadHF
 from unitxt.operators import RenameFields
 from unitxt.splitters import SplitRandomMix
@@ -13,7 +13,6 @@
             {"train": "train[87.5%]", "validation": "train[12.5%]", "test": "test"}
         ),
         RenameFields(field_to_field={"text": "document"}),
-        Set(fields={"document_type": "document"}),
     ],
     task="tasks.summarization.abstractive",
     templates="templates.summarization.abstractive.all",

diff --git a/prepare/cards/claim_stance_topic.py b/prepare/cards/claim_stance_topic.py
@@ -70,11 +70,10 @@
             fields={
                 "classes": class_names,
                 "text_type": "argument",  # TODO maybe text?
-                "type_of_class": "topic",
             }
         ),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
     __tags__={
         "language": "en",

diff --git a/prepare/cards/coedit.py b/prepare/cards/coedit.py
@@ -73,7 +73,6 @@
         Set(
             fields={
                 "class": "Grammatically incorrect",
-                "text_type": "text",
             }
         ),
         Shuffle(page_size=sys.maxsize),

diff --git a/prepare/cards/cola.py b/prepare/cards/cola.py
@@ -17,7 +17,6 @@
         Set(
             fields={
                 "classes": ["unacceptable", "acceptable"],
-                "text_type": "text",
                 "type_of_class": "grammatical acceptability",
             }
         ),

diff --git a/prepare/cards/dart.py b/prepare/cards/dart.py
@@ -16,7 +16,7 @@
         SerializeTriples(field_to_field=[["tripleset", "serialized_triples"]]),
         RenameFields(field_to_field={"serialized_triples": "input"}),
         Copy(field="annotations/text/0", to_field="output"),
-        Set(fields={"type_of_input": "Triples", "type_of_output": "Text"}),
+        Set(fields={"type_of_input": "Triples"}),
     ],
     task="tasks.generation",
     templates="templates.generation.all",

diff --git a/prepare/cards/dbpedia_14.py b/prepare/cards/dbpedia_14.py
@@ -46,11 +46,10 @@
             fields={
                 "classes": classes,
                 "text_type": "paragraph",
-                "type_of_class": "topic",
             }
         ),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
     __tags__={
         "annotations_creators": "machine-generated",

diff --git a/prepare/cards/financial_tweets.py b/prepare/cards/financial_tweets.py
@@ -53,11 +53,10 @@
             fields={
                 "classes": list(mappers.values()),
                 "text_type": "tweet",
-                "type_of_class": "topic",
             }
         ),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
     __tags__={
         "annotations_creators": "other",

diff --git a/prepare/cards/go_emotions.py b/prepare/cards/go_emotions.py
@@ -23,7 +23,6 @@
         Set(
             fields={
                 "classes": classes,
-                "text_type": "text",
                 "type_of_classes": "emotions",
             }
         ),

diff --git a/prepare/cards/head_qa.py b/prepare/cards/head_qa.py
@@ -32,11 +32,10 @@
                 fields={
                     "classes": categories,
                     "text_type": "question",
-                    "type_of_class": "topic",
                 }
             ),
         ],
-        task="tasks.classification.multi_class",
+        task="tasks.classification.multi_class.topic_classification",
         templates="templates.classification.multi_class.all",
         __description__=(
             "HEAD-QA is a multi-choice HEAlthcare Dataset. The questions come from exams to access a specialized position in the Spanish healthcare system, and are challenging even for highly specialized humans. They are designed by the Ministerio de Sanidad, Consumo y Bienestar Social. The dataset contains questions about the following topics: medicine, nursing, psychology, chemistry, pharmacology and biology… See the full description on the dataset page: https://huggingface.co/datasets/head_qa"

diff --git a/prepare/cards/law_stack_exchange.py b/prepare/cards/law_stack_exchange.py
@@ -43,15 +43,9 @@
         RenameFields(field_to_field={"text_label": "label"}),
         ListFieldValues(fields=["title", "body"], to_field="text"),
         JoinStr(separator=". ", field="text", to_field="text"),
-        Set(
-            fields={
-                "classes": classlabels,
-                "text_type": "text",
-                "type_of_class": "topic",
-            }
-        ),
+        Set(fields={"classes": classlabels}),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
     __tags__={
         "flags": ["law", "stackexchange"],

diff --git a/prepare/cards/ledgar.py b/prepare/cards/ledgar.py
@@ -24,7 +24,6 @@
         Set(
             fields={
                 "classes": classlabels.names,
-                "text_type": "text",
                 "type_of_class": "contractual clauses",
             }
         ),

diff --git a/prepare/cards/medical_abstracts.py b/prepare/cards/medical_abstracts.py
@@ -39,11 +39,10 @@
             fields={
                 "classes": list(mappers.values()),
                 "text_type": "abstract",
-                "type_of_class": "topic",
             }
         ),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
 )
 test_card(card, debug=False)

diff --git a/prepare/cards/mlsum.py b/prepare/cards/mlsum.py
@@ -2,7 +2,6 @@
 from unitxt.blocks import (
     LoadHF,
     RenameFields,
-    Set,
     TaskCard,
 )
 from unitxt.catalog import add_to_catalog
@@ -21,7 +20,6 @@
         loader=LoadHF(path="mlsum", name=lang),
         preprocess_steps=[
             RenameFields(field_to_field={"text": "document"}),
-            Set(fields={"document_type": "document"}),
         ],
         task="tasks.summarization.abstractive",
         templates="templates.summarization.abstractive.all",

diff --git a/prepare/cards/news_category_classification_headline.py b/prepare/cards/news_category_classification_headline.py
@@ -74,11 +74,10 @@
             fields={
                 "classes": classlabels,
                 "text_type": "sentence",
-                "type_of_class": "topic",
             }
         ),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
 )
 test_card(card, debug=False)

diff --git a/prepare/cards/reuters21578.py b/prepare/cards/reuters21578.py
@@ -149,13 +149,7 @@
                 {"train": "train[85%]", "validation": "train[15%]", "test": "test"}
             ),
             RenameFields(field_to_field={"topics": "labels"}),
-            Set(
-                fields={
-                    "classes": classlabels[subset],
-                    "text_type": "text",
-                    "type_of_classes": "topics",
-                }
-            ),
+            Set(fields={"classes": classlabels[subset], "type_of_classes": "topics"}),
         ],
         task="tasks.classification.multi_label",
         templates="templates.classification.multi_label.all",

diff --git a/prepare/cards/stsb.py b/prepare/cards/stsb.py
@@ -21,11 +21,9 @@
                 "label": "attribute_value",
             }
         ),
-        Set(
-            fields={"attribute_name": "similarity", "min_value": 1.0, "max_value": 5.0}
-        ),
+        Set(fields={"min_value": 1.0, "max_value": 5.0}),
     ],
-    task="tasks.regression.two_texts",
+    task="tasks.regression.two_texts.similarity",
     templates="templates.regression.two_texts.all",
     __tags__={
         "annotations_creators": "other",

diff --git a/prepare/cards/tldr.py b/prepare/cards/tldr.py
@@ -1,5 +1,5 @@
 from unitxt import add_to_catalog
-from unitxt.blocks import Set, SplitRandomMix, TaskCard
+from unitxt.blocks import SplitRandomMix, TaskCard
 from unitxt.loaders import LoadHF
 from unitxt.operators import RenameFields
 from unitxt.test_utils.card import test_card
@@ -9,7 +9,6 @@
     preprocess_steps=[
         SplitRandomMix({"train": "train[50%]", "test": "train[50%]"}),
         RenameFields(field_to_field={"content": "document"}),
-        Set(fields={"document_type": "document"}),
     ],
     task="tasks.summarization.abstractive",
     templates="templates.summarization.abstractive.all",

diff --git a/prepare/cards/toxigen.py b/prepare/cards/toxigen.py
@@ -21,7 +21,6 @@
         Set(
             fields={
                 "classes": ["not toxic", "toxic"],
-                "text_type": "text",
                 "type_of_class": "toxicity",
             }
         ),

diff --git a/prepare/cards/unfair_tos.py b/prepare/cards/unfair_tos.py
@@ -25,7 +25,6 @@
         Set(
             fields={
                 "classes": classlabels.feature.names,
-                "text_type": "text",
                 "type_of_classes": "contractual clauses",
             }
         ),

diff --git a/prepare/cards/universal_ner.py b/prepare/cards/universal_ner.py
@@ -71,8 +71,6 @@
             ),
             Set(
                 fields={
-                    "text_type": "text",
-                    "class_type": "entity type",
                     "classes": ["Person", "Organization", "Location"],
                 }
             ),

diff --git a/prepare/cards/wiki_bio.py b/prepare/cards/wiki_bio.py
@@ -20,7 +20,7 @@
         ),
         SerializeKeyValPairs(field_to_field=[["kvpairs", "input"]]),
         RenameFields(field_to_field={"target_text": "output"}),
-        Set(fields={"type_of_input": "Key-Value pairs", "type_of_output": "Text"}),
+        Set(fields={"type_of_input": "Key-Value pairs"}),
     ],
     task="tasks.generation",
     templates="templates.generation.all",

diff --git a/prepare/cards/xlsum.py b/prepare/cards/xlsum.py
@@ -2,7 +2,6 @@
 from unitxt.blocks import (
     LoadHF,
     RenameFields,
-    Set,
     TaskCard,
 )
 from unitxt.catalog import add_to_catalog
@@ -19,7 +18,6 @@
         loader=LoadHF(path="GEM/xlsum", name=lang),
         preprocess_steps=[
             RenameFields(field_to_field={"text": "document", "target": "summary"}),
-            Set(fields={"document_type": "document"}),
         ],
         task="tasks.summarization.abstractive",
         templates="templates.summarization.abstractive.all",

diff --git a/prepare/cards/xsum.py b/prepare/cards/xsum.py
@@ -1,16 +1,12 @@
 from unitxt.blocks import (
     LoadHF,
-    Set,
     TaskCard,
 )
 from unitxt.catalog import add_to_catalog
 from unitxt.test_utils.card import test_card
 
 card = TaskCard(
     loader=LoadHF(path="EdinburghNLP/xsum"),
-    preprocess_steps=[
-        Set(fields={"document_type": "document"}),
-    ],
     task="tasks.summarization.abstractive",
     templates="templates.summarization.abstractive.all",
     __tags__={

diff --git a/prepare/cards/yahoo_answers_topics.py b/prepare/cards/yahoo_answers_topics.py
@@ -43,15 +43,9 @@
             to_field="text",
         ),
         JoinStr(separator=" ", field="text", to_field="text"),
-        Set(
-            fields={
-                "classes": classes,
-                "text_type": "text",
-                "type_of_class": "topic",
-            }
-        ),
+        Set(fields={"classes": classes}),
     ],
-    task="tasks.classification.multi_class",
+    task="tasks.classification.multi_class.topic_classification",
     templates="templates.classification.multi_class.all",
     __tags__={
         "annotations_creators": "found",