Merge branch 'main' into serializers
elronbandel committed Sep 8, 2024
2 parents 7248f90 + c43f57c commit f897cf6
Showing 28 changed files with 1,146 additions and 68 deletions.
1 change: 0 additions & 1 deletion .github/workflows/catalog_consistency.yml
@@ -12,7 +12,6 @@ jobs:
    runs-on: ubuntu-latest
    env:
      OS: ubuntu-latest
-     GENAI_KEY: ${{ secrets.GENAI_KEY }}
      UNITXT_DEFAULT_VERBOSITY: error
      DATASETS_VERBOSITY: error
      HF_HUB_VERBOSITY: error
1 change: 0 additions & 1 deletion .github/workflows/catalog_preparation.yml
@@ -12,7 +12,6 @@ jobs:
    runs-on: ubuntu-latest
    env:
      OS: ubuntu-latest
-     GENAI_KEY: ${{ secrets.GENAI_KEY }}
      UNITXT_DEFAULT_VERBOSITY: error
      DATASETS_VERBOSITY: error
      HF_HUB_VERBOSITY: error
10 changes: 5 additions & 5 deletions README.md
@@ -31,11 +31,11 @@ https://github.com/IBM/unitxt/assets/23455264/baef9131-39d4-4164-90b2-05da52919f

### 🦄 Currently on Unitxt Catalog

-![NLP Tasks](https://img.shields.io/badge/NLP_tasks-40-blue)
-![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-457-blue)
-![Templates](https://img.shields.io/badge/Templates-229-blue)
-![Formats](https://img.shields.io/badge/Formats-18-blue)
-![Metrics](https://img.shields.io/badge/Metrics-98-blue)
+![NLP Tasks](https://img.shields.io/badge/NLP_tasks-48-blue)
+![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-537-blue)
+![Templates](https://img.shields.io/badge/Templates-265-blue)
+![Formats](https://img.shields.io/badge/Formats-23-blue)
+![Metrics](https://img.shields.io/badge/Metrics-136-blue)

### 🦄 Run Unitxt Exploration Dashboard

70 changes: 70 additions & 0 deletions prepare/cards/chat_rag_bench.py
@@ -0,0 +1,70 @@
from copy import deepcopy

from unitxt import add_to_catalog
from unitxt.blocks import (
    LoadHF,
    SplitRandomMix,
    TaskCard,
    TemplatesDict,
)
from unitxt.dialog_operators import SerializeOpenAiFormatDialog
from unitxt.operators import Copy, Set, Shuffle
from unitxt.test_utils.card import test_card

splits_random_mixes = {
    "train": SplitRandomMix(
        {"train": "test[0.6]", "validation": "test[0.2]", "test": "test[0.2]"}
    ),
    "standard": SplitRandomMix({"test": "test"}),
}

subsets = ["doqa_travel", "doqa_cooking", "doqa_movies", "doc2dial", "hybridial"]
for split in splits_random_mixes:
    for subset in subsets:
        card = TaskCard(
            loader=LoadHF(path="nvidia/ChatRAG-Bench", name=subset, split="test"),
            preprocess_steps=[
                splits_random_mixes[split],
                Shuffle(),
                Copy(
                    field_to_field={
                        "ctxs/*/text": "contexts",
                        "messages": "dialog",
                        "answers": "reference_answers",
                    }
                ),
                Set(
                    fields={
                        "contexts_ids": [],
                    }
                ),
                SerializeOpenAiFormatDialog(
                    field="dialog",
                    to_field="question",
                    format="formats.user_assistant",
                    slice_first_and_last_turns_format=True,
                    last_response_to_field="dummy",
                ),
            ],
            task="tasks.rag.response_generation",
            templates=TemplatesDict(
                {"default": "templates.rag.response_generation.please_respond_chat"}
            ),
        )

        # testing the card is too slow with the bert-score metric, so dropping it
        card_for_test = deepcopy(card)
        card_for_test.task.metrics = [
            "metrics.rouge",
        ]

        test_card(
            card_for_test,
            strict=True,
            demos_taken_from="test",
        )
        add_to_catalog(
            card,
            f"cards.rag.response_generation.chat_rag_bench.{'train.' if split=='train' else ''}user_assistant_format.{subset}",
            overwrite=True,
        )
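For orientation, here is a minimal usage sketch for one of the cards registered above. It assumes the prepare script has been run so the card exists in the local catalog, and that unitxt's standard load_dataset entry point accepts card/template keywords and a loader_limit argument here as it does for other catalog recipes; none of that is part of this commit.

# Usage sketch (not part of this commit): loading one of the cards registered
# above. Assumes the card has been added to the local catalog by running this
# prepare script, and that load_dataset(card=..., template=..., loader_limit=...)
# behaves as it does for other unitxt catalog cards.
from unitxt import load_dataset

dataset = load_dataset(
    card="cards.rag.response_generation.chat_rag_bench.user_assistant_format.doc2dial",
    template="templates.rag.response_generation.please_respond_chat",
    loader_limit=100,  # keep the sketch small
)

# Each processed instance carries the rendered prompt under "source".
print(dataset["test"][0]["source"])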
96 changes: 51 additions & 45 deletions prepare/cards/clapnq.py
@@ -14,6 +14,11 @@
)
from unitxt.test_utils.card import test_card

+splits = {
+    "eval": {"train": "train", "test": "validation"},
+    "train": {"train": "train[0.5]", "validation": "train[0.5]", "test": "validation"},
+}
+
unanswerable_responses = [
"I'm sorry, I cannot answer this question based on the context.",
"The answer is not in the text provided.",
@@ -27,53 +32,54 @@
"Insufficient context to provide an answer.",
]

-card = TaskCard(
-    loader=LoadHF(
-        path="PrimeQA/clapnq",
-    ),
-    preprocess_steps=[
-        SplitRandomMix({"train": "train", "test": "validation"}),
-        Copy(
-            field_to_field={
-                "passages/*/text": "contexts",
-                "input": "question",
-                "output/*/answer": "reference_answers",
-            }
-        ),
-        Set(
-            fields={
-                "contexts_ids": [],
-            }
-        ),
-        MapInstanceValues(
-            mappers={"reference_answers": {"['']": unanswerable_responses}},
-            strict=False,
-        ),
-    ],
-    task="tasks.rag.response_generation",
-    templates=TemplatesDict(
-        {
-            "please_respond": "templates.rag.response_generation.please_respond",
-            "answer_based_on_context": "templates.rag.response_generation.answer_based_on_context",
-            "answer_based_on_context_inverted": "templates.rag.response_generation.answer_based_on_context_inverted",
-        }
-    ),
-)
-
-# testing the card is too slow with the bert-score metric, so dropping it
-card_for_test = deepcopy(card)
-card_for_test.task.metrics = [
-    "metrics.rag.response_generation.correctness.token_overlap",
-    "metrics.rag.response_generation.faithfullness.token_overlap",
-]
-
-test_card(
-    card_for_test,
-    strict=True,
-    demos_taken_from="test",
-)
-add_to_catalog(
-    card,
-    "cards.rag.response_generation.clapnq",
-    overwrite=True,
-)
+for split in splits.keys():
+    card = TaskCard(
+        loader=LoadHF(
+            path="PrimeQA/clapnq",
+        ),
+        preprocess_steps=[
+            SplitRandomMix(splits[split]),
+            Copy(
+                field_to_field={
+                    "passages/*/text": "contexts",
+                    "input": "question",
+                    "output/*/answer": "reference_answers",
+                }
+            ),
+            Set(
+                fields={
+                    "contexts_ids": [],
+                }
+            ),
+            MapInstanceValues(
+                mappers={"reference_answers": {"['']": unanswerable_responses}},
+                strict=False,
+            ),
+        ],
+        task="tasks.rag.response_generation",
+        templates=TemplatesDict(
+            {
+                "please_respond": "templates.rag.response_generation.please_respond",
+                "answer_based_on_context": "templates.rag.response_generation.answer_based_on_context",
+                "answer_based_on_context_inverted": "templates.rag.response_generation.answer_based_on_context_inverted",
+            }
+        ),
+    )
+
+    # testing the card is too slow with the bert-score metric, so dropping it
+    card_for_test = deepcopy(card)
+    card_for_test.task.metrics = [
+        "metrics.rag.response_generation.correctness.token_overlap",
+        "metrics.rag.response_generation.faithfullness.token_overlap",
+    ]
+
+    test_card(
+        card_for_test,
+        strict=True,
+        demos_taken_from="test",
+    )
+    add_to_catalog(
+        card,
+        f'cards.rag.response_generation.{"train." if split == "train" else ""}clapnq',
+        overwrite=True,
+    )
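As a side note on the MapInstanceValues step kept in both variants: a rough sketch of its intended effect (not unitxt's internal implementation) is that a ClapNQ item whose reference-answer list is a single empty string gets the canned refusal strings as its references instead.

# Sketch of the intended effect of the MapInstanceValues step above (illustrative
# only; unitxt matches the stringified field value against the mapper key "['']").
unanswerable_responses = [
    "I'm sorry, I cannot answer this question based on the context.",
    "The answer is not in the text provided.",
]  # abbreviated; the prepare script defines the full list

instance = {"reference_answers": [""]}  # an unanswerable ClapNQ item

if str(instance["reference_answers"]) == "['']":
    instance["reference_answers"] = unanswerable_responses

print(instance["reference_answers"][0])
# I'm sorry, I cannot answer this question based on the context.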
54 changes: 54 additions & 0 deletions prepare/cards/open_australian_legal_qa.py
@@ -0,0 +1,54 @@
from copy import deepcopy

from unitxt import add_to_catalog
from unitxt.blocks import (
    LoadHF,
    SplitRandomMix,
    TaskCard,
    TemplatesDict,
)
from unitxt.operators import (
    Copy,
    ListFieldValues,
    Shuffle,
)
from unitxt.test_utils.card import test_card

card = TaskCard(
    loader=LoadHF(
        path="umarbutler/open-australian-legal-qa",
    ),
    preprocess_steps=[
        SplitRandomMix(
            {"train": "train[0.5]", "validation": "train[0.2]", "test": "train[0.3]"}
        ),
        Shuffle(),
        Copy(
            field_to_field={
                "source/text": "contexts",
                "answer": "reference_answers",
                "source/citation": "contexts_ids",
            }
        ),
        ListFieldValues(fields=["reference_answers"], to_field="reference_answers"),
        ListFieldValues(fields=["contexts"], to_field="contexts"),
        ListFieldValues(fields=["contexts_ids"], to_field="contexts_ids"),
    ],
    task="tasks.rag.response_generation",
    templates=TemplatesDict(
        {"default": "templates.rag.response_generation.please_respond_chat"}
    ),
)

# testing the card is too slow with the bert-score metric, so dropping it
card_for_test = deepcopy(card)
card_for_test.task.metrics = ["metrics.rouge"]

test_card(
    card_for_test,
    strict=True,
    demos_taken_from="test",
)
add_to_catalog(
    card, "cards.rag.response_generation.train.open_australian_legal_qa", overwrite=True
)
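A note on the three ListFieldValues steps: the response-generation task expects list-valued fields, while this dataset provides scalars, so each value is wrapped in a single-element list. Below is a rough sketch of that effect; the instance values are made up, not real dataset content.

# Sketch of the effect of the ListFieldValues steps above (values are hypothetical).
instance = {
    "contexts": "Extract of the cited judgment ...",
    "reference_answers": "The court held that ...",
    "contexts_ids": "Smith v Jones [2000] HCA 1",
}

for field in ("contexts", "reference_answers", "contexts_ids"):
    instance[field] = [instance[field]]  # wrap the scalar in a one-element list

print(instance["reference_answers"])  # ['The court held that ...']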
10 changes: 10 additions & 0 deletions prepare/templates/rag/response_generation.py
@@ -14,6 +14,16 @@
    overwrite=True,
)

+add_to_catalog(
+    MultiReferenceTemplate(
+        instruction="Please respond to the following question using the context",
+        input_format="Context: {contexts}\nQuestion: {question}.\n",
+        references_field="reference_answers",
+    ),
+    "templates.rag.response_generation.please_respond_chat",
+    overwrite=True,
+)
+
add_to_catalog(
    MultiReferenceTemplate(
        instruction="Answer the question, basing your answer on the context",
@@ -0,0 +1,48 @@
{
    "__type__": "task_card",
    "loader": {
        "__type__": "load_hf",
        "path": "nvidia/ChatRAG-Bench",
        "name": "doc2dial",
        "split": "test"
    },
    "preprocess_steps": [
        {
            "__type__": "split_random_mix",
            "mix": {
                "train": "test[0.6]",
                "validation": "test[0.2]",
                "test": "test[0.2]"
            }
        },
        {
            "__type__": "shuffle"
        },
        {
            "__type__": "copy",
            "field_to_field": {
                "ctxs/*/text": "contexts",
                "messages": "dialog",
                "answers": "reference_answers"
            }
        },
        {
            "__type__": "set",
            "fields": {
                "contexts_ids": []
            }
        },
        {
            "__type__": "serialize_open_ai_format_dialog",
            "field": "dialog",
            "to_field": "question",
            "format": "formats.user_assistant",
            "slice_first_and_last_turns_format": true,
            "last_response_to_field": "dummy"
        }
    ],
    "task": "tasks.rag.response_generation",
    "templates": {
        "default": "templates.rag.response_generation.please_respond_chat"
    }
}
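For reference, a short sketch of how such a generated catalog entry can be pulled back at runtime. It assumes the json above has been written to a local catalog under the name the prepare script uses for the doc2dial subset, and that fetch_artifact is available as unitxt's generic catalog lookup.

# Sketch (not part of this commit): retrieving the generated card from a local catalog.
from unitxt.artifact import fetch_artifact

card, _ = fetch_artifact(
    "cards.rag.response_generation.chat_rag_bench.user_assistant_format.doc2dial"
)
print(type(card).__name__)  # expected: TaskCard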
@@ -0,0 +1,48 @@
{
    "__type__": "task_card",
    "loader": {
        "__type__": "load_hf",
        "path": "nvidia/ChatRAG-Bench",
        "name": "doqa_cooking",
        "split": "test"
    },
    "preprocess_steps": [
        {
            "__type__": "split_random_mix",
            "mix": {
                "train": "test[0.6]",
                "validation": "test[0.2]",
                "test": "test[0.2]"
            }
        },
        {
            "__type__": "shuffle"
        },
        {
            "__type__": "copy",
            "field_to_field": {
                "ctxs/*/text": "contexts",
                "messages": "dialog",
                "answers": "reference_answers"
            }
        },
        {
            "__type__": "set",
            "fields": {
                "contexts_ids": []
            }
        },
        {
            "__type__": "serialize_open_ai_format_dialog",
            "field": "dialog",
            "to_field": "question",
            "format": "formats.user_assistant",
            "slice_first_and_last_turns_format": true,
            "last_response_to_field": "dummy"
        }
    ],
    "task": "tasks.rag.response_generation",
    "templates": {
        "default": "templates.rag.response_generation.please_respond_chat"
    }
}