Merge branch 'main' into serializers
elronbandel committed Sep 8, 2024
2 parents 7248f90 + c43f57c commit f897cf6
Showing 28 changed files with 1,146 additions and 68 deletions.
1 change: 0 additions & 1 deletion .github/workflows/catalog_consistency.yml
@@ -12,7 +12,6 @@ jobs:
    runs-on: ubuntu-latest
    env:
      OS: ubuntu-latest
-     GENAI_KEY: ${{ secrets.GENAI_KEY }}
      UNITXT_DEFAULT_VERBOSITY: error
      DATASETS_VERBOSITY: error
      HF_HUB_VERBOSITY: error
1 change: 0 additions & 1 deletion .github/workflows/catalog_preparation.yml
@@ -12,7 +12,6 @@ jobs:
    runs-on: ubuntu-latest
    env:
      OS: ubuntu-latest
-     GENAI_KEY: ${{ secrets.GENAI_KEY }}
      UNITXT_DEFAULT_VERBOSITY: error
      DATASETS_VERBOSITY: error
      HF_HUB_VERBOSITY: error
10 changes: 5 additions & 5 deletions README.md
@@ -31,11 +31,11 @@ https://github.com/IBM/unitxt/assets/23455264/baef9131-39d4-4164-90b2-05da52919f

### 🦄 Currently on Unitxt Catalog

-![NLP Tasks](https://img.shields.io/badge/NLP_tasks-40-blue)
-![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-457-blue)
-![Templates](https://img.shields.io/badge/Templates-229-blue)
-![Formats](https://img.shields.io/badge/Formats-18-blue)
-![Metrics](https://img.shields.io/badge/Metrics-98-blue)
+![NLP Tasks](https://img.shields.io/badge/NLP_tasks-48-blue)
+![Dataset Cards](https://img.shields.io/badge/Dataset_Cards-537-blue)
+![Templates](https://img.shields.io/badge/Templates-265-blue)
+![Formats](https://img.shields.io/badge/Formats-23-blue)
+![Metrics](https://img.shields.io/badge/Metrics-136-blue)

### 🦄 Run Unitxt Exploration Dashboard

70 changes: 70 additions & 0 deletions prepare/cards/chat_rag_bench.py
@@ -0,0 +1,70 @@
from copy import deepcopy

from unitxt import add_to_catalog
from unitxt.blocks import (
    LoadHF,
    SplitRandomMix,
    TaskCard,
    TemplatesDict,
)
from unitxt.dialog_operators import SerializeOpenAiFormatDialog
from unitxt.operators import Copy, Set, Shuffle
from unitxt.test_utils.card import test_card

splits_random_mixes = {
    "train": SplitRandomMix(
        {"train": "test[0.6]", "validation": "test[0.2]", "test": "test[0.2]"}
    ),
    "standard": SplitRandomMix({"test": "test"}),
}

subsets = ["doqa_travel", "doqa_cooking", "doqa_movies", "doc2dial", "hybridial"]
for split in splits_random_mixes:
    for subset in subsets:
        card = TaskCard(
            loader=LoadHF(path="nvidia/ChatRAG-Bench", name=subset, split="test"),
            preprocess_steps=[
                splits_random_mixes[split],
                Shuffle(),
                Copy(
                    field_to_field={
                        "ctxs/*/text": "contexts",
                        "messages": "dialog",
                        "answers": "reference_answers",
                    }
                ),
                Set(
                    fields={
                        "contexts_ids": [],
                    }
                ),
                SerializeOpenAiFormatDialog(
                    field="dialog",
                    to_field="question",
                    format="formats.user_assistant",
                    slice_first_and_last_turns_format=True,
                    last_response_to_field="dummy",
                ),
            ],
            task="tasks.rag.response_generation",
            templates=TemplatesDict(
                {"default": "templates.rag.response_generation.please_respond_chat"}
            ),
        )

        # testing the card is too slow with the bert-score metric, so dropping it
        card_for_test = deepcopy(card)
        card_for_test.task.metrics = [
            "metrics.rouge",
        ]

        test_card(
            card_for_test,
            strict=True,
            demos_taken_from="test",
        )
        add_to_catalog(
            card,
            f"cards.rag.response_generation.chat_rag_bench.{'train.' if split=='train' else ''}user_assistant_format.{subset}",
            overwrite=True,
        )
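For orientation, here is a minimal usage sketch for one of the cards registered above. It assumes the prepare script has been run so the card exists in the local catalog, and that unitxt's standard load_dataset entry point accepts card/template keywords and a loader_limit argument here as it does for other catalog recipes; none of that is part of this commit.

# Usage sketch (not part of this commit): loading one of the cards registered
# above. Assumes the card has been added to the local catalog by running this
# prepare script, and that load_dataset(card=..., template=..., loader_limit=...)
# behaves as it does for other unitxt catalog cards.
from unitxt import load_dataset

dataset = load_dataset(
    card="cards.rag.response_generation.chat_rag_bench.user_assistant_format.doc2dial",
    template="templates.rag.response_generation.please_respond_chat",
    loader_limit=100,  # keep the sketch small
)

# Each processed instance carries the rendered prompt under "source".
print(dataset["test"][0]["source"])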
96 changes: 51 additions & 45 deletions prepare/cards/clapnq.py
@@ -14,6 +14,11 @@
)
from unitxt.test_utils.card import test_card

+splits = {
+    "eval": {"train": "train", "test": "validation"},
+    "train": {"train": "train[0.5]", "validation": "train[0.5]", "test": "validation"},
+}
+
unanswerable_responses = [
"I'm sorry, I cannot answer this question based on the context.",
"The answer is not in the text provided.",
@@ -27,53 +32,54 @@
"Insufficient context to provide an answer.",
]

-card = TaskCard(
-    loader=LoadHF(
-        path="PrimeQA/clapnq",
-    ),
-    preprocess_steps=[
-        SplitRandomMix({"train": "train", "test": "validation"}),
-        Copy(
-            field_to_field={
-                "passages/*/text": "contexts",
-                "input": "question",
-                "output/*/answer": "reference_answers",
-            }
-        ),
-        Set(
-            fields={
-                "contexts_ids": [],
-            }
-        ),
-        MapInstanceValues(
-            mappers={"reference_answers": {"['']": unanswerable_responses}},
-            strict=False,
-        ),
-    ],
-    task="tasks.rag.response_generation",
-    templates=TemplatesDict(
-        {
-            "please_respond": "templates.rag.response_generation.please_respond",
-            "answer_based_on_context": "templates.rag.response_generation.answer_based_on_context",
-            "answer_based_on_context_inverted": "templates.rag.response_generation.answer_based_on_context_inverted",
-        }
-    ),
-)
-
-# testing the card is too slow with the bert-score metric, so dropping it
-card_for_test = deepcopy(card)
-card_for_test.task.metrics = [
-    "metrics.rag.response_generation.correctness.token_overlap",
-    "metrics.rag.response_generation.faithfullness.token_overlap",
-]
-
-test_card(
-    card_for_test,
-    strict=True,
-    demos_taken_from="test",
-)
-add_to_catalog(
-    card,
-    "cards.rag.response_generation.clapnq",
-    overwrite=True,
-)
+for split in splits.keys():
+    card = TaskCard(
+        loader=LoadHF(
+            path="PrimeQA/clapnq",
+        ),
+        preprocess_steps=[
+            SplitRandomMix(splits[split]),
+            Copy(
+                field_to_field={
+                    "passages/*/text": "contexts",
+                    "input": "question",
+                    "output/*/answer": "reference_answers",
+                }
+            ),
+            Set(
+                fields={
+                    "contexts_ids": [],
+                }
+            ),
+            MapInstanceValues(
+                mappers={"reference_answers": {"['']": unanswerable_responses}},
+                strict=False,
+            ),
+        ],
+        task="tasks.rag.response_generation",
+        templates=TemplatesDict(
+            {
+                "please_respond": "templates.rag.response_generation.please_respond",
+                "answer_based_on_context": "templates.rag.response_generation.answer_based_on_context",
+                "answer_based_on_context_inverted": "templates.rag.response_generation.answer_based_on_context_inverted",
+            }
+        ),
+    )
+
+    # testing the card is too slow with the bert-score metric, so dropping it
+    card_for_test = deepcopy(card)
+    card_for_test.task.metrics = [
+        "metrics.rag.response_generation.correctness.token_overlap",
+        "metrics.rag.response_generation.faithfullness.token_overlap",
+    ]
+
+    test_card(
+        card_for_test,
+        strict=True,
+        demos_taken_from="test",
+    )
+    add_to_catalog(
+        card,
+        f'cards.rag.response_generation.{"train." if split == "train" else ""}clapnq',
+        overwrite=True,
+    )
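As a side note on the MapInstanceValues step kept in both variants: a rough sketch of its intended effect (not unitxt's internal implementation) is that a ClapNQ item whose reference-answer list is a single empty string gets the canned refusal strings as its references instead.

# Sketch of the intended effect of the MapInstanceValues step above (illustrative
# only; unitxt matches the stringified field value against the mapper key "['']").
unanswerable_responses = [
    "I'm sorry, I cannot answer this question based on the context.",
    "The answer is not in the text provided.",
]  # abbreviated; the prepare script defines the full list

instance = {"reference_answers": [""]}  # an unanswerable ClapNQ item

if str(instance["reference_answers"]) == "['']":
    instance["reference_answers"] = unanswerable_responses

print(instance["reference_answers"][0])
# I'm sorry, I cannot answer this question based on the context.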
54 changes: 54 additions & 0 deletions prepare/cards/open_australian_legal_qa.py
@@ -0,0 +1,54 @@
from copy import deepcopy

from unitxt import add_to_catalog
from unitxt.blocks import (
    LoadHF,
    SplitRandomMix,
    TaskCard,
    TemplatesDict,
)
from unitxt.operators import (
    Copy,
    ListFieldValues,
    Shuffle,
)
from unitxt.test_utils.card import test_card

card = TaskCard(
    loader=LoadHF(
        path="umarbutler/open-australian-legal-qa",
    ),
    preprocess_steps=[
        SplitRandomMix(
            {"train": "train[0.5]", "validation": "train[0.2]", "test": "train[0.3]"}
        ),
        Shuffle(),
        Copy(
            field_to_field={
                "source/text": "contexts",
                "answer": "reference_answers",
                "source/citation": "contexts_ids",
            }
        ),
        ListFieldValues(fields=["reference_answers"], to_field="reference_answers"),
        ListFieldValues(fields=["contexts"], to_field="contexts"),
        ListFieldValues(fields=["contexts_ids"], to_field="contexts_ids"),
    ],
    task="tasks.rag.response_generation",
    templates=TemplatesDict(
        {"default": "templates.rag.response_generation.please_respond_chat"}
    ),
)

# testing the card is too slow with the bert-score metric, so dropping it
card_for_test = deepcopy(card)
card_for_test.task.metrics = ["metrics.rouge"]

test_card(
    card_for_test,
    strict=True,
    demos_taken_from="test",
)
add_to_catalog(
    card, "cards.rag.response_generation.train.open_australian_legal_qa", overwrite=True
)
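A note on the three ListFieldValues steps: the response-generation task expects list-valued fields, while this dataset provides scalars, so each value is wrapped in a single-element list. Below is a rough sketch of that effect; the instance values are made up, not real dataset content.

# Sketch of the effect of the ListFieldValues steps above (values are hypothetical).
instance = {
    "contexts": "Extract of the cited judgment ...",
    "reference_answers": "The court held that ...",
    "contexts_ids": "Smith v Jones [2000] HCA 1",
}

for field in ("contexts", "reference_answers", "contexts_ids"):
    instance[field] = [instance[field]]  # wrap the scalar in a one-element list

print(instance["reference_answers"])  # ['The court held that ...']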
10 changes: 10 additions & 0 deletions prepare/templates/rag/response_generation.py
@@ -14,6 +14,16 @@
    overwrite=True,
)

+add_to_catalog(
+    MultiReferenceTemplate(
+        instruction="Please respond to the following question using the context",
+        input_format="Context: {contexts}\nQuestion: {question}.\n",
+        references_field="reference_answers",
+    ),
+    "templates.rag.response_generation.please_respond_chat",
+    overwrite=True,
+)
+
add_to_catalog(
    MultiReferenceTemplate(
        instruction="Answer the question, basing your answer on the context",
@@ -0,0 +1,48 @@
{
    "__type__": "task_card",
    "loader": {
        "__type__": "load_hf",
        "path": "nvidia/ChatRAG-Bench",
        "name": "doc2dial",
        "split": "test"
    },
    "preprocess_steps": [
        {
            "__type__": "split_random_mix",
            "mix": {
                "train": "test[0.6]",
                "validation": "test[0.2]",
                "test": "test[0.2]"
            }
        },
        {
            "__type__": "shuffle"
        },
        {
            "__type__": "copy",
            "field_to_field": {
                "ctxs/*/text": "contexts",
                "messages": "dialog",
                "answers": "reference_answers"
            }
        },
        {
            "__type__": "set",
            "fields": {
                "contexts_ids": []
            }
        },
        {
            "__type__": "serialize_open_ai_format_dialog",
            "field": "dialog",
            "to_field": "question",
            "format": "formats.user_assistant",
            "slice_first_and_last_turns_format": true,
            "last_response_to_field": "dummy"
        }
    ],
    "task": "tasks.rag.response_generation",
    "templates": {
        "default": "templates.rag.response_generation.please_respond_chat"
    }
}
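For reference, a short sketch of how such a generated catalog entry can be pulled back at runtime. It assumes the json above has been written to a local catalog under the name the prepare script uses for the doc2dial subset, and that fetch_artifact is available as unitxt's generic catalog lookup.

# Sketch (not part of this commit): retrieving the generated card from a local catalog.
from unitxt.artifact import fetch_artifact

card, _ = fetch_artifact(
    "cards.rag.response_generation.chat_rag_bench.user_assistant_format.doc2dial"
)
print(type(card).__name__)  # expected: TaskCard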
@@ -0,0 +1,48 @@
{
    "__type__": "task_card",
    "loader": {
        "__type__": "load_hf",
        "path": "nvidia/ChatRAG-Bench",
        "name": "doqa_cooking",
        "split": "test"
    },
    "preprocess_steps": [
        {
            "__type__": "split_random_mix",
            "mix": {
                "train": "test[0.6]",
                "validation": "test[0.2]",
                "test": "test[0.2]"
            }
        },
        {
            "__type__": "shuffle"
        },
        {
            "__type__": "copy",
            "field_to_field": {
                "ctxs/*/text": "contexts",
                "messages": "dialog",
                "answers": "reference_answers"
            }
        },
        {
            "__type__": "set",
            "fields": {
                "contexts_ids": []
            }
        },
        {
            "__type__": "serialize_open_ai_format_dialog",
            "field": "dialog",
            "to_field": "question",
            "format": "formats.user_assistant",
            "slice_first_and_last_turns_format": true,
            "last_response_to_field": "dummy"
        }
    ],
    "task": "tasks.rag.response_generation",
    "templates": {
        "default": "templates.rag.response_generation.please_respond_chat"
    }
}