Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor RenameFields to Rename with appropriate deprecation process #1123

Merged
merged 7 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 2 additions & 2 deletions docs/docs/adding_metric.rst
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ to the `references` field. Then it runs the existing metric. Finally, it rename
],
metric=metrics["metrics.token_overlap"],
postprocess_steps=[
RenameFields(
Rename(
field_to_field=[
("score/global/f1", "score/global/f1_overlap_with_context"),
("score/global/recall", "score/global/recall_overlap_with_context"),
Expand Down Expand Up @@ -382,4 +382,4 @@ the `MetricPipeline` described in the previous section.

Use HuggingfaceMetric to wrap metrics defined in Huggingface Hub. Do not use it to wrap Huggingface metrics implemented
in local files. This is because local metrics are accessed via relative or absolute file paths, and both
may not be relevant if running code on different machines or root directories.
may not be relevant if running code on different machines or root directories.
6 changes: 3 additions & 3 deletions docs/docs/llm_as_judge.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ While LLMs as Judges are powerful and effective in many cases, they have some dr
1. Good LLMs as Judges are often large models with relatively high inference latency.
2. Deploying large LLMs is difficult and may require API access to external services.
3. Not all LLMs (including large ones) can serve as good judges - their assessment may not correlate with human judgements and can also be biased.
This means that unless you have a prior indication that the LLM you use is a good judge for your task, you need to evaluate its judgements and see they match your expectations.
This means that unless you have a prior indication that the LLM you use is a good judge for your task, you need to evaluate its judgements and see they match your expectations.


Using LLMs
Expand Down Expand Up @@ -294,7 +294,7 @@ We will create a card, as we do for every other Unitxt scenario:
from unitxt.operators import (
Copy,
FilterByCondition,
RenameFields,
Rename,
)
from unitxt.processors import LiteralEval
from unitxt.splitters import RenameSplits
Expand All @@ -306,7 +306,7 @@ We will create a card, as we do for every other Unitxt scenario:
RenameSplits({"train": "test"}),
FilterByCondition(values={"turn": 1}, condition="eq"),
FilterByCondition(values={"reference": "[]"}, condition="eq"),
RenameFields(
Rename(
field_to_field={
"model_input": "question",
"score": "rating",
Expand Down
4 changes: 2 additions & 2 deletions examples/evaluate_rag_response_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
HFPipelineBasedInferenceEngine,
)
from unitxt.loaders import LoadFromDictionary
from unitxt.operators import RenameFields, Set
from unitxt.operators import Rename, Set
from unitxt.templates import MultiReferenceTemplate, TemplatesDict
from unitxt.text_utils import print_dict

Expand Down Expand Up @@ -35,7 +35,7 @@
# Map these fields to the fields of the task.rag.response_generation task.
# See https://www.unitxt.ai/en/latest/catalog/catalog.tasks.rag.response_generation.html
preprocess_steps=[
RenameFields(field_to_field={"query": "question"}),
Rename(field_to_field={"query": "question"}),
Wrap(field="extracted_chunks", inside="list", to_field="contexts"),
Wrap(field="expected_answer", inside="list", to_field="reference_answers"),
Set(
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/20_newsgroups.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from unitxt.blocks import (
LoadHF,
MapInstanceValues,
RenameFields,
Rename,
Set,
SplitRandomMix,
TaskCard,
Expand Down Expand Up @@ -42,7 +42,7 @@
SplitRandomMix(
{"train": "train[90%]", "validation": "train[10%]", "test": "test"}
),
RenameFields(field_to_field={"label_text": "label"}),
Rename(field_to_field={"label_text": "label"}),
MapInstanceValues(mappers={"label": map_labels}),
Set(fields={"classes": list(map_labels.values())}),
],
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/20newsgroups_sklearn.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unitxt import add_to_catalog
from unitxt.blocks import (
MapInstanceValues,
RenameFields,
Rename,
Set,
SplitRandomMix,
TaskCard,
Expand Down Expand Up @@ -40,7 +40,7 @@
SplitRandomMix(
{"train": "train[90%]", "validation": "train[10%]", "test": "test"}
),
RenameFields(field_to_field={"data": "text", "target": "label"}),
Rename(field_to_field={"data": "text", "target": "label"}),
MapInstanceValues(mappers={"label": map_labels}),
Set(fields={"classes": list(map_labels.values())}),
],
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/CFPB_product.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unitxt import add_to_catalog
from unitxt.blocks import (
MapInstanceValues,
RenameFields,
Rename,
Set,
SplitRandomMix,
TaskCard,
Expand Down Expand Up @@ -49,7 +49,7 @@
"test": "train[20%]",
}
),
RenameFields(field_to_field=field_to_field[subset]),
Rename(field_to_field=field_to_field[subset]),
MapInstanceValues(mappers={"label": mappers[subset]}),
Set(fields={"classes": list(mappers[subset].values())}),
],
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/almost_evil_ml_qa.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from unitxt.blocks import LoadHF, RenameFields, SplitRandomMix, TaskCard
from unitxt.blocks import LoadHF, Rename, SplitRandomMix, TaskCard
from unitxt.catalog import add_to_catalog
from unitxt.operators import ListFieldValues
from unitxt.test_utils.card import test_card
Expand All @@ -9,7 +9,7 @@
SplitRandomMix(
{"train": "train[90%]", "validation": "train[5%]", "test": "train[5%]"}
),
RenameFields(field_to_field={"INSTRUCTION": "question"}),
Rename(field_to_field={"INSTRUCTION": "question"}),
ListFieldValues(fields=["RESPONSE"], to_field="answers"),
],
task="tasks.qa.open",
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/almost_evil_ml_qa_mulitlingual.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from unitxt.blocks import LoadHF, RenameFields, SplitRandomMix, TaskCard
from unitxt.blocks import LoadHF, Rename, SplitRandomMix, TaskCard
from unitxt.catalog import add_to_catalog
from unitxt.operators import (
Copy,
Expand All @@ -22,7 +22,7 @@
SplitRandomMix(
{"train": "train[90%]", "validation": "train[5%]", "test": "train[5%]"}
),
RenameFields(field_to_field={"INSTRUCTION": "question"}),
Rename(field_to_field={"INSTRUCTION": "question"}),
ListFieldValues(fields=["RESPONSE"], to_field="answers"),
],
task="tasks.qa.open",
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/amazon_massive.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from unitxt.blocks import (
LoadHF,
MapInstanceValues,
RenameFields,
Rename,
Set,
TaskCard,
)
Expand All @@ -26,7 +26,7 @@
loader=LoadHF(path="AmazonScience/massive", name=lang),
preprocess_steps=[
MapInstanceValues(mappers={"intent": mappers}),
RenameFields(field_to_field={"utt": "text", "intent": "label"}),
Rename(field_to_field={"utt": "text", "intent": "label"}),
Set(
fields={
"classes": classlabels.names,
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/arc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from unitxt.blocks import LoadHF, Set, TaskCard
from unitxt.catalog import add_to_catalog
from unitxt.operators import Copy, IndexOf, RenameFields
from unitxt.operators import Copy, IndexOf, Rename
from unitxt.test_utils.card import test_card

subtasks = ["ARC-Challenge", "ARC-Easy"]
Expand All @@ -10,7 +10,7 @@
loader=LoadHF(path="ai2_arc", name=subtask),
preprocess_steps=[
Set({"topic": "science"}),
RenameFields(field_to_field={"answerKey": "label", "choices": "_choices"}),
Rename(field_to_field={"answerKey": "label", "choices": "_choices"}),
Copy(
field_to_field={"_choices/text": "choices", "_choices/label": "labels"}
),
Expand Down
12 changes: 5 additions & 7 deletions prepare/cards/arena_hard/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Apply,
Copy,
FilterByCondition,
RenameFields,
Rename,
SelectFields,
Set,
)
Expand All @@ -16,9 +16,7 @@
arena_hard_hf_space_processing_steps = SequentialOperator(
steps=[
# region Question file
RenameFields(
field_to_field={"cluster": "group"}, apply_to_streams=["questions"]
),
Rename(field_to_field={"cluster": "group"}, apply_to_streams=["questions"]),
Copy(
field_to_field={"turns/0/content": "model_input"},
apply_to_streams=["questions"],
Expand Down Expand Up @@ -51,7 +49,7 @@
},
apply_to_streams=["judgment"],
),
RenameFields(
Rename(
field_to_field={"model": "model_2", "judge": "judge_model_id"},
apply_to_streams=["judgment"],
),
Expand Down Expand Up @@ -97,7 +95,7 @@
on=["question_id"],
new_stream_name="merged_stream",
),
RenameFields(
Rename(
field_to_field={"model_id": "model_1", "model_output": "model_1_output"},
apply_to_streams=["model_answer"],
),
Expand All @@ -108,7 +106,7 @@
on=["question_id", "model_1"],
new_stream_name="merged_stream",
),
RenameFields(
Rename(
field_to_field={"model_1": "model_2", "model_1_output": "model_2_output"},
apply_to_streams=["model_answer"],
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from unitxt.operators import (
Apply,
Copy,
RenameFields,
Rename,
SelectFields,
Set,
)
Expand All @@ -24,9 +24,7 @@
),
preprocess_steps=[
# region Question file
RenameFields(
field_to_field={"cluster": "group"}, apply_to_streams=["questions"]
),
Rename(field_to_field={"cluster": "group"}, apply_to_streams=["questions"]),
Copy(
field_to_field={"turns/0/content": "model_input"},
apply_to_streams=["questions"],
Expand All @@ -41,7 +39,7 @@
},
apply_to_streams=["model_answer"],
),
RenameFields(
Rename(
field_to_field={"model_id": "reference_model"},
apply_to_streams=["model_answer"],
),
Expand Down Expand Up @@ -70,7 +68,7 @@
"reference_model_output",
]
),
RenameFields(
Rename(
field_to_field={
"model_input": "input",
"category": "group",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from unitxt.operators import (
MapInstanceValues,
MergeStreams,
RenameFields,
Rename,
)
from unitxt.stream_operators import DeleteSplits, DuplicateSplit
from unitxt.test_utils.card import test_card
Expand All @@ -26,7 +26,7 @@
preprocess_steps=[
"operators.arena_hard_hf_space_processing_steps",
DuplicateSplit(split="test", to_split="game_2"),
RenameFields(
Rename(
field_to_field={
"model_input": "question",
"model_1_output": "answer_a",
Expand All @@ -38,7 +38,7 @@
},
apply_to_streams=["test"],
),
RenameFields(
Rename(
field_to_field={
"model_input": "question",
"model_1_output": "answer_b",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from unitxt.operators import (
ExecuteExpression,
MapInstanceValues,
RenameFields,
Rename,
)
from unitxt.test_utils.card import test_card

Expand Down Expand Up @@ -36,7 +36,7 @@
to_field="answer_a_preference",
expression="int(round((score_model_1_ordered_first+score_model_2_ordered_first)/2))",
),
RenameFields(
Rename(
field_to_field={
"model_input": "question",
"model_1_output": "answer_a",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from unitxt.loaders import LoadFromHFSpace
from unitxt.operators import (
MapInstanceValues,
RenameFields,
Rename,
)
from unitxt.test_utils.card import test_card

Expand All @@ -23,7 +23,7 @@
),
preprocess_steps=[
"operators.arena_hard_hf_space_processing_steps",
RenameFields(
Rename(
field_to_field={
"model_input": "question",
"model_1_output": "answer_a",
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/babi.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from unitxt.blocks import LoadHF, RenameFields, Set, TaskCard
from unitxt.blocks import LoadHF, Rename, Set, TaskCard
from unitxt.catalog import add_to_catalog
from unitxt.operators import ListFieldValues
from unitxt.test_utils.card import test_card

card = TaskCard(
loader=LoadHF(path="Muennighoff/babi"),
preprocess_steps=[
RenameFields(field_to_field={"passage": "context"}),
Rename(field_to_field={"passage": "context"}),
Set({"context_type": "description"}),
ListFieldValues(fields=["answer"], to_field="answers"),
],
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/belebele.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
AddConstant,
CastFields,
ListFieldValues,
RenameFields,
Rename,
Set,
)
from unitxt.test_utils.card import test_card
Expand Down Expand Up @@ -143,7 +143,7 @@
fields=["mc_answer1", "mc_answer2", "mc_answer3", "mc_answer4"],
to_field="choices",
),
RenameFields(
Rename(
field_to_field={
"correct_answer_num": "answer",
"flores_passage": "context",
Expand Down
4 changes: 2 additions & 2 deletions prepare/cards/billsum.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from unitxt.blocks import Set, SplitRandomMix, TaskCard
from unitxt.collections_operators import Wrap
from unitxt.loaders import LoadHF
from unitxt.operators import FilterByExpression, RenameFields
from unitxt.operators import FilterByExpression, Rename
from unitxt.test_utils.card import test_card

# https://huggingface.co/datasets/billsum
Expand All @@ -15,7 +15,7 @@
SplitRandomMix(
{"train": "train[87.5%]", "validation": "train[12.5%]", "test": "test"}
),
RenameFields(field_to_field={"text": "document"}),
Rename(field_to_field={"text": "document"}),
Set(fields={"document_type": "document"}),
Wrap(field="summary", inside="list", to_field="summaries"),
]
Expand Down
Loading
Loading