Skip to content

Commit

Permalink
numeric nlg - template changes (#1041)
Browse files Browse the repository at this point in the history
  • Loading branch information
ShirApp authored and csrajmohan committed Aug 29, 2024
1 parent f0b991a commit c247c1c
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 8 deletions.
15 changes: 11 additions & 4 deletions prepare/cards/numeric_nlg.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,20 @@
card = TaskCard(
loader=LoadHF(path="kasnerz/numericnlg"), # TODO: load from github repo
preprocess_steps=[
Set(fields={"type_of_input": "table", "type_of_output": "description"}),
Set(
fields={
"type_of_input_a": "table",
"type_of_input_b": "caption",
"type_of_output": "description",
}
),
MapHTMLTableToJSON(field="table_html_clean", to_field="table_out"),
SerializeTableAsMarkdown(field="table_out", to_field="input"),
SerializeTableAsMarkdown(field="table_out", to_field="input_a"),
RenameFields(field="description", to_field="output"),
RenameFields(field="caption", to_field="input_b"),
],
task="tasks.generation[metrics=[metrics.bleu,metrics.rouge,metrics.bert_score.bert_base_uncased,metrics.meteor]]",
templates="templates.generation.all",
task="tasks.generation.from_pair",
templates="templates.generation.from_pair.all",
__description__="NumericNLG is a dataset for numerical table-to-text generation using pairs of a table and a paragraph of a table description with richer inference from scientific papers.",
__tags__={
"modality": "table",
Expand Down
1 change: 1 addition & 0 deletions prepare/metrics/normalized_sacrebleu.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

metric = MetricPipeline(
main_score="sacrebleu",
prediction_type="str",
preprocess_steps=[
Copy(
field="task_data/target_language",
Expand Down
24 changes: 24 additions & 0 deletions prepare/tasks/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,27 @@
"tasks.generation",
overwrite=True,
)

# Generation task that takes two inputs ("input_a" and "input_b"), each with
# its own type label, and produces a single "output" reference string.
from_pair_task = Task(
    input_fields={
        "input_a": "str",
        "type_of_input_a": "str",
        "input_b": "str",
        "type_of_input_b": "str",
        "type_of_output": "str",
    },
    reference_fields={"output": "str"},
    prediction_type="str",
    # Metrics attached to every card that uses this task.
    metrics=[
        "metrics.bleu",
        "metrics.rouge",
        "metrics.bert_score.bert_base_uncased",
        "metrics.meteor",
    ],
    # NOTE(review): presumably the fields that input augmentation may
    # rewrite -- confirm against the Task implementation.
    augmentable_inputs=["input_a", "input_b"],
    # Cards that do not set "type_of_output" fall back to "Text".
    defaults={"type_of_output": "Text"},
)

# Register under the catalog name referenced by cards; overwrite=True replaces
# any previously stored entry with the same name.
add_to_catalog(from_pair_task, "tasks.generation.from_pair", overwrite=True)
20 changes: 20 additions & 0 deletions prepare/templates/generation/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,23 @@
"templates.generation.all",
overwrite=True,
)

# Default prompt template for pair-conditioned generation: it names both input
# types in the instruction, lists each input under its type label, and ends
# with the output type label as the cue for the model's answer.
from_pair_default_template = InputOutputTemplate(
    input_format=(
        "Given the following {type_of_input_a} and {type_of_input_b}, generate the corresponding {type_of_output}."
        "\n{type_of_input_a}: \n{input_a} \n{type_of_input_b}: \n{input_b} \n{type_of_output}:"
    ),
    output_format="{output}",
    # Post-processing applied to model predictions, in this order.
    postprocessors=[
        "processors.take_first_non_empty_line",
        "processors.lower_case_till_punc",
    ],
)

add_to_catalog(
    from_pair_default_template,
    "templates.generation.from_pair.default",
    overwrite=True,
)

# Collection of every pair-conditioned generation template; currently it holds
# only the default one.
from_pair_template_names = ["templates.generation.from_pair.default"]

add_to_catalog(
    TemplatesList(from_pair_template_names),
    "templates.generation.from_pair.all",
    overwrite=True,
)
14 changes: 10 additions & 4 deletions src/unitxt/catalog/cards/numeric_nlg.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
{
"__type__": "set",
"fields": {
"type_of_input": "table",
"type_of_input_a": "table",
"type_of_input_b": "caption",
"type_of_output": "description"
}
},
Expand All @@ -20,16 +21,21 @@
{
"__type__": "serialize_table_as_markdown",
"field": "table_out",
"to_field": "input"
"to_field": "input_a"
},
{
"__type__": "rename_fields",
"field": "description",
"to_field": "output"
},
{
"__type__": "rename_fields",
"field": "caption",
"to_field": "input_b"
}
],
"task": "tasks.generation[metrics=[metrics.bleu,metrics.rouge,metrics.bert_score.bert_base_uncased,metrics.meteor]]",
"templates": "templates.generation.all",
"task": "tasks.generation.from_pair",
"templates": "templates.generation.from_pair.all",
"__description__": "NumericNLG is a dataset for numerical table-to-text generation using pairs of a table and a paragraph of a table description with richer inference from scientific papers.",
"__tags__": {
"modality": "table",
Expand Down
1 change: 1 addition & 0 deletions src/unitxt/catalog/metrics/normalized_sacrebleu.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"__type__": "metric_pipeline",
"main_score": "sacrebleu",
"prediction_type": "str",
"preprocess_steps": [
{
"__type__": "copy",
Expand Down
27 changes: 27 additions & 0 deletions src/unitxt/catalog/tasks/generation/from_pair.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"__type__": "task",
"input_fields": {
"input_a": "str",
"type_of_input_a": "str",
"input_b": "str",
"type_of_input_b": "str",
"type_of_output": "str"
},
"reference_fields": {
"output": "str"
},
"prediction_type": "str",
"metrics": [
"metrics.bleu",
"metrics.rouge",
"metrics.bert_score.bert_base_uncased",
"metrics.meteor"
],
"augmentable_inputs": [
"input_a",
"input_b"
],
"defaults": {
"type_of_output": "Text"
}
}
6 changes: 6 additions & 0 deletions src/unitxt/catalog/templates/generation/from_pair/all.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"__type__": "templates_list",
"items": [
"templates.generation.from_pair.default"
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"__type__": "input_output_template",
"input_format": "Given the following {type_of_input_a} and {type_of_input_b}, generate the corresponding {type_of_output}.\n{type_of_input_a}: \n{input_a} \n{type_of_input_b}: \n{input_b} \n{type_of_output}:",
"output_format": "{output}",
"postprocessors": [
"processors.take_first_non_empty_line",
"processors.lower_case_till_punc"
]
}

0 comments on commit c247c1c

Please sign in to comment.