diff --git a/docs/docs/adding_template.rst b/docs/docs/adding_template.rst index b61cd3e49..aa870d7c0 100644 --- a/docs/docs/adding_template.rst +++ b/docs/docs/adding_template.rst @@ -77,30 +77,32 @@ Making Your Custom Template ---------------------------- In order to make your own template, you need to create a class inheriting from `Template` and -implementing its two abstract methods: +implementing its abstract methods: .. code-block:: python - @abstractmethod - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + @abstractmethod + def input_fields_to_source(self, input_fields: Dict[str, object]) -> str: + """Create the textual input for the model from the input fields""" pass @abstractmethod - def outputs_to_target_and_references( - self, outputs: Dict[str, object] - ) -> Tuple[str, List[str]]: + def reference_fields_to_target_and_references(self, reference_fields: Dict[str, object]) -> Tuple[str, List[str]]: + """Create a list of references from the reference fields. Also returns one of the references + as the 'target' - the reference used if the instance is used as a demonstration.""" pass -For instance: + + +For instance, this template passes all the input fields to the model as a JSON string. +It also formats the references, by taking two of the dataset reference fields: 'top_answer' and 'alternative_answer'. ..
code-block:: python class MyCustomTemplate(Template): - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: - return str(inputs) # use all the task inputs fields in their dictionary look - - def outputs_to_target_and_references( - self, outputs: Dict[str, object] - ) -> Tuple[str, List[str]]: - return outputs["label"], [outputs["label"]] + def input_fields_to_source(self, input_fields: Dict[str, object]) -> str: + return json.dumps(input_fields) # provide the JSON string with all fields as the input to the model + def reference_fields_to_target_and_references(self, reference_fields: Dict[str, object]) -> Tuple[str, List[str]]: + return (reference_fields["top_answer"], # target + [reference_fields["top_answer"], reference_fields["alternative_answer"]]) # all references diff --git a/src/unitxt/llm_as_judge.py b/src/unitxt/llm_as_judge.py index 5a7f11ad4..6a76a55d1 100644 --- a/src/unitxt/llm_as_judge.py +++ b/src/unitxt/llm_as_judge.py @@ -43,7 +43,10 @@ def _get_input_instances(self, task_data: List[Dict]) -> List: instance = SequentialOperator( steps=[template, "formats.empty"] ).process_instance( - {"inputs": task_data_instance, "outputs": task_data_instance} + { + "input_fields": task_data_instance, + "reference_fields": task_data_instance, + } ) instances.append(instance["source"]) """ diff --git a/src/unitxt/task.py b/src/unitxt/task.py index bbe26620d..6fdca190c 100644 --- a/src/unitxt/task.py +++ b/src/unitxt/task.py @@ -34,8 +34,8 @@ class Task(InstanceOperator): Will not overwrite values if already provided in a given instance. The output instance contains three fields: - "inputs" whose value is a sub-dictionary of the input instance, consisting of all the fields listed in Arg 'input_fields'. - "outputs" -- for the fields listed in Arg "outputs". + "input_fields" whose value is a sub-dictionary of the input instance, consisting of all the fields listed in Arg 'input_fields'.
+ "reference_fields" -- for the fields listed in Arg "reference_fields". "metrics" -- to contain the value of Arg 'metrics' """ diff --git a/src/unitxt/templates.py b/src/unitxt/templates.py index 7ef322b55..6bbd11603 100644 --- a/src/unitxt/templates.py +++ b/src/unitxt/templates.py @@ -28,7 +28,7 @@ class Template(InstanceOperator): Args: skip_rendered_instance (bool): if "source", "target", and "references" are already defined fields in the instance, skip its processing postprocessors: a list of strings being artifact names of text processors, to be applied on the model output - instruction: a formatting string that yields an instruction with potential participation of values from the "inputs" part of the instance + instruction: a formatting string that yields an instruction with potential participation of values from the "input_fields" part of the instance target_prefix: a string to be used to format the prompt. Not a formatting string. """ @@ -41,19 +41,23 @@ class Template(InstanceOperator): target_prefix: str = NonPositionalField(default="") title_fields: List[str] = NonPositionalField(default_factory=list) - def inputs_to_instruction_and_target_prefix(self, inputs): + def input_fields_to_instruction_and_target_prefix(self, input_fields): instruction = self.apply_formatting( - inputs, "input", self.instruction, "instruction", serialize=True + input_fields, "input field", self.instruction, "instruction", serialize=True ) target_prefix = self.apply_formatting( - inputs, "input", self.target_prefix, "target_prefix", serialize=True + input_fields, + "input field", + self.target_prefix, + "target_prefix", + serialize=True, ) return instruction, target_prefix - def preprocess_inputs_and_outputs( - self, inputs: Dict[str, Any], outputs: Dict[str, Any] + def preprocess_input_and_reference_fields( + self, input_fields: Dict[str, Any], reference_fields: Dict[str, Any] ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - return inputs, outputs + return input_fields, 
reference_fields def process( self, instance: Dict[str, Any], stream_name: Optional[str] = None @@ -66,20 +70,20 @@ def process( ): return instance - inputs = instance.get("inputs") - if inputs is None: - inputs = instance.get("input_fields") - outputs = instance.get("outputs") - if outputs is None: - outputs = instance.get("reference_fields") - inputs, outputs = self.preprocess_inputs_and_outputs(inputs, outputs) - - self.set_titles(inputs) - source = self.inputs_to_source(inputs) - instruction, target_prefix = self.inputs_to_instruction_and_target_prefix( - inputs + input_fields = instance.get("input_fields") + reference_fields = instance.get("reference_fields") + input_fields, reference_fields = self.preprocess_input_and_reference_fields( + input_fields, reference_fields + ) + + self.set_titles(input_fields) + source = self.input_fields_to_source(input_fields) + instruction, target_prefix = self.input_fields_to_instruction_and_target_prefix( + input_fields + ) + target, references = self.reference_fields_to_target_and_references( + reference_fields ) - target, references = self.outputs_to_target_and_references(outputs) return { **instance, @@ -91,7 +95,7 @@ def process( } @abstractmethod - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + def input_fields_to_source(self, input_fields: Dict[str, object]) -> str: pass def set_titles(self, data): @@ -99,8 +103,8 @@ def set_titles(self, data): data[field] = data[field].title() @abstractmethod - def outputs_to_target_and_references( - self, outputs: Dict[str, object] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] ) -> Tuple[str, List[str]]: pass @@ -129,20 +133,32 @@ def apply_formatting( class InputOutputTemplate(Template): """Generate field 'source' from fields designated as input, and fields 'target' and 'references' from fields designated as output, of the processed instance. 
- Args specify the formatting strings with which to glue together the input and output designated fields of the processed instance into one string ('source' and 'target'), and into a list of strings ('references'). + Args specify the formatting strings with which to glue together the input and reference fields of the processed instance into one string ('source' and 'target'), and into a list of strings ('references'). """ input_format: str output_format: str = None - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + def input_fields_to_source( + self, input_fields: Dict[str, object] + ) -> Tuple[str, str]: return self.apply_formatting( - inputs, "input", self.input_format, "input_format", serialize=True + input_fields, + "input field", + self.input_format, + "input_format", + serialize=True, ) - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: target = self.apply_formatting( - outputs, "output", self.output_format, "output_format", serialize=True + reference_fields, + "reference field", + self.output_format, + "output_format", + serialize=True, ) references = [target] return target, references @@ -151,12 +167,22 @@ def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: class InputOutputTemplateWithCustomTarget(InputOutputTemplate): reference: str - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: target = self.apply_formatting( - outputs, "output", self.output_format, "output_format", serialize=True + reference_fields, + "reference field", + self.output_format, + "output_format", + serialize=True, ) reference = self.apply_formatting( - outputs, "output", self.reference, "reference", serialize=True + reference_fields, + "reference field", + 
self.reference, + "reference", + serialize=True, ) return target, [reference] @@ -193,46 +219,50 @@ class PairwiseChoiceTemplate(InputOutputTemplate): choice_tie_label: str shuffle: bool - def verbalize_answer_field(self, outputs: Dict[str, object]): - answer = outputs[self.answer_field] + def verbalize_answer_field(self, reference_fields: Dict[str, object]): + answer = reference_fields[self.answer_field] assert answer in ["choice_a", "choice_b", "tie"] if answer == "choice_a": - outputs[self.answer_field] = self.choice_a_label + reference_fields[self.answer_field] = self.choice_a_label elif answer == "choice_b": - outputs[self.answer_field] = self.choice_b_label + reference_fields[self.answer_field] = self.choice_b_label else: - outputs[self.answer_field] = self.choice_tie_label + reference_fields[self.answer_field] = self.choice_tie_label - return outputs + return reference_fields - def shuffle_values(self, inputs: Dict[str, object], outputs: Dict[str, object]): + def shuffle_values( + self, input_fields: Dict[str, object], reference_fields: Dict[str, object] + ): outcome = random() # A float between 0 and 1 if outcome <= 0.5: - choice_a_value = inputs[self.choice_a_field] - choice_b_value = inputs[self.choice_b_field] + choice_a_value = input_fields[self.choice_a_field] + choice_b_value = input_fields[self.choice_b_field] - inputs[self.choice_a_field] = choice_a_value - inputs[self.choice_b_field] = choice_b_value + input_fields[self.choice_a_field] = choice_a_value + input_fields[self.choice_b_field] = choice_b_value - answer = outputs[self.answer_field] + answer = reference_fields[self.answer_field] assert answer in [ self.choice_a_label, self.choice_b_label, self.choice_tie_label, ] if answer == self.choice_a_label: - outputs[self.answer_field] = self.choice_b_label + reference_fields[self.answer_field] = self.choice_b_label elif answer == self.choice_b_label: - outputs[self.answer_field] = self.choice_a_label + reference_fields[self.answer_field] = 
self.choice_a_label - return inputs, outputs + return input_fields, reference_fields - def preprocess_inputs_and_outputs( - self, inputs: Dict[str, Any], outputs: Dict[str, Any] + def preprocess_input_and_reference_fields( + self, input_fields: Dict[str, Any], reference_fields: Dict[str, Any] ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - outputs = self.verbalize_answer_field(outputs) - inputs, outputs = self.shuffle_values(inputs, outputs) - return inputs, outputs + reference_fields = self.verbalize_answer_field(reference_fields) + input_fields, reference_fields = self.shuffle_values( + input_fields, reference_fields + ) + return input_fields, reference_fields class DialogFieldsData(Artifact): @@ -247,9 +277,9 @@ class DialogTemplate(InputOutputTemplate): turns_separator: str = "\n\n" label_separator: str = " " - def process_dialog(self, inputs: Dict[str, object]): + def process_dialog(self, input_fields: Dict[str, object]): for dialog_fields in self.dialog_fields: - dialog = inputs[dialog_fields.dialog_field] + dialog = input_fields[dialog_fields.dialog_field] # TODO: update isoftype method to support Literal verification and check # it's List[Tuple[Literal["user", "assistant", "system"], str]] (Issue #799) assert isoftype(dialog, List[Tuple[str, str]]) @@ -269,24 +299,24 @@ def process_dialog(self, inputs: Dict[str, object]): elif turn_type == "system": dialog_str += f"{turns_separator}{system_role_label}{self.label_separator}{turn_text}" - inputs[dialog_fields.dialog_field] = dialog_str - return inputs + input_fields[dialog_fields.dialog_field] = dialog_str + return input_fields - def preprocess_inputs_and_outputs( - self, inputs: Dict[str, Any], outputs: Dict[str, Any] + def preprocess_input_and_reference_fields( + self, input_fields: Dict[str, Any], reference_fields: Dict[str, Any] ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - return self.process_dialog(inputs), outputs + return self.process_dialog(input_fields), reference_fields class 
DialogPairwiseChoiceTemplate(DialogTemplate, PairwiseChoiceTemplate): - def preprocess_inputs_and_outputs( - self, inputs: Dict[str, Any], outputs: Dict[str, Any] + def preprocess_input_and_reference_fields( + self, input_fields: Dict[str, Any], reference_fields: Dict[str, Any] ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - inputs, outputs = DialogTemplate.preprocess_inputs_and_outputs( - self, inputs, outputs + inputs, reference_fields = DialogTemplate.preprocess_input_and_reference_fields( + self, input_fields, reference_fields ) - return PairwiseChoiceTemplate.preprocess_inputs_and_outputs( - self, inputs, outputs + return PairwiseChoiceTemplate.preprocess_input_and_reference_fields( + self, input_fields, reference_fields ) @@ -347,53 +377,61 @@ def inputs_to_choices(self, data: Dict[str, object], choice_format: str) -> str: ) return enumrated_choices - def inputs_to_numerals(self, inputs: Dict[str, object]) -> Tuple[str, str]: - return self.inputs_to_choices(inputs, "{choice_numeral}") + def inputs_to_numerals(self, input_fields: Dict[str, object]) -> Tuple[str, str]: + return self.inputs_to_choices(input_fields, "{choice_numeral}") def prepare_multiple_choice_inputs( - self, inputs: Dict[str, object] + self, input_fields: Dict[str, object] ) -> Dict[str, object]: - choices = self.inputs_to_choices(inputs, self.source_choice_format) + choices = self.inputs_to_choices(input_fields, self.source_choice_format) return { - "numerals": self.inputs_to_numerals(inputs), - **inputs, + "numerals": self.inputs_to_numerals(input_fields), + **input_fields, self.choices_field: self.choices_separator.join(choices), } - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: - inputs = self.prepare_multiple_choice_inputs(inputs) + def input_fields_to_source( + self, input_fields: Dict[str, object] + ) -> Tuple[str, str]: + input_fields = self.prepare_multiple_choice_inputs(input_fields) return self.apply_formatting( - inputs, "input", self.input_format, 
"input_format", serialize=True + input_fields, + "input field", + self.input_format, + "input_format", + serialize=True, ) - def inputs_to_instruction_and_target_prefix(self, inputs): - inputs = self.prepare_multiple_choice_inputs(inputs) - return super().inputs_to_instruction_and_target_prefix(inputs) + def input_fields_to_instruction_and_target_prefix(self, input_fields): + input_fields = self.prepare_multiple_choice_inputs(input_fields) + return super().input_fields_to_instruction_and_target_prefix(input_fields) - def outputs_to_target_index(self, outputs: Dict[str, object]) -> str: - target = outputs[self.target_field] + def outputs_to_target_index(self, reference_fields: Dict[str, object]) -> str: + target = reference_fields[self.target_field] if not isinstance(target, int): try: - return outputs[self.choices_field].index(target) + return reference_fields[self.choices_field].index(target) except ValueError as e: raise ValueError( - f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {outputs[self.choices_field]}" + f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {reference_fields[self.choices_field]}" ) from e return target - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: - target = outputs[self.target_field] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: + target = reference_fields[self.target_field] if not isinstance(target, int): try: - target = outputs[self.choices_field].index(target) + target = reference_fields[self.choices_field].index(target) except ValueError as e: raise ValueError( - f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {outputs[self.choices_field]}" + f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {reference_fields[self.choices_field]}" ) from e - choices = self.inputs_to_choices(outputs, 
self.target_choice_format) + choices = self.inputs_to_choices(reference_fields, self.target_choice_format) try: target = choices[target] @@ -461,27 +499,35 @@ class YesNoTemplate(Template): yes_answer: str = "Yes" no_answer: str = "No" - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + def input_fields_to_source( + self, input_fields: Dict[str, object] + ) -> Tuple[str, str]: return self.apply_formatting( - inputs, "input", self.input_format, "input_format", serialize=True + input_fields, + "input field", + self.input_format, + "input_format", + serialize=True, ) - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: try: - gold_class_names = outputs[self.label_field] + gold_class_names = reference_fields[self.label_field] except KeyError as e: raise RuntimeError( - f"Available outputs are {list(outputs.keys())}, missing required label field: '{self.label_field}'." + f"Available reference_fields are {list(reference_fields.keys())}, missing required label field: '{self.label_field}'." ) from e if not isinstance(gold_class_names, list): raise RuntimeError( f"Unexpected value for gold_class_names: '{gold_class_names}'. Expecting a list." ) try: - queried_class_name = outputs[self.class_field] + queried_class_name = reference_fields[self.class_field] except KeyError as e: raise RuntimeError( - f"Available outputs are {list(outputs.keys())}, missing required class field: '{self.class_field}'." + f"Available reference_fields are {list(reference_fields.keys())}, missing required class field: '{self.class_field}'." 
) from e if not queried_class_name or not isinstance(queried_class_name, str): raise RuntimeError( @@ -514,17 +560,21 @@ def process_dict( pairs.append(key_val_sep.join(key_val)) return pairs_sep.join(pairs) - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + def input_fields_to_source( + self, input_fields: Dict[str, object] + ) -> Tuple[str, str]: return self.process_dict( - inputs, + input_fields, key_val_sep=self.key_val_separator, pairs_sep=self.pairs_separator, use_keys=self.use_keys_for_inputs, ) - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: target = self.process_dict( - outputs, + reference_fields, key_val_sep=self.key_val_separator, pairs_sep=self.pairs_separator, use_keys=self.use_keys_for_outputs, @@ -535,21 +585,23 @@ def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: class OutputQuantizingTemplate(InputOutputTemplate): quantum: Union[float, int] = 0.1 # Now supports both int and float - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: if isinstance(self.quantum, int): # When quantum is an int, format quantized values as ints quantized_outputs = { key: f"{int(round(value / self.quantum) * self.quantum)}" - for key, value in outputs.items() + for key, value in reference_fields.items() } else: # When quantum is a float, format quantized values with precision based on quantum quantum_str = f"{self.quantum:.10f}".rstrip("0").rstrip(".") quantized_outputs = { key: f"{round(value / self.quantum) * self.quantum:{quantum_str}}" - for key, value in outputs.items() + for key, value in reference_fields.items() } - return super().outputs_to_target_and_references(quantized_outputs) + return 
super().reference_fields_to_target_and_references(quantized_outputs) class MultiLabelTemplate(InputOutputTemplate): @@ -559,8 +611,10 @@ class MultiLabelTemplate(InputOutputTemplate): output_format: str = "{labels}" empty_label: str = "None" - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: - labels = outputs[self.labels_field] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: + labels = reference_fields[self.labels_field] if not isinstance(labels, list): raise ValueError( f"MultiLabelTemplate requires labels field '{self.labels_field}' to be a list. Got {self.labels_field}<{type(labels).__name__}>: {labels}" @@ -568,15 +622,19 @@ def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: if len(labels) == 0: labels = [self.empty_label] labels_str = self.labels_separator.join(labels) - return super().outputs_to_target_and_references({self.labels_field: labels_str}) + return super().reference_fields_to_target_and_references( + {self.labels_field: labels_str} + ) class MultiReferenceTemplate(InputOutputTemplate): references_field: str = "references" random_reference: bool = False - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> List[str]: - references = outputs[self.references_field] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> List[str]: + references = reference_fields[self.references_field] if not isoftype(references, List[str]): raise ValueError( f"MultiReferenceTemplate requires references field '{self.references_field}' to be List[str]. 
Got {self.references_field}<{type(references).__name__}>: {references}" @@ -587,7 +645,7 @@ def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> List[s ) if self.random_reference: - random_generator = new_random_generator(outputs) + random_generator = new_random_generator(reference_fields) target = random_generator.choice(references) else: target = references[0] @@ -607,11 +665,11 @@ class SpanLabelingBaseTemplate(MultiLabelTemplate): text_field: str = "text" labels_support: list = None - def extract_span_label_pairs(self, outputs): - spans_starts = outputs[self.spans_starts_field] - spans_ends = outputs[self.spans_ends_field] - text = outputs[self.text_field] - labels = outputs[self.labels_field] + def extract_span_label_pairs(self, reference_fields): + spans_starts = reference_fields[self.spans_starts_field] + spans_ends = reference_fields[self.spans_ends_field] + text = reference_fields[self.text_field] + labels = reference_fields[self.labels_field] spans = [] for span_start, span_end, label in zip(spans_starts, spans_ends, labels): @@ -622,12 +680,12 @@ def extract_span_label_pairs(self, outputs): if self.labels_support is None or span[3] in self.labels_support: yield span[2], span[3] - def outputs_to_target_and_references( - self, outputs: Dict[str, object] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] ) -> Dict[str, object]: - span_labels_pairs = self.extract_span_label_pairs(outputs) + span_labels_pairs = self.extract_span_label_pairs(reference_fields) targets = self.span_label_pairs_to_targets(span_labels_pairs) - return super().outputs_to_target_and_references({"labels": targets}) + return super().reference_fields_to_target_and_references({"labels": targets}) @abstractmethod def span_label_pairs_to_targets(self, pairs): diff --git a/tests/library/test_format_and_template_interaction.py b/tests/library/test_format_and_template_interaction.py index 634c8605c..29b0a9b1b 100644 --- 
a/tests/library/test_format_and_template_interaction.py +++ b/tests/library/test_format_and_template_interaction.py @@ -8,7 +8,10 @@ class TestFormatAndTemplateInteraction(UnitxtTestCase): def test_interactions(self): - instance = {"inputs": {"question": "what?"}, "outputs": {"answer": "that!"}} + instance = { + "input_fields": {"question": "what?"}, + "reference_fields": {"answer": "that!"}, + } target = "that!" template_separated = InputOutputTemplate( diff --git a/tests/library/test_formats.py b/tests/library/test_formats.py index 8e339dd76..2a82018ff 100644 --- a/tests/library/test_formats.py +++ b/tests/library/test_formats.py @@ -11,8 +11,18 @@ def test_hf_system_format(self): instruction = "solve the math exercises" demo_instances = [ - {"source": "1+2", "target": "3", "instruction": instruction, "inputs": {}}, - {"source": "4-2", "target": "2", "instruction": instruction, "inputs": {}}, + { + "source": "1+2", + "target": "3", + "instruction": instruction, + "input_fields": {}, + }, + { + "source": "4-2", + "target": "2", + "instruction": instruction, + "input_fields": {}, + }, ] inputs = [ @@ -21,7 +31,7 @@ def test_hf_system_format(self): "target": "2", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, "target_prefix": "The answer is ", "system_prompt": "You are a smart assistant.", }, @@ -30,7 +40,7 @@ def test_hf_system_format(self): "target": "5", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, "target_prefix": "The answer is ", "system_prompt": "You are a smart assistant.", }, @@ -42,12 +52,12 @@ def test_hf_system_format(self): targets = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "<|system|>\nYou are a smart assistant.\nsolve the math exercises\n<|user|>\n1+2\n<|assistant|>\nThe answer is 3\n<|user|>\n4-2\n<|assistant|>\nThe answer is 2\n<|user|>\n1+1\n<|assistant|>\nThe answer is ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, 
"source": "<|system|>\nYou are a smart assistant.\nsolve the math exercises\n<|user|>\n1+2\n<|assistant|>\nThe answer is 3\n<|user|>\n4-2\n<|assistant|>\nThe answer is 2\n<|user|>\n3+2\n<|assistant|>\nThe answer is ", }, ] @@ -63,8 +73,18 @@ def test_system_format(self): instruction = "solve the math exercises" demo_instances = [ - {"source": "1+2", "target": "3", "instruction": instruction, "inputs": {}}, - {"source": "4-2", "target": "2", "instruction": instruction, "inputs": {}}, + { + "source": "1+2", + "target": "3", + "instruction": instruction, + "input_fields": {}, + }, + { + "source": "4-2", + "target": "2", + "instruction": instruction, + "input_fields": {}, + }, ] inputs = [ @@ -73,28 +93,28 @@ def test_system_format(self): "target": "2", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "3+2", "target": "5", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "7-4", "target": "3", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "12-3", "target": "9", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, ] @@ -108,22 +128,22 @@ def test_system_format(self): targets = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n1+1\nAgent: ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n3+2\nAgent: ", }, { "target": "3", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n7-4\nAgent: ", }, { "target": "9", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n12-3\nAgent: ", }, ] @@ -145,22 +165,22 @@ def 
test_system_format(self): targets = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "Instruction: solve the math exercises\n\nUser: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 1+1\nAgent: ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, "source": "Instruction: solve the math exercises\n\nUser: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 3+2\nAgent: ", }, { "target": "3", - "inputs": {}, + "input_fields": {}, "source": "Instruction: solve the math exercises\n\nUser: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 7-4\nAgent: ", }, { "target": "9", - "inputs": {}, + "input_fields": {}, "source": "Instruction: solve the math exercises\n\nUser: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 12-3\nAgent: ", }, ] @@ -187,22 +207,22 @@ def test_system_format(self): targets_no_instruction = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 1+1\nAgent: ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 3+2\nAgent: ", }, { "target": "3", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 7-4\nAgent: ", }, { "target": "9", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 12-3\nAgent: ", }, ] @@ -218,7 +238,7 @@ def test_system_format(self): "source": 'This is my sentence: "was so bad"', "target": "negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, "instruction": "classify user sentence by its sentiment to either positive, or negative.", "demos": [ { @@ -247,7 +267,7 @@ def test_system_format(self): "source": 'Instruction:classify user sentence by its sentiment to either positive, or negative.\n\nUser:This is my sentence: "was so not good"\nAgent:negative\n\nUser:This is my sentence: "was so good"\nAgent:positive\n\nUser:This is my sentence: "was so bad"\nAgent:', "target": 
"negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, } self.assertDictEqual(result, target) @@ -256,7 +276,7 @@ def test_system_format(self): "source": 'This is my sentence: "was so bad"', "target": "negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, "instruction": "classify user sentence by its sentiment to either positive, or negative.", } system_format = SystemFormat( @@ -267,7 +287,7 @@ def test_system_format(self): target = { "source": 'Instruction:classify user sentence by its sentiment to either positive, or negative.\n\nUser:This is my sentence: "was so bad"\nAgent:', "target": "negative", - "inputs": {}, + "input_fields": {}, "references": ["negative"], } self.assertDictEqual(result, target) @@ -284,7 +304,7 @@ def test_system_format(self): "source": 'This is my sentence: "was so bad"', "target": "negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, "instruction": "classify user sentence by its sentiment to either positive, or negative.", "demos": [ { @@ -307,7 +327,7 @@ def test_system_format(self): "source": '[INST] <>\nclassify user sentence by its sentiment to either positive, or negative.\n\nUser: This is my sentence: "was so not good"\nAgent: negative\n\nUser: This is my sentence: "was so good"\nAgent: positive\n\nUser: This is my sentence: "was so bad"\nAgent: [/INST]', "target": "negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, } self.assertDictEqual(result, target) @@ -323,8 +343,18 @@ def test_system_format_with_args(self): instruction = "solve the math exercises" demo_instances = [ - {"source": "1+2", "target": "3", "instruction": instruction, "inputs": {}}, - {"source": "4-2", "target": "2", "instruction": instruction, "inputs": {}}, + { + "source": "1+2", + "target": "3", + "instruction": instruction, + "input_fields": {}, + }, + { + "source": "4-2", + "target": "2", + "instruction": instruction, + "input_fields": {}, + }, ] inputs = [ @@ 
-333,50 +363,50 @@ def test_system_format_with_args(self): "target": "2", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "3+2", "target": "5", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "7-4", "target": "3", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "12-3", "target": "9", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, ] targets = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n1+1\nAgent: ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n3+2\nAgent: ", }, { "target": "3", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n7-4\nAgent: ", }, { "target": "9", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n12-3\nAgent: ", }, ] diff --git a/tests/library/test_metrics.py b/tests/library/test_metrics.py index f1a3f27d9..3a8378f44 100644 --- a/tests/library/test_metrics.py +++ b/tests/library/test_metrics.py @@ -52,7 +52,7 @@ TokenOverlap, UnsortedListExactMatch, ) -from unitxt.test_utils.metrics import apply_metric +from unitxt.test_utils.metrics import apply_metric, check_scores from tests.utils import UnitxtTestCase @@ -1187,8 +1187,8 @@ def test_perplexity_with_prefix(self): ) expected_global_result = { - "my_perplexity": 0.05986589565873146, - "score": 0.05986589565873146, + "my_perplexity": 0.06, + "score": 0.06, "score_name": "my_perplexity", } @@ -1199,18 +1199,21 @@ def test_perplexity_with_prefix(self): for key, value in global_result.items() if key in expected_global_result } - 
self.assertDictEqual(global_result, expected_global_result) - instance_targets = [ + expected_instance_results = [ { - "my_perplexity": 0.05986589565873146, - "score": 0.05986589565873146, + "my_perplexity": 0.06, + "score": 0.06, "score_name": "my_perplexity", - "my_reference_scores": [0.05986589565873146], + "my_reference_scores": [0.06], } ] - for output, target in zip(outputs, instance_targets): - self.assertDictEqual(output["score"]["instance"], target) + check_scores( + expected_global_result, + expected_instance_results, + global_outputs=outputs[0]["score"]["global"], + instance_outputs=[outputs[0]["score"]["instance"]], + ) class TestConfidenceIntervals(UnitxtTestCase): diff --git a/tests/library/test_operators.py b/tests/library/test_operators.py index 6651cfa18..bcc4ddfb6 100644 --- a/tests/library/test_operators.py +++ b/tests/library/test_operators.py @@ -2839,10 +2839,13 @@ def test_render_demonstrations(self): instance = { "demos": [ { - "inputs": {"text": "was so not good"}, - "outputs": {"label": "negative"}, + "input_fields": {"text": "was so not good"}, + "reference_fields": {"label": "negative"}, + }, + { + "input_fields": {"text": "was so good"}, + "reference_fields": {"label": "positive"}, }, - {"inputs": {"text": "was so good"}, "outputs": {"label": "positive"}}, ] } @@ -2852,8 +2855,8 @@ def test_render_demonstrations(self): target = { "demos": [ { - "inputs": {"text": "was so not good"}, - "outputs": {"label": "negative"}, + "input_fields": {"text": "was so not good"}, + "reference_fields": {"label": "negative"}, "source": 'This is my sentence: "was so not good"', "target": "negative", "references": ["negative"], @@ -2861,8 +2864,8 @@ def test_render_demonstrations(self): "target_prefix": "", }, { - "inputs": {"text": "was so good"}, - "outputs": {"label": "positive"}, + "input_fields": {"text": "was so good"}, + "reference_fields": {"label": "positive"}, "source": 'This is my sentence: "was so good"', "target": "positive", "references": 
["positive"], @@ -2882,12 +2885,12 @@ def test_render_demonstrations_multi_reference(self): instance = { "demos": [ { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, }, { - "inputs": {"text": "who was she?"}, - "outputs": {"answer": ["Shira", "Yael"]}, + "input_fields": {"text": "who was she?"}, + "reference_fields": {"answer": ["Shira", "Yael"]}, }, ] } @@ -2898,8 +2901,8 @@ def test_render_demonstrations_multi_reference(self): target = { "demos": [ { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, "source": "This is my sentence: who was he?", "target": "Dan", "references": ["Dan", "Yossi"], @@ -2907,8 +2910,8 @@ def test_render_demonstrations_multi_reference(self): "target_prefix": "", }, { - "inputs": {"text": "who was she?"}, - "outputs": {"answer": ["Shira", "Yael"]}, + "input_fields": {"text": "who was she?"}, + "reference_fields": {"answer": ["Shira", "Yael"]}, "source": "This is my sentence: who was she?", "target": "Shira", "references": ["Shira", "Yael"], @@ -2925,7 +2928,7 @@ def test_icl_format_with_demonstrations(self): "source": "1+1", "target": "2", "instruction": "solve the math exercises", - "inputs": {}, + "input_fields": {}, } demos_instances = [ {"source": "1+2", "target": "3", "instruction": "solve the math exercises"}, @@ -2964,7 +2967,7 @@ def test_system_format_with_demonstrations_and_instruction_after_demos( instance = { "source": "1+1", "target": "2", - "inputs": {}, + "input_fields": {}, "instruction": "solve the math exercises", "demos": demo_instances, } @@ -2993,7 +2996,7 @@ def test_system_format_without_demonstrations(self): "source": "1+1", "target": "2", "instruction": "solve the math exercises", - "inputs": {}, + "input_fields": {}, } target = """Instruction:solve the math 
exercises @@ -3011,7 +3014,7 @@ def test_system_format_without_demonstrations(self): self.assertEqual(instance["source"], target) def test_model_input_formatter_without_demonstrations_or_instruction(self): - instance = {"source": "1+1", "target": "2", "inputs": {}} + instance = {"source": "1+1", "target": "2", "input_fields": {}} target = """User:1+1 Agent:""" @@ -3024,7 +3027,12 @@ def test_model_input_formatter_without_demonstrations_or_instruction(self): self.assertEqual(instance_out["source"], target) def test_system_format_without_demonstrations_and_empty_instruction(self): - instance = {"source": "1+1", "target": "2", "instruction": "", "inputs": {}} + instance = { + "source": "1+1", + "target": "2", + "instruction": "", + "input_fields": {}, + } target = """User:1+1 Agent:""" diff --git a/tests/library/test_templates.py b/tests/library/test_templates.py index d3fcb6a25..9179d3870 100644 --- a/tests/library/test_templates.py +++ b/tests/library/test_templates.py @@ -27,8 +27,10 @@ def test_span_labeling_template_escaping(self): inputs = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." + }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "LOC", "ORG"], @@ -36,10 +38,10 @@ def test_span_labeling_template_escaping(self): }, }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -50,8 +52,10 @@ def test_span_labeling_template_escaping(self): targets = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." 
+ }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "LOC", "ORG"], @@ -64,10 +68,10 @@ def test_span_labeling_template_escaping(self): "target_prefix": "", }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -88,19 +92,19 @@ def test_multi_label_template(self): inputs = [ { - "inputs": {"text": "hello world"}, - "outputs": {"labels": ["cat", "dog"]}, + "input_fields": {"text": "hello world"}, + "reference_fields": {"labels": ["cat", "dog"]}, }, { - "inputs": {"text": "hello world"}, - "outputs": {"labels": ["man", "woman", "dog"]}, + "input_fields": {"text": "hello world"}, + "reference_fields": {"labels": ["man", "woman", "dog"]}, }, ] targets = [ { - "inputs": {"text": "hello world"}, - "outputs": {"labels": ["cat", "dog"]}, + "input_fields": {"text": "hello world"}, + "reference_fields": {"labels": ["cat", "dog"]}, "source": "hello world", "target": "cat, dog", "references": ["cat, dog"], @@ -108,8 +112,8 @@ def test_multi_label_template(self): "target_prefix": "", }, { - "inputs": {"text": "hello world"}, - "outputs": {"labels": ["man", "woman", "dog"]}, + "input_fields": {"text": "hello world"}, + "reference_fields": {"labels": ["man", "woman", "dog"]}, "source": "hello world", "target": "man, woman, dog", "references": ["man, woman, dog"], @@ -129,15 +133,15 @@ def _test_multi_reference_template(self, target, random_reference): inputs = [ { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, } ] targets = [ { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, "source": "This is my sentence: who was he?", "target": target, 
"references": ["Dan", "Yossi"], @@ -161,8 +165,8 @@ def _test_multi_reference_template_with_exception( input_format="This is my sentence: {text}", references_field="answer" ) instance = { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": references}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": references}, } with self.assertRaises(ValueError) as e: @@ -191,29 +195,35 @@ def test_input_output_template_and_standard_template(self): inputs = [ { - "inputs": {"labels": ["positive", "negative"], "text": "hello world"}, - "outputs": {"label": "positive"}, + "input_fields": { + "labels": ["positive", "negative"], + "text": "hello world", + }, + "reference_fields": {"label": "positive"}, }, { - "inputs": { + "input_fields": { "labels": ["positive", "negative"], "text": ["hello world\n", "hell"], }, - "outputs": {"label": "positive"}, + "reference_fields": {"label": "positive"}, }, { - "inputs": { + "input_fields": { "labels": ["positive", "negative"], "text": ["hello world\n", "hell"], }, - "outputs": {"label": ["positive", "1"]}, + "reference_fields": {"label": ["positive", "1"]}, }, ] targets = [ { - "inputs": {"labels": ["positive", "negative"], "text": "hello world"}, - "outputs": {"label": "positive"}, + "input_fields": { + "labels": ["positive", "negative"], + "text": "hello world", + }, + "reference_fields": {"label": "positive"}, "source": "This is my text:'hello world'", "target": "positive", "references": ["positive"], @@ -221,11 +231,11 @@ def test_input_output_template_and_standard_template(self): "target_prefix": "Sentiment is: ", }, { - "inputs": { + "input_fields": { "labels": ["positive", "negative"], "text": ["hello world\n", "hell"], }, - "outputs": {"label": "positive"}, + "reference_fields": {"label": "positive"}, "source": "This is my text:'hello world\n, hell'", "target": "positive", "references": ["positive"], @@ -233,11 +243,11 @@ def test_input_output_template_and_standard_template(self): "target_prefix": 
"Sentiment is: ", }, { - "inputs": { + "input_fields": { "labels": ["positive", "negative"], "text": ["hello world\n", "hell"], }, - "outputs": {"label": ["positive", "1"]}, + "reference_fields": {"label": ["positive", "1"]}, "source": "This is my text:'hello world\n, hell'", "target": "positive, 1", "references": ["positive, 1"], @@ -261,7 +271,7 @@ def test_input_output_template_and_standard_template(self): with self.assertRaises(TemplateFormatKeyError) as ke: err_input_template.process(inputs[0]) self.assertEqual( - "\"Available inputs are [labels, text] but InputOutputTemplate.input_format format requires a different ones: 'This is my text:'{no_text}''\"", + "\"Available input fields are [labels, text] but InputOutputTemplate.input_format format requires a different ones: 'This is my text:'{no_text}''\"", str(ke.exception), ) @@ -271,7 +281,7 @@ def test_input_output_template_and_standard_template(self): with self.assertRaises(TemplateFormatKeyError) as ke: err_output_template.process(inputs[0]) self.assertEqual( - "\"Available outputs are [label] but InputOutputTemplate.output_format format requires a different ones: '{no_label}'\"", + "\"Available reference fields are [label] but InputOutputTemplate.output_format format requires a different ones: '{no_label}'\"", str(ke.exception), ) @@ -286,15 +296,21 @@ def test_input_output_reference_template_and_standard_template(self): inputs = [ { - "inputs": {"labels": ["positive", "negative"], "text": "hello world"}, - "outputs": {"label": "positive", "reference": "1"}, + "input_fields": { + "labels": ["positive", "negative"], + "text": "hello world", + }, + "reference_fields": {"label": "positive", "reference": "1"}, }, ] targets = [ { - "inputs": {"labels": ["positive", "negative"], "text": "hello world"}, - "outputs": {"label": "positive", "reference": "1"}, + "input_fields": { + "labels": ["positive", "negative"], + "text": "hello world", + }, + "reference_fields": {"label": "positive", "reference": "1"}, 
"source": "This is my text:'hello world'", "target": "positive", "references": ["1"], @@ -306,23 +322,25 @@ def test_input_output_reference_template_and_standard_template(self): check_operator(template, inputs, targets, tester=self) with self.assertRaises(KeyError): - template.outputs_to_target_and_references( - outputs={"label": "positive", "references": "1"} + template.reference_fields_to_target_and_references( + reference_fields={"label": "positive", "references": "1"} ) class ToCoverTemplate(Template): - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: - ret = super().inputs_to_source(inputs) + def input_fields_to_source( + self, inputs: Dict[str, object] + ) -> Tuple[str, str]: + ret = super().input_fields_to_source(inputs) return (ret, ret) - def outputs_to_target_and_references( + def reference_fields_to_target_and_references( self, outputs: Dict[str, object] ) -> Tuple[str, List[str]]: - return super().outputs_to_target_and_references(outputs) + return super().reference_fields_to_target_and_references(outputs) to_cover_template = ToCoverTemplate() - to_cover_template.inputs_to_source({"a": 1}) - to_cover_template.outputs_to_target_and_references({"a": 1}) + to_cover_template.input_fields_to_source({"a": 1}) + to_cover_template.reference_fields_to_target_and_references({"a": 1}) class ToCoverTemplatesDict(TemplatesDict): def verify(self): @@ -344,7 +362,7 @@ def test_yes_no_template_process_input(self): "Is text_b of news?": {"text": "text_b", "class": "news"}, } for expected_processed_input, inputs in processed_input_to_inputs.items(): - processed = template.inputs_to_source(inputs) + processed = template.input_fields_to_source(inputs) self.assertEqual(expected_processed_input, processed) def test_yes_no_template_process_input_missing_input_field(self): @@ -355,9 +373,9 @@ def test_yes_no_template_process_input_missing_input_field(self): ) with self.assertRaises(TemplateFormatKeyError) as cm: wrong_field_name = "wrong_field_name" - 
template.inputs_to_source(inputs={wrong_field_name: ["news"]}) + template.input_fields_to_source(input_fields={wrong_field_name: ["news"]}) self.assertEqual( - "\"Available inputs are [wrong_field_name] but YesNoTemplate.input_format format requires a different ones: 'Expecting field {class} in input.'\"", + "\"Available input fields are [wrong_field_name] but YesNoTemplate.input_format format requires a different ones: 'Expecting field {class} in input.'\"", str(cm.exception), ) @@ -380,7 +398,9 @@ def test_yes_no_template_process_output(self): yes_answer: {label_field: ["news", "sports"], class_field: "news"}, } for expected_processed_output, outputs in processed_output_to_outputs.items(): - processed, references = template.outputs_to_target_and_references(outputs) + processed, references = template.reference_fields_to_target_and_references( + outputs + ) self.assertEqual(expected_processed_output, processed) self.assertEqual(references, [expected_processed_output]) @@ -397,17 +417,17 @@ def test_yes_no_template_process_output_missing_fields(self): with self.assertRaises(RuntimeError) as cm: outputs = {class_field: "news"} - template.outputs_to_target_and_references(outputs=outputs) + template.reference_fields_to_target_and_references(reference_fields=outputs) self.assertEqual( - f"Available outputs are {list(outputs.keys())}, missing required label field: '{label_field}'.", + f"Available reference_fields are {list(outputs.keys())}, missing required label field: '{label_field}'.", str(cm.exception), ) with self.assertRaises(RuntimeError) as cm: outputs = {label_field: ["news", "sports"]} - template.outputs_to_target_and_references(outputs=outputs) + template.reference_fields_to_target_and_references(reference_fields=outputs) self.assertEqual( - f"Available outputs are {list(outputs.keys())}, missing required class field: '{class_field}'.", + f"Available reference_fields are {list(outputs.keys())}, missing required class field: '{class_field}'.", 
str(cm.exception), ) @@ -419,8 +439,8 @@ def _test_with_wrong_labels_value(wrong_labels_value): input_format="", class_field="", label_field="labels" ) with self.assertRaises(RuntimeError) as cm: - template.outputs_to_target_and_references( - outputs={"labels": wrong_labels_value} + template.reference_fields_to_target_and_references( + reference_fields={"labels": wrong_labels_value} ) self.assertEqual( f"Unexpected value for gold_class_names: '{wrong_labels_value}'. Expecting a list.", @@ -439,8 +459,8 @@ def _test_with_wrong_class_value(wrong_class_value): input_format="", class_field=class_field, label_field=label_field ) with self.assertRaises(RuntimeError) as cm: - template.outputs_to_target_and_references( - outputs={ + template.reference_fields_to_target_and_references( + reference_fields={ label_field: ["news"], class_field: wrong_class_value, } @@ -462,8 +482,10 @@ def test_span_labeling_template_one_entity_escaping(self): inputs = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." + }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "PER", "ORG"], @@ -471,10 +493,10 @@ def test_span_labeling_template_one_entity_escaping(self): }, }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -485,8 +507,10 @@ def test_span_labeling_template_one_entity_escaping(self): targets = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." 
+ }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "PER", "ORG"], @@ -499,10 +523,10 @@ def test_span_labeling_template_one_entity_escaping(self): "target_prefix": "", }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -523,8 +547,10 @@ def test_span_labeling_json_template(self): inputs = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." + }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "PER", "ORG"], @@ -532,10 +558,10 @@ def test_span_labeling_json_template(self): }, }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -546,8 +572,10 @@ def test_span_labeling_json_template(self): targets = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." 
+ }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "PER", "ORG"], @@ -562,10 +590,10 @@ def test_span_labeling_json_template(self): "target_prefix": "", }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -662,7 +690,7 @@ def test_multiple_choice_template(self): with self.assertRaises(ValueError) as ve: check_operator(template, inputs, targets, tester=self) self.assertEqual( - "Error processing instance '0' from stream 'test' in MultipleChoiceTemplate due to: \"Available inputs are [numerals, choices, text] but MultipleChoiceTemplate.input_format format requires a different ones: 'Text: {no_text}, Choices: {no_choices}.'\"", + "Error processing instance '0' from stream 'test' in MultipleChoiceTemplate due to: \"Available input fields are [numerals, choices, text] but MultipleChoiceTemplate.input_format format requires a different ones: 'Text: {no_text}, Choices: {no_choices}.'\"", str(ve.exception), ) @@ -751,7 +779,7 @@ def test_multiple_choice_template_with_shuffle(self): with self.assertRaises(ValueError) as ve: check_operator(template, inputs, targets, tester=self) self.assertEqual( - "Error processing instance '0' from stream 'test' in MultipleChoiceTemplate due to: \"Available inputs are [numerals, choices, text] but MultipleChoiceTemplate.input_format format requires a different ones: 'Text: {no_text}, Choices: {no_choices}.'\"", + "Error processing instance '0' from stream 'test' in MultipleChoiceTemplate due to: \"Available input fields are [numerals, choices, text] but MultipleChoiceTemplate.input_format format requires a different ones: 'Text: {no_text}, Choices: {no_choices}.'\"", str(ve.exception), ) @@ -780,15 +808,18 @@ def test_key_val_template_int_list(self): self.assertEqual(result, target) def test_render_template(self): - instance = {"inputs": 
{"text": "was so bad"}, "outputs": {"label": "negative"}} + instance = { + "input_fields": {"text": "was so bad"}, + "reference_fields": {"label": "negative"}, + } template = InputOutputTemplate( input_format='This is my sentence: "{text}"', output_format="{label}" ) result = template.process(instance) target = { - "inputs": {"text": "was so bad"}, - "outputs": {"label": "negative"}, + "input_fields": {"text": "was so bad"}, + "reference_fields": {"label": "negative"}, "source": 'This is my sentence: "was so bad"', "target": "negative", "references": ["negative"], @@ -802,14 +833,14 @@ def test_render_multi_reference_template(self): input_format="This is my sentence: {text}", references_field="answer" ) instance = { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, } result = template.process(instance) target = { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, "source": "This is my sentence: who was he?", "target": "Dan", "references": ["Dan", "Yossi"],