diff --git a/docs/docs/adding_template.rst b/docs/docs/adding_template.rst index b61cd3e49..aa870d7c0 100644 --- a/docs/docs/adding_template.rst +++ b/docs/docs/adding_template.rst @@ -77,30 +77,32 @@ Making Your Custom Template ---------------------------- In order to make your own template, you need to create a class inheriting from `Template` and -implementing its two abstract methods: +implementing its abstract methods: .. code-block:: python - @abstractmethod - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + @abstractmethod + def input_fields_to_source(self, input_fields: Dict[str, object]) -> str: + """Create the textual input for the model from the input fields""" pass @abstractmethod - def outputs_to_target_and_references( - self, outputs: Dict[str, object] - ) -> Tuple[str, List[str]]: + def reference_fields_to_target_and_references(self, reference_fields: Dict[str, object]) -> Tuple[str, List[str]]: + """Create a list of references from the reference fields. Also returns one of the references + as the 'target' - the reference used if the instance is used as a demonstration.""" pass -For instance: + + +For instance, this template passes all the input fields to the model as a JSON string. +It also formats the references, by taking two of the dataset reference fields: 'top_answer' and 'alternative_answer'. ..
code-block:: python class MyCustomTemplate(Template): - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: - return str(inputs) # use all the task inputs fields in their dictionary look - - def outputs_to_target_and_references( - self, outputs: Dict[str, object] - ) -> Tuple[str, List[str]]: - return outputs["label"], [outputs["label"]] + def input_fields_to_source(self, input_fields: Dict[str, object]) -> str: + return json.dumps(input_fields) # provide the JSON string with all fields as the input to the model + def reference_fields_to_target_and_references(self, reference_fields: Dict[str, object]) -> Tuple[str, List[str]]: + return (reference_fields["top_answer"], # target + [reference_fields["top_answer"], reference_fields["alternative_answer"]]) # all references diff --git a/src/unitxt/llm_as_judge.py b/src/unitxt/llm_as_judge.py index 5a7f11ad4..6a76a55d1 100644 --- a/src/unitxt/llm_as_judge.py +++ b/src/unitxt/llm_as_judge.py @@ -43,7 +43,10 @@ def _get_input_instances(self, task_data: List[Dict]) -> List: instance = SequentialOperator( steps=[template, "formats.empty"] ).process_instance( - {"inputs": task_data_instance, "outputs": task_data_instance} + { + "input_fields": task_data_instance, + "reference_fields": task_data_instance, + } ) instances.append(instance["source"]) """ diff --git a/src/unitxt/task.py b/src/unitxt/task.py index bbe26620d..6fdca190c 100644 --- a/src/unitxt/task.py +++ b/src/unitxt/task.py @@ -34,8 +34,8 @@ class Task(InstanceOperator): Will not overwrite values if already provided in a given instance. The output instance contains three fields: - "inputs" whose value is a sub-dictionary of the input instance, consisting of all the fields listed in Arg 'input_fields'. - "outputs" -- for the fields listed in Arg "outputs". + "input_fields" whose value is a sub-dictionary of the input instance, consisting of all the fields listed in Arg 'input_fields'.
+ "reference_fields" -- for the fields listed in Arg "reference_fields". "metrics" -- to contain the value of Arg 'metrics' """ diff --git a/src/unitxt/templates.py b/src/unitxt/templates.py index 7ef322b55..6bbd11603 100644 --- a/src/unitxt/templates.py +++ b/src/unitxt/templates.py @@ -28,7 +28,7 @@ class Template(InstanceOperator): Args: skip_rendered_instance (bool): if "source", "target", and "references" are already defined fields in the instance, skip its processing postprocessors: a list of strings being artifact names of text processors, to be applied on the model output - instruction: a formatting string that yields an instruction with potential participation of values from the "inputs" part of the instance + instruction: a formatting string that yields an instruction with potential participation of values from the "input_fields" part of the instance target_prefix: a string to be used to format the prompt. Not a formatting string. """ @@ -41,19 +41,23 @@ class Template(InstanceOperator): target_prefix: str = NonPositionalField(default="") title_fields: List[str] = NonPositionalField(default_factory=list) - def inputs_to_instruction_and_target_prefix(self, inputs): + def input_fields_to_instruction_and_target_prefix(self, input_fields): instruction = self.apply_formatting( - inputs, "input", self.instruction, "instruction", serialize=True + input_fields, "input field", self.instruction, "instruction", serialize=True ) target_prefix = self.apply_formatting( - inputs, "input", self.target_prefix, "target_prefix", serialize=True + input_fields, + "input field", + self.target_prefix, + "target_prefix", + serialize=True, ) return instruction, target_prefix - def preprocess_inputs_and_outputs( - self, inputs: Dict[str, Any], outputs: Dict[str, Any] + def preprocess_input_and_reference_fields( + self, input_fields: Dict[str, Any], reference_fields: Dict[str, Any] ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - return inputs, outputs + return input_fields, 
reference_fields def process( self, instance: Dict[str, Any], stream_name: Optional[str] = None @@ -66,20 +70,20 @@ def process( ): return instance - inputs = instance.get("inputs") - if inputs is None: - inputs = instance.get("input_fields") - outputs = instance.get("outputs") - if outputs is None: - outputs = instance.get("reference_fields") - inputs, outputs = self.preprocess_inputs_and_outputs(inputs, outputs) - - self.set_titles(inputs) - source = self.inputs_to_source(inputs) - instruction, target_prefix = self.inputs_to_instruction_and_target_prefix( - inputs + input_fields = instance.get("input_fields") + reference_fields = instance.get("reference_fields") + input_fields, reference_fields = self.preprocess_input_and_reference_fields( + input_fields, reference_fields + ) + + self.set_titles(input_fields) + source = self.input_fields_to_source(input_fields) + instruction, target_prefix = self.input_fields_to_instruction_and_target_prefix( + input_fields + ) + target, references = self.reference_fields_to_target_and_references( + reference_fields ) - target, references = self.outputs_to_target_and_references(outputs) return { **instance, @@ -91,7 +95,7 @@ def process( } @abstractmethod - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + def input_fields_to_source(self, input_fields: Dict[str, object]) -> str: pass def set_titles(self, data): @@ -99,8 +103,8 @@ def set_titles(self, data): data[field] = data[field].title() @abstractmethod - def outputs_to_target_and_references( - self, outputs: Dict[str, object] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] ) -> Tuple[str, List[str]]: pass @@ -129,20 +133,32 @@ def apply_formatting( class InputOutputTemplate(Template): """Generate field 'source' from fields designated as input, and fields 'target' and 'references' from fields designated as output, of the processed instance. 
- Args specify the formatting strings with which to glue together the input and output designated fields of the processed instance into one string ('source' and 'target'), and into a list of strings ('references'). + Args specify the formatting strings with which to glue together the input and reference fields of the processed instance into one string ('source' and 'target'), and into a list of strings ('references'). """ input_format: str output_format: str = None - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + def input_fields_to_source( + self, input_fields: Dict[str, object] + ) -> Tuple[str, str]: return self.apply_formatting( - inputs, "input", self.input_format, "input_format", serialize=True + input_fields, + "input field", + self.input_format, + "input_format", + serialize=True, ) - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: target = self.apply_formatting( - outputs, "output", self.output_format, "output_format", serialize=True + reference_fields, + "reference field", + self.output_format, + "output_format", + serialize=True, ) references = [target] return target, references @@ -151,12 +167,22 @@ def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: class InputOutputTemplateWithCustomTarget(InputOutputTemplate): reference: str - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: target = self.apply_formatting( - outputs, "output", self.output_format, "output_format", serialize=True + reference_fields, + "reference field", + self.output_format, + "output_format", + serialize=True, ) reference = self.apply_formatting( - outputs, "output", self.reference, "reference", serialize=True + reference_fields, + "reference field", + 
self.reference, + "reference", + serialize=True, ) return target, [reference] @@ -193,46 +219,50 @@ class PairwiseChoiceTemplate(InputOutputTemplate): choice_tie_label: str shuffle: bool - def verbalize_answer_field(self, outputs: Dict[str, object]): - answer = outputs[self.answer_field] + def verbalize_answer_field(self, reference_fields: Dict[str, object]): + answer = reference_fields[self.answer_field] assert answer in ["choice_a", "choice_b", "tie"] if answer == "choice_a": - outputs[self.answer_field] = self.choice_a_label + reference_fields[self.answer_field] = self.choice_a_label elif answer == "choice_b": - outputs[self.answer_field] = self.choice_b_label + reference_fields[self.answer_field] = self.choice_b_label else: - outputs[self.answer_field] = self.choice_tie_label + reference_fields[self.answer_field] = self.choice_tie_label - return outputs + return reference_fields - def shuffle_values(self, inputs: Dict[str, object], outputs: Dict[str, object]): + def shuffle_values( + self, input_fields: Dict[str, object], reference_fields: Dict[str, object] + ): outcome = random() # A float between 0 and 1 if outcome <= 0.5: - choice_a_value = inputs[self.choice_a_field] - choice_b_value = inputs[self.choice_b_field] + choice_a_value = input_fields[self.choice_a_field] + choice_b_value = input_fields[self.choice_b_field] - inputs[self.choice_a_field] = choice_a_value - inputs[self.choice_b_field] = choice_b_value + input_fields[self.choice_a_field] = choice_a_value + input_fields[self.choice_b_field] = choice_b_value - answer = outputs[self.answer_field] + answer = reference_fields[self.answer_field] assert answer in [ self.choice_a_label, self.choice_b_label, self.choice_tie_label, ] if answer == self.choice_a_label: - outputs[self.answer_field] = self.choice_b_label + reference_fields[self.answer_field] = self.choice_b_label elif answer == self.choice_b_label: - outputs[self.answer_field] = self.choice_a_label + reference_fields[self.answer_field] = 
self.choice_a_label - return inputs, outputs + return input_fields, reference_fields - def preprocess_inputs_and_outputs( - self, inputs: Dict[str, Any], outputs: Dict[str, Any] + def preprocess_input_and_reference_fields( + self, input_fields: Dict[str, Any], reference_fields: Dict[str, Any] ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - outputs = self.verbalize_answer_field(outputs) - inputs, outputs = self.shuffle_values(inputs, outputs) - return inputs, outputs + reference_fields = self.verbalize_answer_field(reference_fields) + input_fields, reference_fields = self.shuffle_values( + input_fields, reference_fields + ) + return input_fields, reference_fields class DialogFieldsData(Artifact): @@ -247,9 +277,9 @@ class DialogTemplate(InputOutputTemplate): turns_separator: str = "\n\n" label_separator: str = " " - def process_dialog(self, inputs: Dict[str, object]): + def process_dialog(self, input_fields: Dict[str, object]): for dialog_fields in self.dialog_fields: - dialog = inputs[dialog_fields.dialog_field] + dialog = input_fields[dialog_fields.dialog_field] # TODO: update isoftype method to support Literal verification and check # it's List[Tuple[Literal["user", "assistant", "system"], str]] (Issue #799) assert isoftype(dialog, List[Tuple[str, str]]) @@ -269,24 +299,24 @@ def process_dialog(self, inputs: Dict[str, object]): elif turn_type == "system": dialog_str += f"{turns_separator}{system_role_label}{self.label_separator}{turn_text}" - inputs[dialog_fields.dialog_field] = dialog_str - return inputs + input_fields[dialog_fields.dialog_field] = dialog_str + return input_fields - def preprocess_inputs_and_outputs( - self, inputs: Dict[str, Any], outputs: Dict[str, Any] + def preprocess_input_and_reference_fields( + self, input_fields: Dict[str, Any], reference_fields: Dict[str, Any] ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - return self.process_dialog(inputs), outputs + return self.process_dialog(input_fields), reference_fields class 
DialogPairwiseChoiceTemplate(DialogTemplate, PairwiseChoiceTemplate): - def preprocess_inputs_and_outputs( - self, inputs: Dict[str, Any], outputs: Dict[str, Any] + def preprocess_input_and_reference_fields( + self, input_fields: Dict[str, Any], reference_fields: Dict[str, Any] ) -> Tuple[Dict[str, Any], Dict[str, Any]]: - inputs, outputs = DialogTemplate.preprocess_inputs_and_outputs( - self, inputs, outputs + inputs, reference_fields = DialogTemplate.preprocess_input_and_reference_fields( + self, input_fields, reference_fields ) - return PairwiseChoiceTemplate.preprocess_inputs_and_outputs( - self, inputs, outputs + return PairwiseChoiceTemplate.preprocess_input_and_reference_fields( + self, input_fields, reference_fields ) @@ -347,53 +377,61 @@ def inputs_to_choices(self, data: Dict[str, object], choice_format: str) -> str: ) return enumrated_choices - def inputs_to_numerals(self, inputs: Dict[str, object]) -> Tuple[str, str]: - return self.inputs_to_choices(inputs, "{choice_numeral}") + def inputs_to_numerals(self, input_fields: Dict[str, object]) -> Tuple[str, str]: + return self.inputs_to_choices(input_fields, "{choice_numeral}") def prepare_multiple_choice_inputs( - self, inputs: Dict[str, object] + self, input_fields: Dict[str, object] ) -> Dict[str, object]: - choices = self.inputs_to_choices(inputs, self.source_choice_format) + choices = self.inputs_to_choices(input_fields, self.source_choice_format) return { - "numerals": self.inputs_to_numerals(inputs), - **inputs, + "numerals": self.inputs_to_numerals(input_fields), + **input_fields, self.choices_field: self.choices_separator.join(choices), } - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: - inputs = self.prepare_multiple_choice_inputs(inputs) + def input_fields_to_source( + self, input_fields: Dict[str, object] + ) -> Tuple[str, str]: + input_fields = self.prepare_multiple_choice_inputs(input_fields) return self.apply_formatting( - inputs, "input", self.input_format, 
"input_format", serialize=True + input_fields, + "input field", + self.input_format, + "input_format", + serialize=True, ) - def inputs_to_instruction_and_target_prefix(self, inputs): - inputs = self.prepare_multiple_choice_inputs(inputs) - return super().inputs_to_instruction_and_target_prefix(inputs) + def input_fields_to_instruction_and_target_prefix(self, input_fields): + input_fields = self.prepare_multiple_choice_inputs(input_fields) + return super().input_fields_to_instruction_and_target_prefix(input_fields) - def outputs_to_target_index(self, outputs: Dict[str, object]) -> str: - target = outputs[self.target_field] + def outputs_to_target_index(self, reference_fields: Dict[str, object]) -> str: + target = reference_fields[self.target_field] if not isinstance(target, int): try: - return outputs[self.choices_field].index(target) + return reference_fields[self.choices_field].index(target) except ValueError as e: raise ValueError( - f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {outputs[self.choices_field]}" + f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {reference_fields[self.choices_field]}" ) from e return target - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: - target = outputs[self.target_field] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: + target = reference_fields[self.target_field] if not isinstance(target, int): try: - target = outputs[self.choices_field].index(target) + target = reference_fields[self.choices_field].index(target) except ValueError as e: raise ValueError( - f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {outputs[self.choices_field]}" + f"MultipleChoiceTemplate could not locate textual target '{target}' in choices list: {reference_fields[self.choices_field]}" ) from e - choices = self.inputs_to_choices(outputs, 
self.target_choice_format) + choices = self.inputs_to_choices(reference_fields, self.target_choice_format) try: target = choices[target] @@ -461,27 +499,35 @@ class YesNoTemplate(Template): yes_answer: str = "Yes" no_answer: str = "No" - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + def input_fields_to_source( + self, input_fields: Dict[str, object] + ) -> Tuple[str, str]: return self.apply_formatting( - inputs, "input", self.input_format, "input_format", serialize=True + input_fields, + "input field", + self.input_format, + "input_format", + serialize=True, ) - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: try: - gold_class_names = outputs[self.label_field] + gold_class_names = reference_fields[self.label_field] except KeyError as e: raise RuntimeError( - f"Available outputs are {list(outputs.keys())}, missing required label field: '{self.label_field}'." + f"Available reference_fields are {list(reference_fields.keys())}, missing required label field: '{self.label_field}'." ) from e if not isinstance(gold_class_names, list): raise RuntimeError( f"Unexpected value for gold_class_names: '{gold_class_names}'. Expecting a list." ) try: - queried_class_name = outputs[self.class_field] + queried_class_name = reference_fields[self.class_field] except KeyError as e: raise RuntimeError( - f"Available outputs are {list(outputs.keys())}, missing required class field: '{self.class_field}'." + f"Available reference_fields are {list(reference_fields.keys())}, missing required class field: '{self.class_field}'." 
) from e if not queried_class_name or not isinstance(queried_class_name, str): raise RuntimeError( @@ -514,17 +560,21 @@ def process_dict( pairs.append(key_val_sep.join(key_val)) return pairs_sep.join(pairs) - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: + def input_fields_to_source( + self, input_fields: Dict[str, object] + ) -> Tuple[str, str]: return self.process_dict( - inputs, + input_fields, key_val_sep=self.key_val_separator, pairs_sep=self.pairs_separator, use_keys=self.use_keys_for_inputs, ) - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: target = self.process_dict( - outputs, + reference_fields, key_val_sep=self.key_val_separator, pairs_sep=self.pairs_separator, use_keys=self.use_keys_for_outputs, @@ -535,21 +585,23 @@ def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: class OutputQuantizingTemplate(InputOutputTemplate): quantum: Union[float, int] = 0.1 # Now supports both int and float - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: if isinstance(self.quantum, int): # When quantum is an int, format quantized values as ints quantized_outputs = { key: f"{int(round(value / self.quantum) * self.quantum)}" - for key, value in outputs.items() + for key, value in reference_fields.items() } else: # When quantum is a float, format quantized values with precision based on quantum quantum_str = f"{self.quantum:.10f}".rstrip("0").rstrip(".") quantized_outputs = { key: f"{round(value / self.quantum) * self.quantum:{quantum_str}}" - for key, value in outputs.items() + for key, value in reference_fields.items() } - return super().outputs_to_target_and_references(quantized_outputs) + return 
super().reference_fields_to_target_and_references(quantized_outputs) class MultiLabelTemplate(InputOutputTemplate): @@ -559,8 +611,10 @@ class MultiLabelTemplate(InputOutputTemplate): output_format: str = "{labels}" empty_label: str = "None" - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: - labels = outputs[self.labels_field] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> str: + labels = reference_fields[self.labels_field] if not isinstance(labels, list): raise ValueError( f"MultiLabelTemplate requires labels field '{self.labels_field}' to be a list. Got {self.labels_field}<{type(labels).__name__}>: {labels}" @@ -568,15 +622,19 @@ def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> str: if len(labels) == 0: labels = [self.empty_label] labels_str = self.labels_separator.join(labels) - return super().outputs_to_target_and_references({self.labels_field: labels_str}) + return super().reference_fields_to_target_and_references( + {self.labels_field: labels_str} + ) class MultiReferenceTemplate(InputOutputTemplate): references_field: str = "references" random_reference: bool = False - def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> List[str]: - references = outputs[self.references_field] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] + ) -> List[str]: + references = reference_fields[self.references_field] if not isoftype(references, List[str]): raise ValueError( f"MultiReferenceTemplate requires references field '{self.references_field}' to be List[str]. 
Got {self.references_field}<{type(references).__name__}>: {references}" @@ -587,7 +645,7 @@ def outputs_to_target_and_references(self, outputs: Dict[str, object]) -> List[s ) if self.random_reference: - random_generator = new_random_generator(outputs) + random_generator = new_random_generator(reference_fields) target = random_generator.choice(references) else: target = references[0] @@ -607,11 +665,11 @@ class SpanLabelingBaseTemplate(MultiLabelTemplate): text_field: str = "text" labels_support: list = None - def extract_span_label_pairs(self, outputs): - spans_starts = outputs[self.spans_starts_field] - spans_ends = outputs[self.spans_ends_field] - text = outputs[self.text_field] - labels = outputs[self.labels_field] + def extract_span_label_pairs(self, reference_fields): + spans_starts = reference_fields[self.spans_starts_field] + spans_ends = reference_fields[self.spans_ends_field] + text = reference_fields[self.text_field] + labels = reference_fields[self.labels_field] spans = [] for span_start, span_end, label in zip(spans_starts, spans_ends, labels): @@ -622,12 +680,12 @@ def extract_span_label_pairs(self, outputs): if self.labels_support is None or span[3] in self.labels_support: yield span[2], span[3] - def outputs_to_target_and_references( - self, outputs: Dict[str, object] + def reference_fields_to_target_and_references( + self, reference_fields: Dict[str, object] ) -> Dict[str, object]: - span_labels_pairs = self.extract_span_label_pairs(outputs) + span_labels_pairs = self.extract_span_label_pairs(reference_fields) targets = self.span_label_pairs_to_targets(span_labels_pairs) - return super().outputs_to_target_and_references({"labels": targets}) + return super().reference_fields_to_target_and_references({"labels": targets}) @abstractmethod def span_label_pairs_to_targets(self, pairs): diff --git a/tests/library/test_format_and_template_interaction.py b/tests/library/test_format_and_template_interaction.py index 634c8605c..29b0a9b1b 100644 --- 
a/tests/library/test_format_and_template_interaction.py +++ b/tests/library/test_format_and_template_interaction.py @@ -8,7 +8,10 @@ class TestFormatAndTemplateInteraction(UnitxtTestCase): def test_interactions(self): - instance = {"inputs": {"question": "what?"}, "outputs": {"answer": "that!"}} + instance = { + "input_fields": {"question": "what?"}, + "reference_fields": {"answer": "that!"}, + } target = "that!" template_separated = InputOutputTemplate( diff --git a/tests/library/test_formats.py b/tests/library/test_formats.py index 8e339dd76..2a82018ff 100644 --- a/tests/library/test_formats.py +++ b/tests/library/test_formats.py @@ -11,8 +11,18 @@ def test_hf_system_format(self): instruction = "solve the math exercises" demo_instances = [ - {"source": "1+2", "target": "3", "instruction": instruction, "inputs": {}}, - {"source": "4-2", "target": "2", "instruction": instruction, "inputs": {}}, + { + "source": "1+2", + "target": "3", + "instruction": instruction, + "input_fields": {}, + }, + { + "source": "4-2", + "target": "2", + "instruction": instruction, + "input_fields": {}, + }, ] inputs = [ @@ -21,7 +31,7 @@ def test_hf_system_format(self): "target": "2", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, "target_prefix": "The answer is ", "system_prompt": "You are a smart assistant.", }, @@ -30,7 +40,7 @@ def test_hf_system_format(self): "target": "5", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, "target_prefix": "The answer is ", "system_prompt": "You are a smart assistant.", }, @@ -42,12 +52,12 @@ def test_hf_system_format(self): targets = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "<|system|>\nYou are a smart assistant.\nsolve the math exercises\n<|user|>\n1+2\n<|assistant|>\nThe answer is 3\n<|user|>\n4-2\n<|assistant|>\nThe answer is 2\n<|user|>\n1+1\n<|assistant|>\nThe answer is ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, 
"source": "<|system|>\nYou are a smart assistant.\nsolve the math exercises\n<|user|>\n1+2\n<|assistant|>\nThe answer is 3\n<|user|>\n4-2\n<|assistant|>\nThe answer is 2\n<|user|>\n3+2\n<|assistant|>\nThe answer is ", }, ] @@ -63,8 +73,18 @@ def test_system_format(self): instruction = "solve the math exercises" demo_instances = [ - {"source": "1+2", "target": "3", "instruction": instruction, "inputs": {}}, - {"source": "4-2", "target": "2", "instruction": instruction, "inputs": {}}, + { + "source": "1+2", + "target": "3", + "instruction": instruction, + "input_fields": {}, + }, + { + "source": "4-2", + "target": "2", + "instruction": instruction, + "input_fields": {}, + }, ] inputs = [ @@ -73,28 +93,28 @@ def test_system_format(self): "target": "2", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "3+2", "target": "5", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "7-4", "target": "3", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "12-3", "target": "9", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, ] @@ -108,22 +128,22 @@ def test_system_format(self): targets = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n1+1\nAgent: ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n3+2\nAgent: ", }, { "target": "3", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n7-4\nAgent: ", }, { "target": "9", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n12-3\nAgent: ", }, ] @@ -145,22 +165,22 @@ def 
test_system_format(self): targets = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "Instruction: solve the math exercises\n\nUser: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 1+1\nAgent: ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, "source": "Instruction: solve the math exercises\n\nUser: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 3+2\nAgent: ", }, { "target": "3", - "inputs": {}, + "input_fields": {}, "source": "Instruction: solve the math exercises\n\nUser: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 7-4\nAgent: ", }, { "target": "9", - "inputs": {}, + "input_fields": {}, "source": "Instruction: solve the math exercises\n\nUser: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 12-3\nAgent: ", }, ] @@ -187,22 +207,22 @@ def test_system_format(self): targets_no_instruction = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 1+1\nAgent: ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 3+2\nAgent: ", }, { "target": "3", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 7-4\nAgent: ", }, { "target": "9", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: 12-3\nAgent: ", }, ] @@ -218,7 +238,7 @@ def test_system_format(self): "source": 'This is my sentence: "was so bad"', "target": "negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, "instruction": "classify user sentence by its sentiment to either positive, or negative.", "demos": [ { @@ -247,7 +267,7 @@ def test_system_format(self): "source": 'Instruction:classify user sentence by its sentiment to either positive, or negative.\n\nUser:This is my sentence: "was so not good"\nAgent:negative\n\nUser:This is my sentence: "was so good"\nAgent:positive\n\nUser:This is my sentence: "was so bad"\nAgent:', "target": 
"negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, } self.assertDictEqual(result, target) @@ -256,7 +276,7 @@ def test_system_format(self): "source": 'This is my sentence: "was so bad"', "target": "negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, "instruction": "classify user sentence by its sentiment to either positive, or negative.", } system_format = SystemFormat( @@ -267,7 +287,7 @@ def test_system_format(self): target = { "source": 'Instruction:classify user sentence by its sentiment to either positive, or negative.\n\nUser:This is my sentence: "was so bad"\nAgent:', "target": "negative", - "inputs": {}, + "input_fields": {}, "references": ["negative"], } self.assertDictEqual(result, target) @@ -284,7 +304,7 @@ def test_system_format(self): "source": 'This is my sentence: "was so bad"', "target": "negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, "instruction": "classify user sentence by its sentiment to either positive, or negative.", "demos": [ { @@ -307,7 +327,7 @@ def test_system_format(self): "source": '[INST] <>\nclassify user sentence by its sentiment to either positive, or negative.\n\nUser: This is my sentence: "was so not good"\nAgent: negative\n\nUser: This is my sentence: "was so good"\nAgent: positive\n\nUser: This is my sentence: "was so bad"\nAgent: [/INST]', "target": "negative", "references": ["negative"], - "inputs": {}, + "input_fields": {}, } self.assertDictEqual(result, target) @@ -323,8 +343,18 @@ def test_system_format_with_args(self): instruction = "solve the math exercises" demo_instances = [ - {"source": "1+2", "target": "3", "instruction": instruction, "inputs": {}}, - {"source": "4-2", "target": "2", "instruction": instruction, "inputs": {}}, + { + "source": "1+2", + "target": "3", + "instruction": instruction, + "input_fields": {}, + }, + { + "source": "4-2", + "target": "2", + "instruction": instruction, + "input_fields": {}, + }, ] inputs = [ @@ 
-333,50 +363,50 @@ def test_system_format_with_args(self): "target": "2", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "3+2", "target": "5", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "7-4", "target": "3", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, { "source": "12-3", "target": "9", "instruction": instruction, "demos": demo_instances, - "inputs": {}, + "input_fields": {}, }, ] targets = [ { "target": "2", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n1+1\nAgent: ", }, { "target": "5", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n3+2\nAgent: ", }, { "target": "3", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n7-4\nAgent: ", }, { "target": "9", - "inputs": {}, + "input_fields": {}, "source": "User: 1+2\nAgent: 3\n\nUser: 4-2\nAgent: 2\n\nUser: solve the math exercises\n\n12-3\nAgent: ", }, ] diff --git a/tests/library/test_metrics.py b/tests/library/test_metrics.py index f1a3f27d9..3a8378f44 100644 --- a/tests/library/test_metrics.py +++ b/tests/library/test_metrics.py @@ -52,7 +52,7 @@ TokenOverlap, UnsortedListExactMatch, ) -from unitxt.test_utils.metrics import apply_metric +from unitxt.test_utils.metrics import apply_metric, check_scores from tests.utils import UnitxtTestCase @@ -1187,8 +1187,8 @@ def test_perplexity_with_prefix(self): ) expected_global_result = { - "my_perplexity": 0.05986589565873146, - "score": 0.05986589565873146, + "my_perplexity": 0.06, + "score": 0.06, "score_name": "my_perplexity", } @@ -1199,18 +1199,21 @@ def test_perplexity_with_prefix(self): for key, value in global_result.items() if key in expected_global_result } - 
self.assertDictEqual(global_result, expected_global_result) - instance_targets = [ + expected_instance_results = [ { - "my_perplexity": 0.05986589565873146, - "score": 0.05986589565873146, + "my_perplexity": 0.06, + "score": 0.06, "score_name": "my_perplexity", - "my_reference_scores": [0.05986589565873146], + "my_reference_scores": [0.06], } ] - for output, target in zip(outputs, instance_targets): - self.assertDictEqual(output["score"]["instance"], target) + check_scores( + expected_global_result, + expected_instance_results, + global_outputs=outputs[0]["score"]["global"], + instance_outputs=[outputs[0]["score"]["instance"]], + ) class TestConfidenceIntervals(UnitxtTestCase): diff --git a/tests/library/test_operators.py b/tests/library/test_operators.py index 6651cfa18..bcc4ddfb6 100644 --- a/tests/library/test_operators.py +++ b/tests/library/test_operators.py @@ -2839,10 +2839,13 @@ def test_render_demonstrations(self): instance = { "demos": [ { - "inputs": {"text": "was so not good"}, - "outputs": {"label": "negative"}, + "input_fields": {"text": "was so not good"}, + "reference_fields": {"label": "negative"}, + }, + { + "input_fields": {"text": "was so good"}, + "reference_fields": {"label": "positive"}, }, - {"inputs": {"text": "was so good"}, "outputs": {"label": "positive"}}, ] } @@ -2852,8 +2855,8 @@ def test_render_demonstrations(self): target = { "demos": [ { - "inputs": {"text": "was so not good"}, - "outputs": {"label": "negative"}, + "input_fields": {"text": "was so not good"}, + "reference_fields": {"label": "negative"}, "source": 'This is my sentence: "was so not good"', "target": "negative", "references": ["negative"], @@ -2861,8 +2864,8 @@ def test_render_demonstrations(self): "target_prefix": "", }, { - "inputs": {"text": "was so good"}, - "outputs": {"label": "positive"}, + "input_fields": {"text": "was so good"}, + "reference_fields": {"label": "positive"}, "source": 'This is my sentence: "was so good"', "target": "positive", "references": 
["positive"], @@ -2882,12 +2885,12 @@ def test_render_demonstrations_multi_reference(self): instance = { "demos": [ { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, }, { - "inputs": {"text": "who was she?"}, - "outputs": {"answer": ["Shira", "Yael"]}, + "input_fields": {"text": "who was she?"}, + "reference_fields": {"answer": ["Shira", "Yael"]}, }, ] } @@ -2898,8 +2901,8 @@ def test_render_demonstrations_multi_reference(self): target = { "demos": [ { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, "source": "This is my sentence: who was he?", "target": "Dan", "references": ["Dan", "Yossi"], @@ -2907,8 +2910,8 @@ def test_render_demonstrations_multi_reference(self): "target_prefix": "", }, { - "inputs": {"text": "who was she?"}, - "outputs": {"answer": ["Shira", "Yael"]}, + "input_fields": {"text": "who was she?"}, + "reference_fields": {"answer": ["Shira", "Yael"]}, "source": "This is my sentence: who was she?", "target": "Shira", "references": ["Shira", "Yael"], @@ -2925,7 +2928,7 @@ def test_icl_format_with_demonstrations(self): "source": "1+1", "target": "2", "instruction": "solve the math exercises", - "inputs": {}, + "input_fields": {}, } demos_instances = [ {"source": "1+2", "target": "3", "instruction": "solve the math exercises"}, @@ -2964,7 +2967,7 @@ def test_system_format_with_demonstrations_and_instruction_after_demos( instance = { "source": "1+1", "target": "2", - "inputs": {}, + "input_fields": {}, "instruction": "solve the math exercises", "demos": demo_instances, } @@ -2993,7 +2996,7 @@ def test_system_format_without_demonstrations(self): "source": "1+1", "target": "2", "instruction": "solve the math exercises", - "inputs": {}, + "input_fields": {}, } target = """Instruction:solve the math 
exercises @@ -3011,7 +3014,7 @@ def test_system_format_without_demonstrations(self): self.assertEqual(instance["source"], target) def test_model_input_formatter_without_demonstrations_or_instruction(self): - instance = {"source": "1+1", "target": "2", "inputs": {}} + instance = {"source": "1+1", "target": "2", "input_fields": {}} target = """User:1+1 Agent:""" @@ -3024,7 +3027,12 @@ def test_model_input_formatter_without_demonstrations_or_instruction(self): self.assertEqual(instance_out["source"], target) def test_system_format_without_demonstrations_and_empty_instruction(self): - instance = {"source": "1+1", "target": "2", "instruction": "", "inputs": {}} + instance = { + "source": "1+1", + "target": "2", + "instruction": "", + "input_fields": {}, + } target = """User:1+1 Agent:""" diff --git a/tests/library/test_templates.py b/tests/library/test_templates.py index d3fcb6a25..9179d3870 100644 --- a/tests/library/test_templates.py +++ b/tests/library/test_templates.py @@ -27,8 +27,10 @@ def test_span_labeling_template_escaping(self): inputs = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." + }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "LOC", "ORG"], @@ -36,10 +38,10 @@ def test_span_labeling_template_escaping(self): }, }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -50,8 +52,10 @@ def test_span_labeling_template_escaping(self): targets = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." 
+ }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "LOC", "ORG"], @@ -64,10 +68,10 @@ def test_span_labeling_template_escaping(self): "target_prefix": "", }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -88,19 +92,19 @@ def test_multi_label_template(self): inputs = [ { - "inputs": {"text": "hello world"}, - "outputs": {"labels": ["cat", "dog"]}, + "input_fields": {"text": "hello world"}, + "reference_fields": {"labels": ["cat", "dog"]}, }, { - "inputs": {"text": "hello world"}, - "outputs": {"labels": ["man", "woman", "dog"]}, + "input_fields": {"text": "hello world"}, + "reference_fields": {"labels": ["man", "woman", "dog"]}, }, ] targets = [ { - "inputs": {"text": "hello world"}, - "outputs": {"labels": ["cat", "dog"]}, + "input_fields": {"text": "hello world"}, + "reference_fields": {"labels": ["cat", "dog"]}, "source": "hello world", "target": "cat, dog", "references": ["cat, dog"], @@ -108,8 +112,8 @@ def test_multi_label_template(self): "target_prefix": "", }, { - "inputs": {"text": "hello world"}, - "outputs": {"labels": ["man", "woman", "dog"]}, + "input_fields": {"text": "hello world"}, + "reference_fields": {"labels": ["man", "woman", "dog"]}, "source": "hello world", "target": "man, woman, dog", "references": ["man, woman, dog"], @@ -129,15 +133,15 @@ def _test_multi_reference_template(self, target, random_reference): inputs = [ { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, } ] targets = [ { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, "source": "This is my sentence: who was he?", "target": target, 
"references": ["Dan", "Yossi"], @@ -161,8 +165,8 @@ def _test_multi_reference_template_with_exception( input_format="This is my sentence: {text}", references_field="answer" ) instance = { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": references}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": references}, } with self.assertRaises(ValueError) as e: @@ -191,29 +195,35 @@ def test_input_output_template_and_standard_template(self): inputs = [ { - "inputs": {"labels": ["positive", "negative"], "text": "hello world"}, - "outputs": {"label": "positive"}, + "input_fields": { + "labels": ["positive", "negative"], + "text": "hello world", + }, + "reference_fields": {"label": "positive"}, }, { - "inputs": { + "input_fields": { "labels": ["positive", "negative"], "text": ["hello world\n", "hell"], }, - "outputs": {"label": "positive"}, + "reference_fields": {"label": "positive"}, }, { - "inputs": { + "input_fields": { "labels": ["positive", "negative"], "text": ["hello world\n", "hell"], }, - "outputs": {"label": ["positive", "1"]}, + "reference_fields": {"label": ["positive", "1"]}, }, ] targets = [ { - "inputs": {"labels": ["positive", "negative"], "text": "hello world"}, - "outputs": {"label": "positive"}, + "input_fields": { + "labels": ["positive", "negative"], + "text": "hello world", + }, + "reference_fields": {"label": "positive"}, "source": "This is my text:'hello world'", "target": "positive", "references": ["positive"], @@ -221,11 +231,11 @@ def test_input_output_template_and_standard_template(self): "target_prefix": "Sentiment is: ", }, { - "inputs": { + "input_fields": { "labels": ["positive", "negative"], "text": ["hello world\n", "hell"], }, - "outputs": {"label": "positive"}, + "reference_fields": {"label": "positive"}, "source": "This is my text:'hello world\n, hell'", "target": "positive", "references": ["positive"], @@ -233,11 +243,11 @@ def test_input_output_template_and_standard_template(self): "target_prefix": 
"Sentiment is: ", }, { - "inputs": { + "input_fields": { "labels": ["positive", "negative"], "text": ["hello world\n", "hell"], }, - "outputs": {"label": ["positive", "1"]}, + "reference_fields": {"label": ["positive", "1"]}, "source": "This is my text:'hello world\n, hell'", "target": "positive, 1", "references": ["positive, 1"], @@ -261,7 +271,7 @@ def test_input_output_template_and_standard_template(self): with self.assertRaises(TemplateFormatKeyError) as ke: err_input_template.process(inputs[0]) self.assertEqual( - "\"Available inputs are [labels, text] but InputOutputTemplate.input_format format requires a different ones: 'This is my text:'{no_text}''\"", + "\"Available input fields are [labels, text] but InputOutputTemplate.input_format format requires a different ones: 'This is my text:'{no_text}''\"", str(ke.exception), ) @@ -271,7 +281,7 @@ def test_input_output_template_and_standard_template(self): with self.assertRaises(TemplateFormatKeyError) as ke: err_output_template.process(inputs[0]) self.assertEqual( - "\"Available outputs are [label] but InputOutputTemplate.output_format format requires a different ones: '{no_label}'\"", + "\"Available reference fields are [label] but InputOutputTemplate.output_format format requires a different ones: '{no_label}'\"", str(ke.exception), ) @@ -286,15 +296,21 @@ def test_input_output_reference_template_and_standard_template(self): inputs = [ { - "inputs": {"labels": ["positive", "negative"], "text": "hello world"}, - "outputs": {"label": "positive", "reference": "1"}, + "input_fields": { + "labels": ["positive", "negative"], + "text": "hello world", + }, + "reference_fields": {"label": "positive", "reference": "1"}, }, ] targets = [ { - "inputs": {"labels": ["positive", "negative"], "text": "hello world"}, - "outputs": {"label": "positive", "reference": "1"}, + "input_fields": { + "labels": ["positive", "negative"], + "text": "hello world", + }, + "reference_fields": {"label": "positive", "reference": "1"}, 
"source": "This is my text:'hello world'", "target": "positive", "references": ["1"], @@ -306,23 +322,25 @@ def test_input_output_reference_template_and_standard_template(self): check_operator(template, inputs, targets, tester=self) with self.assertRaises(KeyError): - template.outputs_to_target_and_references( - outputs={"label": "positive", "references": "1"} + template.reference_fields_to_target_and_references( + reference_fields={"label": "positive", "references": "1"} ) class ToCoverTemplate(Template): - def inputs_to_source(self, inputs: Dict[str, object]) -> Tuple[str, str]: - ret = super().inputs_to_source(inputs) + def input_fields_to_source( + self, inputs: Dict[str, object] + ) -> Tuple[str, str]: + ret = super().input_fields_to_source(inputs) return (ret, ret) - def outputs_to_target_and_references( + def reference_fields_to_target_and_references( self, outputs: Dict[str, object] ) -> Tuple[str, List[str]]: - return super().outputs_to_target_and_references(outputs) + return super().reference_fields_to_target_and_references(outputs) to_cover_template = ToCoverTemplate() - to_cover_template.inputs_to_source({"a": 1}) - to_cover_template.outputs_to_target_and_references({"a": 1}) + to_cover_template.input_fields_to_source({"a": 1}) + to_cover_template.reference_fields_to_target_and_references({"a": 1}) class ToCoverTemplatesDict(TemplatesDict): def verify(self): @@ -344,7 +362,7 @@ def test_yes_no_template_process_input(self): "Is text_b of news?": {"text": "text_b", "class": "news"}, } for expected_processed_input, inputs in processed_input_to_inputs.items(): - processed = template.inputs_to_source(inputs) + processed = template.input_fields_to_source(inputs) self.assertEqual(expected_processed_input, processed) def test_yes_no_template_process_input_missing_input_field(self): @@ -355,9 +373,9 @@ def test_yes_no_template_process_input_missing_input_field(self): ) with self.assertRaises(TemplateFormatKeyError) as cm: wrong_field_name = "wrong_field_name" - 
template.inputs_to_source(inputs={wrong_field_name: ["news"]}) + template.input_fields_to_source(input_fields={wrong_field_name: ["news"]}) self.assertEqual( - "\"Available inputs are [wrong_field_name] but YesNoTemplate.input_format format requires a different ones: 'Expecting field {class} in input.'\"", + "\"Available input fields are [wrong_field_name] but YesNoTemplate.input_format format requires a different ones: 'Expecting field {class} in input.'\"", str(cm.exception), ) @@ -380,7 +398,9 @@ def test_yes_no_template_process_output(self): yes_answer: {label_field: ["news", "sports"], class_field: "news"}, } for expected_processed_output, outputs in processed_output_to_outputs.items(): - processed, references = template.outputs_to_target_and_references(outputs) + processed, references = template.reference_fields_to_target_and_references( + outputs + ) self.assertEqual(expected_processed_output, processed) self.assertEqual(references, [expected_processed_output]) @@ -397,17 +417,17 @@ def test_yes_no_template_process_output_missing_fields(self): with self.assertRaises(RuntimeError) as cm: outputs = {class_field: "news"} - template.outputs_to_target_and_references(outputs=outputs) + template.reference_fields_to_target_and_references(reference_fields=outputs) self.assertEqual( - f"Available outputs are {list(outputs.keys())}, missing required label field: '{label_field}'.", + f"Available reference_fields are {list(outputs.keys())}, missing required label field: '{label_field}'.", str(cm.exception), ) with self.assertRaises(RuntimeError) as cm: outputs = {label_field: ["news", "sports"]} - template.outputs_to_target_and_references(outputs=outputs) + template.reference_fields_to_target_and_references(reference_fields=outputs) self.assertEqual( - f"Available outputs are {list(outputs.keys())}, missing required class field: '{class_field}'.", + f"Available reference_fields are {list(outputs.keys())}, missing required class field: '{class_field}'.", 
str(cm.exception), ) @@ -419,8 +439,8 @@ def _test_with_wrong_labels_value(wrong_labels_value): input_format="", class_field="", label_field="labels" ) with self.assertRaises(RuntimeError) as cm: - template.outputs_to_target_and_references( - outputs={"labels": wrong_labels_value} + template.reference_fields_to_target_and_references( + reference_fields={"labels": wrong_labels_value} ) self.assertEqual( f"Unexpected value for gold_class_names: '{wrong_labels_value}'. Expecting a list.", @@ -439,8 +459,8 @@ def _test_with_wrong_class_value(wrong_class_value): input_format="", class_field=class_field, label_field=label_field ) with self.assertRaises(RuntimeError) as cm: - template.outputs_to_target_and_references( - outputs={ + template.reference_fields_to_target_and_references( + reference_fields={ label_field: ["news"], class_field: wrong_class_value, } @@ -462,8 +482,10 @@ def test_span_labeling_template_one_entity_escaping(self): inputs = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." + }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "PER", "ORG"], @@ -471,10 +493,10 @@ def test_span_labeling_template_one_entity_escaping(self): }, }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -485,8 +507,10 @@ def test_span_labeling_template_one_entity_escaping(self): targets = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." 
+ }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "PER", "ORG"], @@ -499,10 +523,10 @@ def test_span_labeling_template_one_entity_escaping(self): "target_prefix": "", }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -523,8 +547,10 @@ def test_span_labeling_json_template(self): inputs = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." + }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "PER", "ORG"], @@ -532,10 +558,10 @@ def test_span_labeling_json_template(self): }, }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -546,8 +572,10 @@ def test_span_labeling_json_template(self): targets = [ { - "inputs": {"text": "John,: Doe is from New York and works at Goo:gle."}, - "outputs": { + "input_fields": { + "text": "John,: Doe is from New York and works at Goo:gle." 
+ }, + "reference_fields": { "spans_starts": [0, 19, 41], "spans_ends": [10, 27, 48], "labels": ["PER", "PER", "ORG"], @@ -562,10 +590,10 @@ def test_span_labeling_json_template(self): "target_prefix": "", }, { - "inputs": { + "input_fields": { "text": "John,: Doe is from New York and works at Goo:gle.", }, - "outputs": { + "reference_fields": { "spans_starts": [], "spans_ends": [], "labels": [], @@ -662,7 +690,7 @@ def test_multiple_choice_template(self): with self.assertRaises(ValueError) as ve: check_operator(template, inputs, targets, tester=self) self.assertEqual( - "Error processing instance '0' from stream 'test' in MultipleChoiceTemplate due to: \"Available inputs are [numerals, choices, text] but MultipleChoiceTemplate.input_format format requires a different ones: 'Text: {no_text}, Choices: {no_choices}.'\"", + "Error processing instance '0' from stream 'test' in MultipleChoiceTemplate due to: \"Available input fields are [numerals, choices, text] but MultipleChoiceTemplate.input_format format requires a different ones: 'Text: {no_text}, Choices: {no_choices}.'\"", str(ve.exception), ) @@ -751,7 +779,7 @@ def test_multiple_choice_template_with_shuffle(self): with self.assertRaises(ValueError) as ve: check_operator(template, inputs, targets, tester=self) self.assertEqual( - "Error processing instance '0' from stream 'test' in MultipleChoiceTemplate due to: \"Available inputs are [numerals, choices, text] but MultipleChoiceTemplate.input_format format requires a different ones: 'Text: {no_text}, Choices: {no_choices}.'\"", + "Error processing instance '0' from stream 'test' in MultipleChoiceTemplate due to: \"Available input fields are [numerals, choices, text] but MultipleChoiceTemplate.input_format format requires a different ones: 'Text: {no_text}, Choices: {no_choices}.'\"", str(ve.exception), ) @@ -780,15 +808,18 @@ def test_key_val_template_int_list(self): self.assertEqual(result, target) def test_render_template(self): - instance = {"inputs": 
{"text": "was so bad"}, "outputs": {"label": "negative"}} + instance = { + "input_fields": {"text": "was so bad"}, + "reference_fields": {"label": "negative"}, + } template = InputOutputTemplate( input_format='This is my sentence: "{text}"', output_format="{label}" ) result = template.process(instance) target = { - "inputs": {"text": "was so bad"}, - "outputs": {"label": "negative"}, + "input_fields": {"text": "was so bad"}, + "reference_fields": {"label": "negative"}, "source": 'This is my sentence: "was so bad"', "target": "negative", "references": ["negative"], @@ -802,14 +833,14 @@ def test_render_multi_reference_template(self): input_format="This is my sentence: {text}", references_field="answer" ) instance = { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, } result = template.process(instance) target = { - "inputs": {"text": "who was he?"}, - "outputs": {"answer": ["Dan", "Yossi"]}, + "input_fields": {"text": "who was he?"}, + "reference_fields": {"answer": ["Dan", "Yossi"]}, "source": "This is my sentence: who was he?", "target": "Dan", "references": ["Dan", "Yossi"],