diff --git a/src/evaluate/utils/gradio.py b/src/evaluate/utils/gradio.py index 02d7e83e3..3b73d9c67 100644 --- a/src/evaluate/utils/gradio.py +++ b/src/evaluate/utils/gradio.py @@ -118,7 +118,10 @@ def compute(data): datatype=json_to_string_type(gradio_input_types), ), outputs=gr.outputs.Textbox(label=metric.name), - description=metric.info.description, + description=( + metric.info.description + "\nIf this is a text-based metric, make sure to wrap your input in double quotes." + " Alternatively you can use a JSON-formatted list as input." + ), title=f"Metric: {metric.name}", article=parse_readme(local_path / "README.md"), # TODO: load test cases and use them to populate examples