From 9699777705706e48c164335c2d1c32ecbbb5cea0 Mon Sep 17 00:00:00 2001
From: Alexandre Marques
Date: Mon, 15 Jan 2024 14:43:23 -0500
Subject: [PATCH 1/3] Add support for ultrachat200k

---
 .../transformers/eval_downstream.py | 34 ++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/deepsparse/transformers/eval_downstream.py b/src/deepsparse/transformers/eval_downstream.py
index b4712153c4..b3b530fe52 100644
--- a/src/deepsparse/transformers/eval_downstream.py
+++ b/src/deepsparse/transformers/eval_downstream.py
@@ -79,7 +79,20 @@
 
 _LOGGER = logging.getLogger(__name__)
 
-PPL_DATASETS = ["wikitext2", "c4", "openai_humaneval"]
+DEFAULT_ULTRACHAT200K_TEMPLATE = (
+    "{% for message in messages %}\n"
+    "{% if message['role'] == 'user' %}\n"
+    "{{ '<|user|>\n' + message['content'] + eos_token }}\n"
+    "{% elif message['role'] == 'system' %}\n"
+    "{{ '<|system|>\n' + message['content'] + eos_token }}\n"
+    "{% elif message['role'] == 'assistant' %}\n"
+    "{{ '<|assistant|>\n' + message['content'] + eos_token }}\n"
+    "{% endif %}\n"
+    "{% if loop.last and add_generation_prompt %}\n"
+    "{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
+)
+PPL_DATASETS = ["wikitext2", "c4", "openai_humaneval", "ultrachat200k"]
 
 
 def perplexity_eval(args, dataset_name="openai_humaneval"):
@@ -112,6 +125,13 @@ def perplexity_eval(args, dataset_name="openai_humaneval"):
         trust_remote_code=args.trust_remote_code,
     )
 
+    if dataset_name == "ultrachat200k":
+        if (
+            not hasattr(text_generation.tokenizer, "chat_template")
+            or text_generation.tokenizer.chat_template is None
+        ):
+            text_generation.tokenizer.chat_template = DEFAULT_ULTRACHAT200K_TEMPLATE
+
     # Instantiate perplexity metric
     perplexity_metrics = Perplexity(accumulate=accumulate)
 
@@ -124,6 +144,18 @@ def perplexity_eval(args, dataset_name="openai_humaneval"):
         # Collect input sequence
         if dataset_name == "openai_humaneval":
             sample = sample["prompt"] + sample["canonical_solution"]
+        elif dataset_name == "ultrachat200k":
+            messages = sample["messages"]
+            # We add an empty system message if there is none
+            if messages[0]["role"] != "system":
+                messages.insert(0, {"role": "system", "content": ""})
+
+            sample = text_generation.tokenizer.apply_chat_template(
+                messages,
+                tokenize=False,
+                add_generation_prompt=False,
+            )
+
         batch_samples.append(sample)
 
         if args.max_samples and idx == args.max_samples - 1:

From db792895b160fe3bb8d3ba455b4d9c589a086e2f Mon Sep 17 00:00:00 2001
From: Alexandre Marques
Date: Tue, 16 Jan 2024 15:12:18 -0500
Subject: [PATCH 2/3] Update loading of ultrachat dataset

---
 src/deepsparse/transformers/eval_downstream.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/deepsparse/transformers/eval_downstream.py b/src/deepsparse/transformers/eval_downstream.py
index b3b530fe52..8778673991 100644
--- a/src/deepsparse/transformers/eval_downstream.py
+++ b/src/deepsparse/transformers/eval_downstream.py
@@ -110,6 +110,9 @@ def perplexity_eval(args, dataset_name="openai_humaneval"):
         # Set perplexity computation to accumulate negative log-likelihood across
         # sections
         accumulate = True
+    elif dataset_name == "ultrachat200k":
+        dataset = load_dataset("HuggingFaceH4/ultrachat_200k", name="default", split="test_sft")
+        accumulate = False
     else:
         dataset = load_dataset(dataset_name, split="test")
         accumulate = False
@@ -581,6 +584,10 @@ def _split_train_val(train_dataset, val_ratio, seed=42):
         args,
         dataset_name="c4",
     ),
+    "ultrachat200k": lambda args: perplexity_eval(
+        args,
+        dataset_name="ultrachat200k",
+    ),
 }
 
 

From d5e11f5a02126afc1fbba64ef78e660c1cc11069 Mon Sep 17 00:00:00 2001
From: Alexandre Marques
Date: Tue, 16 Jan 2024 15:15:10 -0500
Subject: [PATCH 3/3] Style fixes

---
 src/deepsparse/transformers/eval_downstream.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/deepsparse/transformers/eval_downstream.py b/src/deepsparse/transformers/eval_downstream.py
index 8778673991..ddec84aba9 100644
--- a/src/deepsparse/transformers/eval_downstream.py
+++ b/src/deepsparse/transformers/eval_downstream.py
@@ -111,7 +111,9 @@ def perplexity_eval(args, dataset_name="openai_humaneval"):
         # sections
         accumulate = True
     elif dataset_name == "ultrachat200k":
-        dataset = load_dataset("HuggingFaceH4/ultrachat_200k", name="default", split="test_sft")
+        dataset = load_dataset(
+            "HuggingFaceH4/ultrachat_200k", name="default", split="test_sft"
+        )
         accumulate = False
     else:
         dataset = load_dataset(dataset_name, split="test")
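
Note on the resulting evaluation path (not part of the patches above): with the
series applied, ultrachat200k samples are loaded from HuggingFaceH4/ultrachat_200k
(test_sft split), tokenizers that ship no chat template fall back to
DEFAULT_ULTRACHAT200K_TEMPLATE, an empty system message is prepended when missing,
and the conversation is flattened with apply_chat_template before perplexity is
computed. The sketch below illustrates that preprocessing in isolation; it assumes
the patched deepsparse module plus the datasets and transformers libraries are
installed, and the zephyr tokenizer name is only an illustrative placeholder.

# Minimal sketch of the ultrachat200k preprocessing added by this series.
from datasets import load_dataset
from transformers import AutoTokenizer

from deepsparse.transformers.eval_downstream import DEFAULT_ULTRACHAT200K_TEMPLATE

# Placeholder tokenizer for illustration; any Hugging Face tokenizer works here.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

# Fall back to the default template when the tokenizer does not define one.
if getattr(tokenizer, "chat_template", None) is None:
    tokenizer.chat_template = DEFAULT_ULTRACHAT200K_TEMPLATE

dataset = load_dataset(
    "HuggingFaceH4/ultrachat_200k", name="default", split="test_sft"
)
messages = dataset[0]["messages"]

# Prepend an empty system message when the conversation starts without one,
# mirroring the logic in perplexity_eval.
if messages[0]["role"] != "system":
    messages.insert(0, {"role": "system", "content": ""})

# Flatten the conversation to a single string; perplexity is then computed
# over this text by the TextGeneration pipeline.
sample = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=False
)
print(sample[:200])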