Add starcoder evaluation support when finetuning (#452)
Signed-off-by: changwangss <chang1.wang@intel.com>
Signed-off-by: Wenxin Zhang <wenxin.zhang@intel.com>
Co-authored-by: Wenxin Zhang <wenxin.zhang@intel.com>
changwangss and VincyZhang authored Oct 13, 2023
1 parent 1e260bd commit 073bdde
Showing 4 changed files with 44 additions and 4 deletions.
3 changes: 2 additions & 1 deletion docker/Dockerfile_code_gen
@@ -55,7 +55,8 @@ RUN ${PYTHON} -m pip install -r examples/huggingface/pytorch/code-generation/qua

 RUN ${PYTHON} -m pip install /app/intel-extension-for-transformers/intel_extension_for_pytorch-2.1.0-cp310-cp310-manylinux2014_x86_64.whl
 
-RUN ${PYTHON} -m pip install intel_extension_for_transformers
+RUN ${PYTHON} -m pip install . --no-cache-dir
+#RUN ${PYTHON} -m pip install intel_extension_for_transformers
 FROM itrex-base as prod
 
 ARG PYTHON=python3.10
@@ -304,7 +304,7 @@ def preprocess_dataset(raw_datasets, tokenizer, data_args, finetune_args):
         preprocess = SummarizationDataPreprocess()
         preprocess_fn = preprocess.tokenize_func(tokenizer, data_args, finetune_args)
 
-    elif finetune_args.task == "completion":
+    elif finetune_args.task == "completion" or finetune_args.task == "code-generation":
         # default use alpaca template
         preprocess = CompletionDataPreprocess()
         for key in raw_datasets:
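In practice the new task value reuses the completion path, so code datasets are rendered with the alpaca-style prompt template. Below is a minimal sketch of the routing as it stands after this change; the stub dataclass is hypothetical (real runs pass the FinetuningArguments instance defined in neural_chat/config.py), and branches for other tasks such as "chat" are omitted.

from dataclasses import dataclass

@dataclass
class StubFinetuneArgs:
    # hypothetical stand-in for FinetuningArguments, for illustration only
    task: str = "code-generation"

def select_preprocess(finetune_args):
    # mirrors the branch edited above: "code-generation" now shares the
    # alpaca completion template with "completion" (other tasks omitted)
    if finetune_args.task == "summarization":
        return "SummarizationDataPreprocess"
    elif finetune_args.task in ("completion", "code-generation"):
        return "CompletionDataPreprocess"
    raise ValueError(f"unsupported task: {finetune_args.task}")

assert select_preprocess(StubFinetuneArgs()) == "CompletionDataPreprocess"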
41 changes: 40 additions & 1 deletion intel_extension_for_transformers/llm/finetuning/finetuning.py
@@ -521,8 +521,47 @@ def concatenate_data(dataset, max_seq_length):
                 unwrapped_model.save_pretrained(
                     training_args.output_dir, state_dict=unwrapped_model.state_dict()
                 )
+                if finetune_args.do_lm_eval and finetune_args.task == "code-generation":
+                    unwrapped_model.eval()
+                    class Eval_Args:
+                        n_samples = 20
+                        limit = 20
+                        allow_code_execution = True
+                        prefix = ""
+                        generation_only = False
+                        postprocess = False
+                        save_references = False
+                        save_generations = False
+                        instruction_tokens = None
+                        save_generations_path = None
+                        load_generations_path = None
+                        metric_output_path = "evaluation_results.json"
+                        seed = 0
+                        max_length_generation = 512
+                        temperature = 0.8
+                        top_p = 0.8
+                        top_k = 0
+                        do_sample = True
+                        check_references = False
+                        max_memory_per_gpu = None
+                        modeltype = "causal"
+                        limit_start = 0
+                        batch_size = 20  # batch_size >= n_samples if do_sample.
+                    eval_args = Eval_Args()
+                    from intel_extension_for_transformers.llm.evaluation.lm_code_eval import evaluate
+                    with training_args.main_process_first(desc="lm_eval"):
+                        if is_main_process(training_args.local_rank):
+                            with torch.no_grad():
+                                results = evaluate(
+                                    model=unwrapped_model,
+                                    tokenizer=tokenizer,
+                                    tasks="humaneval",
+                                    batch_size=eval_args.batch_size,
+                                    args=eval_args,
+                                )
+                                self.logger.info(results)
 
-                if finetune_args.do_lm_eval and finetune_args.task != "summarization":
+                elif finetune_args.do_lm_eval and finetune_args.task != "summarization":
                     unwrapped_model.eval()
                     from intel_extension_for_transformers.llm.evaluation.lm_eval import evaluate
                     with training_args.main_process_first(desc="lm_eval"):
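The net effect: when task is "code-generation" and do_lm_eval is set, the freshly fine-tuned model is scored on HumanEval in-process, on the main process only, with sampling (n_samples=20 at temperature 0.8). Below is a standalone sketch of the same call path, assuming the package is installed; the checkpoint name is a placeholder, and EvalArgs simply repeats the Eval_Args defaults from the diff.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from intel_extension_for_transformers.llm.evaluation.lm_code_eval import evaluate

class EvalArgs:
    # same fields and values as the Eval_Args block added in finetuning.py
    n_samples = 20
    limit = 20
    allow_code_execution = True  # HumanEval executes generated code; opt in explicitly
    prefix = ""
    generation_only = False
    postprocess = False
    save_references = False
    save_generations = False
    instruction_tokens = None
    save_generations_path = None
    load_generations_path = None
    metric_output_path = "evaluation_results.json"
    seed = 0
    max_length_generation = 512
    temperature = 0.8
    top_p = 0.8
    top_k = 0
    do_sample = True
    check_references = False
    max_memory_per_gpu = None
    modeltype = "causal"
    limit_start = 0
    batch_size = 20  # batch_size >= n_samples if do_sample

# placeholder checkpoint, not part of this commit
model_name = "bigcode/starcoderbase-1b"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

model.eval()
with torch.no_grad():
    results = evaluate(
        model=model,
        tokenizer=tokenizer,
        tasks="humaneval",
        batch_size=EvalArgs.batch_size,
        args=EvalArgs(),
    )
print(results)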
2 changes: 1 addition & 1 deletion intel_extension_for_transformers/neural_chat/config.py
@@ -318,7 +318,7 @@ class FinetuningArguments:
     task: Optional[str] = field(
         default="completion",
         metadata={"help": "task name, different task means different data format.",
-                  "choices": ["completion", "chat", "summarization"]
+                  "choices": ["completion", "chat", "summarization", "code-generation"]
         },
     )
     do_lm_eval: bool = field(
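Enabling the whole path from user code then comes down to two fields. A minimal sketch follows; constructing FinetuningArguments directly like this is an assumption for illustration — in the training scripts it is typically populated from the command line via HfArgumentParser.

from intel_extension_for_transformers.neural_chat.config import FinetuningArguments

# task="code-generation" routes preprocessing through the alpaca completion
# template, and do_lm_eval=True triggers the post-training HumanEval run
finetune_args = FinetuningArguments(
    task="code-generation",
    do_lm_eval=True,
)
print(finetune_args.task, finetune_args.do_lm_eval)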
