From e911c9f73051fa7c0879337503739079ac53e5eb Mon Sep 17 00:00:00 2001
From: Frank-lilinjie
Date: Sun, 29 Sep 2024 18:45:47 +0800
Subject: [PATCH] LLM_Agent_Benchmark

Signed-off-by: Frank-lilinjie
---
 examples/llm-agent/config/config.json       |  11 ++
 examples/llm-agent/config/train_config.json |  11 ++
 .../singletask_learning_bench/README.md     |   0
 .../benchmarkingjob.yaml                    |  66 +++++++++++
 .../testalgorithms/basemodel.py             | 107 ++++++++++++++++++
 .../testalgorithms/test_algorithm.yaml      |  16 +++
 .../testenv/rouge.py                        |  42 +++++++
 .../testenv/testenv.yaml                    |  20 ++++
 8 files changed, 273 insertions(+)
 create mode 100644 examples/llm-agent/config/config.json
 create mode 100644 examples/llm-agent/config/train_config.json
 create mode 100644 examples/llm-agent/singletask_learning_bench/README.md
 create mode 100644 examples/llm-agent/singletask_learning_bench/benchmarkingjob.yaml
 create mode 100644 examples/llm-agent/singletask_learning_bench/testalgorithms/basemodel.py
 create mode 100644 examples/llm-agent/singletask_learning_bench/testalgorithms/test_algorithm.yaml
 create mode 100644 examples/llm-agent/singletask_learning_bench/testenv/rouge.py
 create mode 100644 examples/llm-agent/singletask_learning_bench/testenv/testenv.yaml

diff --git a/examples/llm-agent/config/config.json b/examples/llm-agent/config/config.json
new file mode 100644
index 00000000..c336da4c
--- /dev/null
+++ b/examples/llm-agent/config/config.json
+{
+    "tokenizer_dir": "./examples/LLM-Agent-Benchmark/pretrains/Langboat/bloom-1b4-zh",
+    "auth_token": "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",
+    "data_dir": "./examples/LLM-Agent-Benchmark/dataset/activity_classification.json",
+    "token_factor": 32,
+    "half_model": true,
+    "token_padding": "right",
+    "trust_remote": true,
+    "device": "auto",
+    "output_dir": "./checkpoint"
+}
\ No newline at end of file
diff --git a/examples/llm-agent/config/train_config.json b/examples/llm-agent/config/train_config.json
new file mode 100644
index 00000000..df757133
--- /dev/null
+++ b/examples/llm-agent/config/train_config.json
+{
+    "per_device_train_batch_size": 5,
+    "logging_steps": 50,
+    "num_train_epochs": 2,
+    "output_dir": "./checkpoint",
+    "half_lora": true,
+    "learning_rate": 2e-4,
+    "weight_decay": 0.01,
+    "save_strategy": "epoch",
+    "save_total_limit": 10
+}
\ No newline at end of file
diff --git a/examples/llm-agent/singletask_learning_bench/README.md b/examples/llm-agent/singletask_learning_bench/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/llm-agent/singletask_learning_bench/benchmarkingjob.yaml b/examples/llm-agent/singletask_learning_bench/benchmarkingjob.yaml
new file mode 100644
index 00000000..4bb6f873
--- /dev/null
+++ b/examples/llm-agent/singletask_learning_bench/benchmarkingjob.yaml
+benchmarkingjob:
+  # job name of benchmarking; string type;
+  name: "benchmarkingjob"
+  # the url address of job workspace that will reserve the output of tests; string type;
+  workspace: "./workspace"
+
+  # the url address of test environment configuration file; string type;
+  # the file format supports yaml/yml;
+  testenv: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/testenv.yaml"
+
+  # the configuration of test object
+  test_object:
+    # test type; string type;
+    # currently the only option of value is "algorithms"; the others will be added in succession.
+    type: "algorithms"
+    # test algorithm configuration files; list type;
+    algorithms:
+      # algorithm name; string type;
+      - name: "LLM_agent"
+        # the url address of test algorithm configuration file; string type;
+        # the file format supports yaml/yml
+        url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testalgorithms/test_algorithm.yaml"
+
+  # the configuration of ranking leaderboard
+  rank:
+    # rank leaderboard with metric of test case's evaluation and order; list type;
+    # the sorting priority is based on the sequence of metrics in the list from front to back;
+    sort_by: [ { "rouge1": "descend" } ]
+
+    # visualization configuration
+    visualization:
+      # mode of visualization in the leaderboard; string type;
+      # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
+      # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
+      mode: "selected_only"
+      # method of visualization for selected dataitems; string type;
+      # currently the options of value are as follows:
+      #   1> "print_table": print selected dataitems;
+      method: "print_table"
+
+    # selected dataitem configuration
+    # The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics",
+    # so that the selected columns will be shown.
+    selected_dataitem:
+      # currently the options of value are as follows:
+      #   1> "all": select all paradigms in the leaderboard;
+      #   2> paradigms in the leaderboard, e.g., "singletasklearning"
+      paradigms: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all modules in the leaderboard;
+      #   2> modules in the leaderboard, e.g., "basemodel"
+      modules: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all hyperparameters in the leaderboard;
+      #   2> hyperparameters in the leaderboard, e.g., "momentum"
+      hyperparameters: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all metrics in the leaderboard;
+      #   2> metrics in the leaderboard, e.g., "F1_SCORE"
+      metrics: [ "rouge1", "rouge2", "rougeL" ]
+
+    # mode of saving selected and all dataitems in workspace `./rank`; string type;
+    # currently the options of value are as follows:
+    #   1> "selected_and_all": save selected and all dataitems;
+    #   2> "selected_only": save selected dataitems;
+    save_mode: "selected_and_all"
\ No newline at end of file
diff --git a/examples/llm-agent/singletask_learning_bench/testalgorithms/basemodel.py b/examples/llm-agent/singletask_learning_bench/testalgorithms/basemodel.py
new file mode 100644
index 00000000..aef90ded
--- /dev/null
+++ b/examples/llm-agent/singletask_learning_bench/testalgorithms/basemodel.py
+import os
+import json
+import zipfile
+import logging
+from functools import partial
+
+import torch
+import datasets
+from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, pipeline, DataCollatorForSeq2Seq
+from peft import LoraConfig, get_peft_model, TaskType, PeftModel
+from sedna.common.class_factory import ClassType, ClassFactory
+from sedna.common.config import Context
+from sedna.common.file_ops import FileOps
+from sedna.common.log import LOGGER
+
+logging.disable(logging.WARNING)
+
+__all__ = ["BaseModel"]
+
+os.environ['BACKEND_TYPE'] = 'TORCH'
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="LLM_agent")
+class BaseModel:
+    def __init__(self, **kwargs):
+        config = kwargs.get("config")
+        with open(config, 'r', encoding='utf-8') as file:
+            self.config = json.load(file)
+        train_config = kwargs.get("train_config")
+        with open(train_config, 'r', encoding='utf-8') as file:
+            self.train_config = json.load(file)
+
+        self.tokenizer_dir = self.config["tokenizer_dir"]
+        self.auth_token = self.config["auth_token"]
+        self.token_factor = self.config["token_factor"]
+        self.MAX_LENGTH = 128
+        self.data_dir = self.config["data_dir"]
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.tokenizer_dir,
+            use_auth_token=self.auth_token,
+            device_map=self.config["device"],
+            trust_remote_code=self.config["trust_remote"]
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_dir, token=self.auth_token)
+
+    def train(self, train_data, **kwargs):
+        # the dataset index passed in by the paradigm is ignored here; the raw JSON
+        # dialogue file configured in config.json is loaded and tokenized instead
+        train_data = self.load_json(self.data_dir, self.tokenizer)
+        config_lora = LoraConfig(
+            task_type=TaskType.CAUSAL_LM,
+            lora_alpha=1,
+            lora_dropout=0.0
+        )
+        model = get_peft_model(self.model, config_lora)
+        half = self.train_config["half_lora"]
+        if half:
+            model = model.half()
+        del self.train_config["half_lora"]
+        # fp16 training needs a larger epsilon to keep Adam numerically stable
+        args = TrainingArguments(adam_epsilon=(1e-4 if half else 1e-8), **self.train_config)
+        trainer = Trainer(
+            model=model,
+            args=args,
+            data_collator=DataCollatorForSeq2Seq(tokenizer=self.tokenizer, padding=True),
+            train_dataset=train_data["train"],
+            eval_dataset=None
+        )
+        trainer.train()
+        self.model = trainer.model
+        return self.model
+
+    def predict(self, data, **kwargs):
+        pipe = pipeline("text2text-generation", model=self.model, tokenizer=self.tokenizer)
+        y_pred = pipe(data)
+        return y_pred
+
+    def evaluate(self, data, **kwargs):
+        pass
+
+    def load(self, model_url, **kwargs):
+        if model_url:
+            print("load model url: ", model_url)
+
+    def save(self, model_path=None):
+        pass
+
+    def load_json(self, data_dir, tokenizer, token_factor=32):
+        # load the JSON dialogue dataset
+        MYjson = datasets.load_dataset("json", data_files=data_dir)
+        # map in batches of 2 so that each (user, assistant) pair is preprocessed together
+        ds = MYjson.map(
+            self.preprocess,
+            fn_kwargs={"MAX_LENGTH": self.MAX_LENGTH, "tokenizer": tokenizer},
+            batched=True,
+            batch_size=2,
+            remove_columns=['role', 'content']
+        )
+        # filter out the placeholder rows whose labels contain None
+        filtered_ds = ds.filter(lambda example: None not in example["labels"])
+        return filtered_ds
+
+    def preprocess(self, samples, MAX_LENGTH, tokenizer):
+        input_ids, attention_mask, labels = [], [], []
+        # each batch holds one (user, assistant) pair: the user instruction and the assistant's plan
+        prompt = samples["content"][0]
+        plan = samples["content"][1]
+        # the tokenizer turns the text into token ids;
+        # encode the user instruction wrapped with "user: " / "assistant: " markers
+        instruction = tokenizer("\n".join(["user: ", prompt]) + "\n\nassistant: ", add_special_tokens=False)
+        response = tokenizer(plan, add_special_tokens=False)
+        input_ids = instruction["input_ids"] + response["input_ids"] + [tokenizer.eos_token_id]
+        attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]
+        # mask the instruction tokens with -100 so the loss only covers the response
+        labels = len(instruction["input_ids"]) * [-100] + response["input_ids"] + [tokenizer.eos_token_id]
+        if len(labels) > MAX_LENGTH:
+            input_ids = input_ids[:MAX_LENGTH]
+            attention_mask = attention_mask[:MAX_LENGTH]
+            labels = labels[:MAX_LENGTH]
+        # the leading [None] row is a placeholder emitted for every (user, assistant) pair;
+        # it is removed later by the None filter in load_json
+        return {
+            "input_ids": [[None], input_ids],
+            "attention_mask": [[None], attention_mask],
+            "labels": [[None], labels]
+        }
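+
+
+# --- illustrative sketch, not used by the benchmark --------------------------
+# The preprocessing above assumes that activity_classification.json stores each
+# sample as an alternating pair of user/assistant records; the exact schema is
+# an assumption inferred from load_json()/preprocess(), e.g.:
+#   [{"role": "user", "content": "Classify the following activity ..."},
+#    {"role": "assistant", "content": "The activity belongs to ..."}]
+# The guarded snippet below reproduces the label-masking step with hypothetical
+# token ids, showing that only the assistant response contributes to the loss.
+if __name__ == "__main__":
+    instruction_ids = [11, 12, 13]  # hypothetical prompt token ids
+    response_ids = [21, 22]         # hypothetical response token ids
+    eos_id = 2                      # hypothetical end-of-sequence token id
+    demo_labels = len(instruction_ids) * [-100] + response_ids + [eos_id]
+    assert demo_labels == [-100, -100, -100, 21, 22, 2]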
diff --git a/examples/llm-agent/singletask_learning_bench/testalgorithms/test_algorithm.yaml b/examples/llm-agent/singletask_learning_bench/testalgorithms/test_algorithm.yaml
new file mode 100644
index 00000000..c2273ca3
--- /dev/null
+++ b/examples/llm-agent/singletask_learning_bench/testalgorithms/test_algorithm.yaml
+algorithm:
+  paradigm_type: "singletasklearning"
+  # initial_model_url: "./models/530_exp3_2.pth"
+
+  modules:
+    - type: "basemodel"
+      name: "LLM_agent"
+      url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testalgorithms/basemodel.py"
+
+      hyperparameters:
+        - config:
+            values:
+              - "./examples/LLM-Agent-Benchmark/config/config.json"
+        - train_config:
+            values:
+              - "./examples/LLM-Agent-Benchmark/config/train_config.json"
\ No newline at end of file
diff --git a/examples/llm-agent/singletask_learning_bench/testenv/rouge.py b/examples/llm-agent/singletask_learning_bench/testenv/rouge.py
new file mode 100644
index 00000000..c2897a77
--- /dev/null
+++ b/examples/llm-agent/singletask_learning_bench/testenv/rouge.py
+import evaluate
+import numpy as np
+from sedna.common.class_factory import ClassType, ClassFactory
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+
+def _rouge_score(y_true, y_pred, key):
+    # the metric is computed against a locally stored copy of the rouge metric;
+    # predictions arrive as pipeline output dicts, so the generated text is unpacked first
+    rouge = evaluate.load('./examples/LLM-Agent-Benchmark/evaluate/metrics/rouge')
+    predictions = [item["generated_text"] for item in y_pred]
+    rou_score = rouge.compute(predictions=predictions, references=y_true, use_aggregator=True)
+    return rou_score[key] * 10
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="rouge1")
+def rouge1(y_true, y_pred, **kwargs):
+    return _rouge_score(y_true, y_pred, 'rouge1')
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="rouge2")
+def rouge2(y_true, y_pred, **kwargs):
+    return _rouge_score(y_true, y_pred, 'rouge2')
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="rougeL")
+def rougeL(y_true, y_pred, **kwargs):
+    return _rouge_score(y_true, y_pred, 'rougeL')
+
+
+def calculate_mean(lst):
+    print(lst)
+    if not isinstance(lst, list):
+        return lst
+    if not lst:
+        return None
+    return sum(lst) / len(lst)
\ No newline at end of file
diff --git a/examples/llm-agent/singletask_learning_bench/testenv/testenv.yaml b/examples/llm-agent/singletask_learning_bench/testenv/testenv.yaml
new file mode 100644
index 00000000..34fafdba
--- /dev/null
+++ b/examples/llm-agent/singletask_learning_bench/testenv/testenv.yaml
+testenv:
+  # dataset configuration
+  dataset:
+    # the url address of train dataset index; string type;
+    train_url: "./examples/LLM-Agent-Benchmark/dataset/activity_classification.json"
+    # the url address of test dataset index; string type;
+    test_url: "./examples/LLM-Agent-Benchmark/dataset/activity_classification.json"
+
+  # metrics configuration of the test environment;
+  metrics:
+    # metric name; string type;
+    - name: "rouge1"
+      # the url address of python file
+      url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py"
+    - name: "rouge2"
+      # the url address of python file
+      url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py"
"./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py" + - name: "rougeL" + # the url address of python file + url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py" \ No newline at end of file