LLM_Agent_Benchmark

Signed-off-by: Frank-lilinjie <lilinjie@bupt.edu.cn>
kubeedge · Sep 29, 2024 · e911c9f · e911c9f
1 parent de4b244
commit e911c9f
Show file tree

Hide file tree

Showing 8 changed files with 273 additions and 0 deletions.
diff --git a/examples/llm-agent/config/config.json b/examples/llm-agent/config/config.json
@@ -0,0 +1,11 @@
+{
+    "tokenizer_dir": "./examples/LLM-Agent-Benchmark/pretrains/Langboat/bloom-1b4-zh",
+    "auth_token": "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",
+    "data_dir" :"./examples/LLM-Agent-Benchmark/dataset/activity_classification.json",
+    "token_factor": 32,
+    "half_model": true,
+    "token_padding": "right",
+    "trust_remote": true,
+    "device": "auto",
+    "output_dir": "./checkpoint"
+  }
diff --git a/examples/llm-agent/config/train_config.json b/examples/llm-agent/config/train_config.json
@@ -0,0 +1,11 @@
+{
+    "per_device_train_batch_size":5,
+    "logging_steps":50,
+    "num_train_epochs":2,
+    "output_dir":"./checkpoint",
+    "half_lora":"True",
+    "learning_rate":2e-4,
+    "weight_decay":0.01,
+    "save_strategy":"epoch",
+    "save_total_limit":10
+}
diff --git a/examples/llm-agent/singletask_learning_bench/README.md b/examples/llm-agent/singletask_learning_bench/README.md
diff --git a/examples/llm-agent/singletask_learning_bench/benchmarkingjob.yaml b/examples/llm-agent/singletask_learning_bench/benchmarkingjob.yaml
@@ -0,0 +1,66 @@
+benchmarkingjob:
+  # job name of benchmarking; string type;
+  name: "benchmarkingjob"
+  # the url address of job workspace that will reserve the output of tests; string type;
+  workspace: "./workspace"
+
+  # the url address of test environment configuration file; string type;
+  # the file format supports yaml/yml;
+  testenv: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/testenv.yaml"
+
+  # the configuration of test object
+  test_object:
+    # test type; string type;
+    # currently the only option of value is "algorithms"; the others will be added in succession.
+    type: "algorithms"
+    # test algorithm configuration files; list type;
+    algorithms:
+      # algorithm name; string type;
+      - name: "LLM_agent"
+        # the url address of test algorithm configuration file; string type;
+        # the file format supports yaml/yml
+        url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testalgorithms/test_algorithm.yaml"
+
+  # the configuration of ranking leaderboard
+  rank:
+    # rank leaderboard with metric of test case's evaluation and order ; list type;
+    # the sorting priority is based on the sequence of metrics in the list from front to back;
+    sort_by: [{ "rouge1": "descend" }]
+
+    # visualization configuration
+    visualization:
+      # mode of visualization in the leaderboard; string type;
+      # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
+      # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
+      mode: "selected_only"
+      # method of visualization for selected dataitems; string type;
+      # currently the options of value are as follows:
+      #  1> "print_table": print selected dataitems;
+      method: "print_table"
+
+    # selected dataitem configuration
+    # The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics",
+    # so that the selected columns will be shown.
+    selected_dataitem:
+      # currently the options of value are as follows:
+      #   1> "all": select all paradigms in the leaderboard;
+      #   2> paradigms in the leaderboard, e.g., "singletasklearning"
+      paradigms: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all modules in the leaderboard;
+      #   2> modules in the leaderboard, e.g., "basemodel"
+      modules: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all hyperparameters in the leaderboard;
+      #   2> hyperparameters in the leaderboard, e.g., "momentum"
+      hyperparameters: [ "all" ]
+      # currently the options of value are as follows:
+      #   1> "all": select all metrics in the leaderboard;
+      #   2> metrics in the leaderboard, e.g., "F1_SCORE"
+      metrics: ["rouge1","rouge2","rougeL"]
+
+    # mode of saving selected and all dataitems in workspace `./rank`; string type;
+    # currently the options of value are as follows:
+    #  1> "selected_and_all": save selected and all dataitems;
+    #  2> "selected_only": save selected dataitems;
+    save_mode: "selected_and_all"
diff --git a/examples/llm-agent/singletask_learning_bench/testalgorithms/basemodel.py b/examples/llm-agent/singletask_learning_bench/testalgorithms/basemodel.py
@@ -0,0 +1,107 @@
+import os
+import zipfile
+import logging
+from transformers import AutoTokenizer,AutoModelForCausalLM
+import torch
+from peft import LoraConfig,get_peft_model,TaskType,PeftModel
+from transformers import AutoModelForCausalLM,TrainingArguments,Trainer,pipeline,AutoTokenizer,DataCollatorForSeq2Seq
+from sedna.common.class_factory import ClassType, ClassFactory
+from sedna.common.config import Context
+from sedna.common.file_ops import FileOps
+from sedna.common.log import LOGGER
+from functools import partial
+import datasets
+import json
+import os
+
+# Suppress every log record of severity WARNING and below, process-wide.
+logging.disable(logging.WARNING)
+
+# Only BaseModel is exported by a star-import of this module.
+__all__ = ["BaseModel"]
+
+# Set at import time, before the class below is registered.
+# NOTE(review): presumably read by sedna to select its PyTorch backend - confirm.
+os.environ['BACKEND_TYPE'] = 'TORCH'
+
+@ClassFactory.register(ClassType.GENERAL, alias="LLM_agent")
+class BaseModel:
+    def __init__(self, **kwargs):
+        config=kwargs.get("config")
+        with open(config, 'r', encoding='utf-8') as file:
+            self.config = json.load(file)
+        train_config=kwargs.get("train_config")
+        with open(train_config, 'r', encoding='utf-8') as file:
+            self.train_config = json.load(file)
+
+        self.tokenizer_dir = self.config["tokenizer_dir"]
+        self.auth_token=self.config["auth_token"]
+        self.token_factor=self.config["token_factor"]
+        self.MAX_LENGTH = 128
+        self.data_dir = self.config["data_dir"]
+        self.model = AutoModelForCausalLM.from_pretrained(self.tokenizer_dir, use_auth_token=self.auth_token,device_map=self.config["device"],trust_remote_code=self.config["trust_remote"])
+        self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_dir,token=self.auth_token)
+
+    def train(self, train_data, **kwargs):
+        train_data = self.load_json(self.data_dir, self.tokenizer)
+        config_lora=LoraConfig(task_type=TaskType.CAUSAL_LM,
+                    lora_alpha = 1,
+                    lora_dropout = 0.0
+                    )
+        model=get_peft_model(self.model,config_lora)
+        half = self.train_config["half_lora"]
+        if half==True:
+            model=model.half()
+        del self.train_config["half_lora"]
+        args=TrainingArguments(adam_epsilon=(1e-4 if half else 1e-8)
+                       ,**self.train_config)
+        trainer=Trainer(model=model,args=args,data_collator=DataCollatorForSeq2Seq(tokenizer=self.tokenizer,padding=True),train_dataset=train_data["train"], eval_dataset=None)
+        trainer.train()
+        self.model = trainer.model
+        return self.model
+
+    from transformers import pipeline
+    def predict(self, data, **kwargs):
+        pipe=pipeline("text2text-generation",model=self.model,tokenizer=self.tokenizer)
+        y_pred=pipe(data)
+        return y_pred
+
+
+    def evaluate(self, data, **kwargs):
+        pass
+
+
+    def load(self, model_url, **kwargs):
+        if model_url:
+            print("load model url: ",model_url)
+
+    def save(self, model_path = None):
+        pass
+
+    def load_json(self, data_dir, tokenizer, token_factor = 32):
+        MYjson=datasets.load_dataset("json",data_files=data_dir) # 加载Json数据集
+        # train_data=self.preprocess(train_data, self.MAX_LENGTH, tokenizer)
+        ds=MYjson.map(self.preprocess,fn_kwargs={"MAX_LENGTH":self.MAX_LENGTH,"tokenizer":tokenizer},batched=True,batch_size=2,remove_columns=['role','content'])
+
+        filtered_ds=ds.filter(lambda example:not None in example["labels"]) # 过滤掉标签为 None的样本
+        return filtered_ds
+
+
+    def preprocess(self, samples, MAX_LENGTH, tokenizer):
+        input_ids,attention_mask,labels=[],[],[] # 初始化三个空列表
+        #prompt=[sample["content"] for sample in samples if sample["role"]=="user"]
+        #plan=[sample["content"] for sample in samples if sample["role"]=="assistant"]
+        prompt=samples["content"][0] # 用户的指令
+        plan=samples["content"][1] # 计划
+        # tokenizer将文本转化为数字的表示形式
+        # 编码用户指令，并加上 "user: " 和 "assistant: " 的提示符
+        instruction=tokenizer("\n".join(["user: ",prompt])+"\n\nassistant: ",add_special_tokens=False) # 编码
+        response=tokenizer(plan,add_special_tokens=False)
+        input_ids=instruction["input_ids"]+response["input_ids"]+[tokenizer.eos_token_id]
+        attention_mask=instruction["attention_mask"]+response["attention_mask"]+[1]
+        labels=len(instruction["input_ids"])*[-100]+response["input_ids"]+[tokenizer.eos_token_id] 
+        if len(labels)>MAX_LENGTH:
+            input_ids=input_ids[:MAX_LENGTH]
+            attention_mask=attention_mask[:MAX_LENGTH]
+            labels=labels[:MAX_LENGTH]
+        return {
+            "input_ids":[[None],input_ids],
+            "attention_mask":[[None],attention_mask],
+            "labels":[[None],labels]
+        }
diff --git a/examples/llm-agent/singletask_learning_bench/testalgorithms/test_algorithm.yaml b/examples/llm-agent/singletask_learning_bench/testalgorithms/test_algorithm.yaml
@@ -0,0 +1,16 @@
+algorithm:
+  paradigm_type: "singletasklearning"
+  # initial_model_url: "./models/530_exp3_2.pth"
+
+  modules:
+    - type: "basemodel"
+      name: "LLM_agent"
+      url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testalgorithms/basemodel.py"
+
+      hyperparameters:
+        - config:
+            values:
+              - "./examples/LLM-Agent-Benchmark/config/config.json"
+        - train_config:
+            values:
+              - "./examples/LLM-Agent-Benchmark/config/train_config.json"
diff --git a/examples/llm-agent/singletask_learning_bench/testenv/rouge.py b/examples/llm-agent/singletask_learning_bench/testenv/rouge.py
@@ -0,0 +1,42 @@
+import evaluate
+import numpy as np
+from sedna.common.class_factory import ClassType, ClassFactory
+from transformers import AutoTokenizer,AutoModelForCausalLM
+
+@ClassFactory.register(ClassType.GENERAL, alias="rouge1")
+def rouge1(y_true, y_pred, **kwargs):
+    rouge=evaluate.load('./examples/LLM-Agent-Benchmark/evaluate/metrics/rouge')
+    y_prednew=[]
+    for i in range(len(y_pred)):
+        y_prednew.append(y_pred[i]["generated_text"])
+    rou_score = rouge.compute(predictions = y_prednew, references=y_true, use_aggregator=True)
+    rouge1 = rou_score['rouge1'] * 10
+    return rouge1
+
+@ClassFactory.register(ClassType.GENERAL, alias="rouge2")
+def rouge2(y_true, y_pred, **kwargs):
+    rouge=evaluate.load('./examples/LLM-Agent-Benchmark/evaluate/metrics/rouge')
+    y_prednew=[]
+    for i in range(len(y_pred)):
+        y_prednew.append(y_pred[i]["generated_text"])
+    rou_score = rouge.compute(predictions = y_prednew, references=y_true, use_aggregator=True)
+    rouge2 = rou_score['rouge2'] * 10
+    return rouge2
+
+@ClassFactory.register(ClassType.GENERAL, alias="rougeL")
+def rougeL(y_true, y_pred, **kwargs):
+    rouge=evaluate.load('./examples/LLM-Agent-Benchmark/evaluate/metrics/rouge')
+    y_prednew=[]
+    for i in range(len(y_pred)):
+        y_prednew.append(y_pred[i]["generated_text"])
+    rou_score = rouge.compute(predictions = y_prednew, references=y_true, use_aggregator=True)
+    rougeL = rou_score['rougeL'] * 10
+    return rougeL
+
+def calculate_mean(lst):
+    print(lst)
+    if not isinstance(lst,list):
+        return lst
+    if not lst:
+        return None
+    return sum(lst) / len(lst)
diff --git a/examples/llm-agent/singletask_learning_bench/testenv/testenv.yaml b/examples/llm-agent/singletask_learning_bench/testenv/testenv.yaml
@@ -0,0 +1,20 @@
+testenv:
+  # dataset configuration
+  dataset:
+    # the url address of train dataset index; string type;
+    train_url: "./examples/LLM-Agent-Benchmark/dataset/activity_classification.json"
+    # the url address of test dataset index; string type;
+    test_url: "./examples/LLM-Agent-Benchmark/dataset/activity_classification.json"
+    # model eval configuration of incremental learning;
+
+  metrics:
+      # metric name; string type;
+    - name: "rouge1"
+      # the url address of python file
+      url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py"
+    - name: "rouge2"
+      # the url address of python file
+      url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py"
+    - name: "rougeL"
+      # the url address of python file
+      url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py"