Skip to content

Commit

Permalink
LLM_Agent_Benchmark
Browse files Browse the repository at this point in the history
Signed-off-by: Frank-lilinjie <lilinjie@bupt.edu.cn>
  • Loading branch information
Frank-lilinjie committed Sep 29, 2024
1 parent de4b244 commit e911c9f
Show file tree
Hide file tree
Showing 8 changed files with 273 additions and 0 deletions.
11 changes: 11 additions & 0 deletions examples/llm-agent/config/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"tokenizer_dir": "./examples/LLM-Agent-Benchmark/pretrains/Langboat/bloom-1b4-zh",
"auth_token": "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",
"data_dir" :"./examples/LLM-Agent-Benchmark/dataset/activity_classification.json",
"token_factor": 32,
"half_model": true,
"token_padding": "right",
"trust_remote": true,
"device": "auto",
"output_dir": "./checkpoint"
}
11 changes: 11 additions & 0 deletions examples/llm-agent/config/train_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"per_device_train_batch_size":5,
"logging_steps":50,
"num_train_epochs":2,
"output_dir":"./checkpoint",
"half_lora":"True",
"learning_rate":2e-4,
"weight_decay":0.01,
"save_strategy":"epoch",
"save_total_limit":10
}
Empty file.
66 changes: 66 additions & 0 deletions examples/llm-agent/singletask_learning_bench/benchmarkingjob.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
benchmarkingjob:
# job name of benchmarking; string type;
name: "benchmarkingjob"
# the url address of job workspace that will reserve the output of tests; string type;
workspace: "./workspace"

# the url address of test environment configuration file; string type;
# the file format supports yaml/yml;
testenv: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/testenv.yaml"

# the configuration of test object
test_object:
# test type; string type;
# currently the only supported value is "algorithms"; others will be added later.
type: "algorithms"
# test algorithm configuration files; list type;
algorithms:
# algorithm name; string type;
- name: "LLM_agent"
# the url address of test algorithm configuration file; string type;
# the file format supports yaml/yml
url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testalgorithms/test_algorithm.yaml"

# the configuration of ranking leaderboard
rank:
# rank the leaderboard by test-case evaluation metrics and their sort order; list type;
# the sorting priority is based on the sequence of metrics in the list from front to back;
sort_by: [{ "rouge1": "descend" }]

# visualization configuration
visualization:
# mode of visualization in the leaderboard; string type;
# There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen.
# In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown.
mode: "selected_only"
# method of visualization for selected dataitems; string type;
# currently the options of value are as follows:
# 1> "print_table": print selected dataitems;
method: "print_table"

# selected dataitem configuration
# The user can add the dataitems they are interested in, in terms of "paradigms", "modules", "hyperparameters" and "metrics",
# so that the selected columns will be shown.
selected_dataitem:
# currently the options of value are as follows:
# 1> "all": select all paradigms in the leaderboard;
# 2> paradigms in the leaderboard, e.g., "singletasklearning"
paradigms: [ "all" ]
# currently the options of value are as follows:
# 1> "all": select all modules in the leaderboard;
# 2> modules in the leaderboard, e.g., "basemodel"
modules: [ "all" ]
# currently the options of value are as follows:
# 1> "all": select all hyperparameters in the leaderboard;
# 2> hyperparameters in the leaderboard, e.g., "momentum"
hyperparameters: [ "all" ]
# currently the options of value are as follows:
# 1> "all": select all metrics in the leaderboard;
# 2> metrics in the leaderboard, e.g., "F1_SCORE"
metrics: ["rouge1","rouge2","rougeL"]

# mode of saving selected and all dataitems in workspace `./rank`; string type;
# currently the options of value are as follows:
# 1> "selected_and_all": save selected and all dataitems;
# 2> "selected_only": save selected dataitems;
save_mode: "selected_and_all"
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import os
import zipfile
import logging
from transformers import AutoTokenizer,AutoModelForCausalLM
import torch
from peft import LoraConfig,get_peft_model,TaskType,PeftModel
from transformers import AutoModelForCausalLM,TrainingArguments,Trainer,pipeline,AutoTokenizer,DataCollatorForSeq2Seq
from sedna.common.class_factory import ClassType, ClassFactory
from sedna.common.config import Context
from sedna.common.file_ops import FileOps
from sedna.common.log import LOGGER
from functools import partial
import datasets
import json
import os

# Globally suppress every logging call of severity WARNING and below for the
# whole process (this affects all loggers, not only the ones in this module).
logging.disable(logging.WARNING)

# Public API of this module: only the model class is exported.
__all__ = ["BaseModel"]

# Set at import time, before the model class is defined/registered.
# NOTE(review): presumably read by sedna to select the PyTorch backend - confirm.
os.environ['BACKEND_TYPE'] = 'TORCH'

@ClassFactory.register(ClassType.GENERAL, alias="LLM_agent")
class BaseModel:
    """Causal language model fine-tuned with LoRA, registered as ``LLM_agent``.

    The constructor receives the paths of two JSON files through ``kwargs``:

    * ``config``: model/tokenizer settings. Keys read here: ``tokenizer_dir``,
      ``auth_token`` (optional), ``token_factor``, ``data_dir``, ``device``
      and ``trust_remote``.
    * ``train_config``: keyword arguments forwarded to ``TrainingArguments``,
      plus the extra ``half_lora`` flag, which is consumed by :meth:`train`
      and not forwarded.
    """

    def __init__(self, **kwargs):
        """Load both JSON configuration files, then the model and tokenizer."""
        config = kwargs.get("config")
        with open(config, 'r', encoding='utf-8') as file:
            self.config = json.load(file)
        train_config = kwargs.get("train_config")
        with open(train_config, 'r', encoding='utf-8') as file:
            self.train_config = json.load(file)

        self.tokenizer_dir = self.config["tokenizer_dir"]
        # Optional: when the key is absent, None is passed to ``from_pretrained``
        # so the access token does not have to be stored in the config file.
        self.auth_token = self.config.get("auth_token")
        self.token_factor = self.config["token_factor"]
        # Upper bound on tokens per training sample (see ``preprocess``).
        self.MAX_LENGTH = 128
        self.data_dir = self.config["data_dir"]
        # ``token=`` replaces the deprecated ``use_auth_token=`` and matches
        # the keyword already used for the tokenizer below.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.tokenizer_dir,
            token=self.auth_token,
            device_map=self.config["device"],
            trust_remote_code=self.config["trust_remote"],
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.tokenizer_dir, token=self.auth_token
        )

    @staticmethod
    def _as_bool(value):
        """Interpret a JSON flag that may be a real bool or a string like "True"."""
        if isinstance(value, str):
            return value.strip().lower() in ("1", "true", "yes", "y", "on")
        return bool(value)

    def train(self, train_data, **kwargs):
        """Fine-tune the model with LoRA and return the trained model.

        ``train_data`` is accepted for interface compatibility but is not
        used: the training set is always re-loaded from ``self.data_dir``.

        Returns:
            The model held by the trainer after training; it is also stored
            in ``self.model``.
        """
        train_data = self.load_json(self.data_dir, self.tokenizer)
        lora_config = LoraConfig(
            task_type=TaskType.CAUSAL_LM,
            lora_alpha=1,
            lora_dropout=0.0,
        )
        model = get_peft_model(self.model, lora_config)

        # Work on a copy so the stored config is left intact: deleting the key
        # in place made a second ``train()`` call fail with KeyError.
        trainer_kwargs = dict(self.train_config)
        # The config may carry the flag as the string "True"; comparing that
        # with ``== True`` skipped the fp16 cast while the epsilon below was
        # still switched on truthiness. Normalise once and use it for both.
        half = self._as_bool(trainer_kwargs.pop("half_lora", False))
        if half:
            model = model.half()

        # A larger Adam epsilon is used together with half precision.
        args = TrainingArguments(
            adam_epsilon=(1e-4 if half else 1e-8),
            **trainer_kwargs,
        )
        trainer = Trainer(
            model=model,
            args=args,
            data_collator=DataCollatorForSeq2Seq(tokenizer=self.tokenizer, padding=True),
            train_dataset=train_data["train"],
            eval_dataset=None,
        )
        trainer.train()
        self.model = trainer.model
        return self.model

    def predict(self, data, **kwargs):
        """Run ``data`` through a generation pipeline and return its output.

        NOTE(review): the task is "text2text-generation" although the model is
        loaded as a causal LM. It is kept unchanged because the ROUGE metrics
        index the result as ``y_pred[i]["generated_text"]``, which depends on
        this pipeline's output layout - confirm before switching tasks.
        """
        pipe = pipeline("text2text-generation", model=self.model, tokenizer=self.tokenizer)
        y_pred = pipe(data)
        return y_pred

    def evaluate(self, data, **kwargs):
        """Not implemented; returns None."""
        pass

    def load(self, model_url, **kwargs):
        """Print ``model_url`` when it is truthy; no weights are loaded here."""
        if model_url:
            print("load model url: ", model_url)

    def save(self, model_path=None):
        """Not implemented; returns None."""
        pass

    def load_json(self, data_dir, tokenizer, token_factor=32):
        """Load the JSON dataset at ``data_dir`` and tokenize it for training.

        Rows are mapped in batches of two through :meth:`preprocess`; the
        ``role`` and ``content`` columns are dropped afterwards. Every call to
        ``preprocess`` emits one placeholder row containing ``None``, which is
        filtered out here.

        ``token_factor`` is accepted for interface compatibility but unused.

        Returns:
            The filtered dataset returned by ``datasets``.
        """
        raw = datasets.load_dataset("json", data_files=data_dir)
        tokenized = raw.map(
            self.preprocess,
            fn_kwargs={"MAX_LENGTH": self.MAX_LENGTH, "tokenizer": tokenizer},
            batched=True,
            batch_size=2,
            remove_columns=['role', 'content'],
        )
        # Drop the placeholder rows whose labels contain None.
        filtered_ds = tokenized.filter(lambda example: None not in example["labels"])
        return filtered_ds

    def preprocess(self, samples, MAX_LENGTH, tokenizer):
        """Turn one two-row batch into a single tokenized training example.

        ``samples["content"][0]`` is used as the user instruction and
        ``samples["content"][1]`` as the plan (the response to learn).
        NOTE(review): this assumes rows alternate user/assistant and that the
        dataset has an even number of rows; a trailing single-row batch would
        raise IndexError - confirm against the dataset.

        Returns:
            A dict of ``input_ids``, ``attention_mask`` and ``labels``; each
            value holds two rows: a ``[None]`` placeholder followed by the
            real sequence, truncated to ``MAX_LENGTH`` tokens.
        """
        prompt = samples["content"][0]  # the user's instruction
        plan = samples["content"][1]    # the plan (target response)

        # Encode the instruction wrapped in "user: " / "assistant: " markers.
        instruction = tokenizer(
            "\n".join(["user: ", prompt]) + "\n\nassistant: ",
            add_special_tokens=False,
        )
        response = tokenizer(plan, add_special_tokens=False)

        input_ids = instruction["input_ids"] + response["input_ids"] + [tokenizer.eos_token_id]
        attention_mask = instruction["attention_mask"] + response["attention_mask"] + [1]
        # Prompt positions are labelled -100 so only the response (and EOS)
        # tokens carry a training target.
        labels = (
            len(instruction["input_ids"]) * [-100]
            + response["input_ids"]
            + [tokenizer.eos_token_id]
        )

        if len(labels) > MAX_LENGTH:
            input_ids = input_ids[:MAX_LENGTH]
            attention_mask = attention_mask[:MAX_LENGTH]
            labels = labels[:MAX_LENGTH]

        # The leading [None] row is a placeholder removed later by ``load_json``.
        return {
            "input_ids": [[None], input_ids],
            "attention_mask": [[None], attention_mask],
            "labels": [[None], labels],
        }
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
algorithm:
paradigm_type: "singletasklearning"
# initial_model_url: "./models/530_exp3_2.pth"

modules:
- type: "basemodel"
name: "LLM_agent"
url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testalgorithms/basemodel.py"

hyperparameters:
- config:
values:
- "./examples/LLM-Agent-Benchmark/config/config.json"
- train_config:
values:
- "./examples/LLM-Agent-Benchmark/config/train_config.json"
42 changes: 42 additions & 0 deletions examples/llm-agent/singletask_learning_bench/testenv/rouge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import evaluate
import numpy as np
from sedna.common.class_factory import ClassType, ClassFactory
from transformers import AutoTokenizer,AutoModelForCausalLM

@ClassFactory.register(ClassType.GENERAL, alias="rouge1")
def rouge1(y_true, y_pred, **kwargs):
    """Return the ``rouge1`` score of ``y_pred`` against ``y_true``, times 10.

    Each element of ``y_pred`` must be a mapping with a ``"generated_text"``
    entry; those texts are the predictions passed to the metric.
    """
    metric = evaluate.load('./examples/LLM-Agent-Benchmark/evaluate/metrics/rouge')
    generated = [item["generated_text"] for item in y_pred]
    scores = metric.compute(
        predictions=generated,
        references=y_true,
        use_aggregator=True,
    )
    return scores['rouge1'] * 10

@ClassFactory.register(ClassType.GENERAL, alias="rouge2")
def rouge2(y_true, y_pred, **kwargs):
    """Return the ``rouge2`` score of ``y_pred`` against ``y_true``, times 10.

    Each element of ``y_pred`` must be a mapping with a ``"generated_text"``
    entry; those texts are the predictions passed to the metric.
    """
    metric = evaluate.load('./examples/LLM-Agent-Benchmark/evaluate/metrics/rouge')
    generated = [item["generated_text"] for item in y_pred]
    scores = metric.compute(
        predictions=generated,
        references=y_true,
        use_aggregator=True,
    )
    return scores['rouge2'] * 10

@ClassFactory.register(ClassType.GENERAL, alias="rougeL")
def rougeL(y_true, y_pred, **kwargs):
    """Return the ``rougeL`` score of ``y_pred`` against ``y_true``, times 10.

    Each element of ``y_pred`` must be a mapping with a ``"generated_text"``
    entry; those texts are the predictions passed to the metric.
    """
    metric = evaluate.load('./examples/LLM-Agent-Benchmark/evaluate/metrics/rouge')
    generated = [item["generated_text"] for item in y_pred]
    scores = metric.compute(
        predictions=generated,
        references=y_true,
        use_aggregator=True,
    )
    return scores['rougeL'] * 10

def calculate_mean(lst):
    """Print ``lst`` and return its arithmetic mean.

    A value that is not a ``list`` is returned unchanged; an empty list
    yields ``None``.
    """
    print(lst)
    if isinstance(lst, list):
        return sum(lst) / len(lst) if lst else None
    return lst
20 changes: 20 additions & 0 deletions examples/llm-agent/singletask_learning_bench/testenv/testenv.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
testenv:
# dataset configuration
dataset:
# the url address of train dataset index; string type;
train_url: "./examples/LLM-Agent-Benchmark/dataset/activity_classification.json"
# the url address of test dataset index; string type;
test_url: "./examples/LLM-Agent-Benchmark/dataset/activity_classification.json"
# model eval configuration of incremental learning;

metrics:
# metric name; string type;
- name: "rouge1"
# the url address of python file
url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py"
- name: "rouge2"
# the url address of python file
url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py"
- name: "rougeL"
# the url address of python file
url: "./examples/LLM-Agent-Benchmark/singletask_learning_bench/testenv/rouge.py"

0 comments on commit e911c9f

Please sign in to comment.