Skip to content

Commit

Permalink
Feature/ds assistant (#583)
Browse files Browse the repository at this point in the history
  • Loading branch information
dahaipeng authored Sep 13, 2024
1 parent 2029322 commit aa26f10
Showing 1 changed file with 82 additions and 10 deletions.
92 changes: 82 additions & 10 deletions modelscope_agent/agents/data_science_assistant.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# Implementation inspired by the paper "DATA INTERPRETER: AN LLM AGENT FOR DATA SCIENCE"
import asyncio
import copy
import os
import time
from datetime import datetime
Expand Down Expand Up @@ -39,8 +37,7 @@
- **other**: Any tasks not in the defined categories
# Task:
Based on the context, write a simple plan or modify an existing plan of what you should do to achieve the goal. A plan \
consists of one to four tasks.
Based on the context, write a simple plan or modify an existing plan of what you should do to achieve the goal.
Output a list of jsons following the format:
```json
Expand All @@ -55,6 +52,44 @@
]
```
"""

# Prompt used to ask the LLM to split one task into smaller tasks.
# It is rendered with str.format, so it expects three fields:
#   {context}        - free-text description of what is being worked on
#   {previous_tasks} - the tasks already generated, one per line
#   {current_task}   - the task that has to be decomposed
# Doubled braces ({{ and }}) are literal braces in the rendered prompt.
# A trailing backslash joins two source lines into one prompt line, so the
# text before it must end with a space to keep the words apart.
DECOMPOSE_TASK_TEMPLATE = """
# Context:
{context}
# Available Task Types:
- **eda**: For performing exploratory data analysis
- **data preprocessing**: For preprocessing dataset in a data analysis or machine learning task ONLY, \
general data operation doesn't fall into this type
- **feature engineering**: Only for creating new columns for input data.
- **model train**: Only for training model.
- **model evaluate**: Only for evaluating model.
- **ocr**: Only for OCR tasks.
- **other**: Any tasks not in the defined categories
# Previous Tasks
We have already generated the following tasks:
{previous_tasks}
# Task:
The current task is:
{current_task}
Currently, the current task is too complex to be executed in one step. Please decompose the task into smaller tasks, \
and output a list of jsons following the format:
```json
[
{{
"task_id": str = "unique identifier for a task in plan, can be an ordinal, \
should be unique and not conflict with previous task ids",
"dependent_task_ids": list[str] = "ids of tasks prerequisite to this task",
"instruction": "what you should do in this task, one short phrase or sentence",
"task_type": "type of this task, should be one of Available Task Types",
}},
...
]
```
"""

CODE_TEMPLATE = """
# Task
you are a code generator, you need to generate a code python block in jupyter notebook to achieve the \
Expand Down Expand Up @@ -597,8 +632,8 @@ def _judge_code(self, task, previous_code_blocks, code,
if 'incorrect' in judge_result.split('\n')[-1]:
success = False
failed_reason = (
'Though the code executes successfully, The code logic is incorrect, here is the reason: '
+ judge_result)
'Though the code executes successfully, The code logic is \
incorrect, here is the reason: ' + judge_result)
return success, failed_reason

else:
Expand Down Expand Up @@ -634,7 +669,7 @@ def _run(self, user_request, save: bool = True, **kwargs):
previous_code_blocks = self._get_previous_code_blocks()
success = False
code_counter = 0
max_try = kwargs.get('max_try', 10)
max_try = kwargs.get('max_try', 1)
while not success and code_counter < max_try:
code_execute_success = False
code_logic_success = False
Expand Down Expand Up @@ -726,9 +761,13 @@ def _run(self, user_request, save: bool = True, **kwargs):
encoding='utf-8') as file:
nbformat.write(self.code_interpreter.nb, file)
else:
self.plan = self._update_plan(
user_request=user_request, curr_plan=self.plan)
self.code_interpreter.reset()
decomposed_tasks = self._decompose_task(task)
if decomposed_tasks:
self.plan.replace_task(task, decomposed_tasks)
else:
self.plan = self._update_plan(
user_request=user_request, curr_plan=self.plan)
self.code_interpreter.reset()
# save the plan into json file
if save:
after_time = time.time()
Expand Down Expand Up @@ -769,3 +808,36 @@ def _get_total_tokens(self):
except Exception as e:
logger.error(f'get total token error: {e}')
pass

def _decompose_task(self, task):
    """Ask the LLM to break ``task`` into smaller tasks.

    A single user message is built from ``DECOMPOSE_TASK_TEMPLATE`` using
    the task's instruction, a JSON line for every task in ``self.plan.tasks``
    and a JSON dump of ``task.__dict__``. The streamed reply is concatenated,
    its ``json`` code block is extracted with ``parse_code`` and parsed with
    ``json5``; each parsed entry is turned into a ``Task``.

    Args:
        task: The task to decompose; its ``task_id``, ``instruction`` and
            ``__dict__`` are read.

    Returns:
        The list of ``Task`` objects built from the reply, or ``None`` when
        any step raises (the error is logged, not propagated).
    """
    try:
        print(f'decompose task {task.task_id}')

        # Evaluate the three template fields in the same order as the
        # template call would: context, previous tasks, current task.
        context_text = 'User Request: ' + task.instruction + '\n'

        plan_lines = []
        for planned in self.plan.tasks:
            summary = {
                'task_id': planned.task_id,
                'dependent_task_ids': planned.dependent_task_ids,
                'instruction': planned.instruction,
                'task_type': planned.task_type
            }
            plan_lines.append(json.dumps(summary))
        previous_text = '\n'.join(plan_lines)

        current_text = json.dumps(task.__dict__)

        user_message = {
            'role': 'user',
            'content': DECOMPOSE_TASK_TEMPLATE.format(
                context=context_text,
                previous_tasks=previous_text,
                current_task=current_text)
        }
        resp = self._call_llm(
            prompt=None, messages=[user_message], stop=None)

        # The reply is consumed piece by piece and glued together.
        reply = ''
        for chunk in resp:
            reply += chunk

        tasks_text = parse_code(text=reply, lang='json')
        logger.info(f'decomposed tasks: {tasks_text}')

        parsed_entries = json5.loads(tasks_text)
        return [Task(**entry) for entry in parsed_entries]
    except Exception as e:
        # Best effort: the caller receives None instead of an exception.
        logger.error(f'decompose task error: {e}')
        return None

0 comments on commit aa26f10

Please sign in to comment.