Enable Qwen-7B-Chat (#432)

Co-authored-by: lvliang-intel <liang1.lv@intel.com>
intel · Oct 13, 2023 · 698e589 · 698e589
1 parent ba5d9e3
commit 698e589
Show file tree

Hide file tree

Showing 7 changed files with 67 additions and 6 deletions.
diff --git a/intel_extension_for_transformers/neural_chat/chatbot.py b/intel_extension_for_transformers/neural_chat/chatbot.py
@@ -64,6 +64,9 @@ def build_chatbot(config: PipelineConfig=None):
     elif "chatglm" in config.model_name_or_path:
         from .models.chatglm_model import ChatGlmModel
         adapter = ChatGlmModel()
+    elif "Qwen" in config.model_name_or_path:
+        from .models.qwen_model import QwenModel
+        adapter = QwenModel()
     elif "opt" in config.model_name_or_path or \
          "gpt" in config.model_name_or_path or \
          "flan-t5" in config.model_name_or_path or \
@@ -72,7 +75,7 @@ def build_chatbot(config: PipelineConfig=None):
         adapter = BaseModel()
     else:
         raise ValueError("NeuralChat Error: Unsupported model name or path, \
-                         only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/NEURAL-CHAT now.")
+                         only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/QWEN/NEURAL-CHAT now.")
 
     # register plugin instance in model adaptor
     if config.plugins:

diff --git a/intel_extension_for_transformers/neural_chat/docs/notebooks/build_chatbot_on_spr.ipynb b/intel_extension_for_transformers/neural_chat/docs/notebooks/build_chatbot_on_spr.ipynb
@@ -47,7 +47,7 @@
    "source": [
     "!git clone https://github.com/intel/intel-extension-for-transformers.git\n",
     "!cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/\n",
-    "!pip install -r requirements.txt"
+    "!pip install -r requirements_cpu.txt"
    ]
   },
   {

diff --git a/intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_xpu.ipynb b/intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_xpu.ipynb
@@ -52,7 +52,7 @@
    "source": [
     "!git clone https://github.com/intel/intel-extension-for-transformers.git\n",
     "!cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/\n",
-    "!pip install -r requirements.txt"
+    "!pip install -r requirements_xpu.txt"
    ]
   },
   {
@@ -139,7 +139,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from neural_chat import TextChatClientExecutor\n",
+    "from intel_extension_for_transformers.neural_chat import TextChatClientExecutor\n",
     "executor = TextChatClientExecutor()\n",
     "result = executor(\n",
     "    prompt=\"Tell me about Intel Xeon Scalable Processors.\",\n",

diff --git a/intel_extension_for_transformers/neural_chat/models/model_utils.py b/intel_extension_for_transformers/neural_chat/models/model_utils.py
@@ -330,6 +330,7 @@ def load_model(
         use_fast=False if (re.search("llama", model_name, re.IGNORECASE)
             or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)) else True,
         use_auth_token=hf_access_token,
+        trust_remote_code=True if (re.search("qwen", model_name, re.IGNORECASE)) else False,
     )
     config = AutoConfig.from_pretrained(model_name, use_auth_token=hf_access_token)
     load_to_meta = model_on_meta(config)
@@ -356,6 +357,7 @@ def load_model(
         or re.search("opt", model_name, re.IGNORECASE)
         or re.search("neural-chat-7b-v1", model_name, re.IGNORECASE)
         or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)
+        or re.search("qwen", model_name, re.IGNORECASE)
     ):
         with smart_context_manager(use_deepspeed=use_deepspeed):
             model = AutoModelForCausalLM.from_pretrained(
@@ -367,7 +369,7 @@ def load_model(
             )
     else:
         raise ValueError(
-            f"Unsupported model {model_name}, only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/NEURAL-CHAT now."
+            f"Unsupported model {model_name}, only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/QWEN/NEURAL-CHAT now."
         )
 
     if re.search("llama", model.config.architectures[0], re.IGNORECASE):

diff --git a/intel_extension_for_transformers/neural_chat/models/qwen_model.py b/intel_extension_for_transformers/neural_chat/models/qwen_model.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base_model import BaseModel, register_model_adapter
+import logging
+from fastchat.conversation import get_conv_template, Conversation
+
+# Logging is configured as soon as this module is imported: INFO level,
+# with each record showing timestamp, level, logger name and message.
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+    datefmt="%m/%d/%Y %H:%M:%S",
+    level=logging.INFO,
+)
+logger = logging.getLogger(__name__)
+
+class QwenModel(BaseModel):
+    def match(self, model_path: str):
+        """
+        Report whether ``model_path`` refers to a Qwen model.
+
+        Args:
+            model_path (str): Model name or path to inspect.
+
+        Returns:
+            bool: True when "qwen" occurs in ``model_path``, ignoring case.
+        """
+        return model_path.lower().find("qwen") >= 0
+
+    def get_default_conv_template(self, model_path: str) -> Conversation:
+        """
+        Return the conversation template used for Qwen chat models.
+
+        Args:
+            model_path (str): Path to the model; not consulted here.
+
+        Returns:
+            Conversation: Result of looking up the "qwen-7b-chat" template.
+        """
+        return get_conv_template("qwen-7b-chat")
+
+# Runs at import time; presumably adds QwenModel to the adapter registry
+# kept in base_model -- confirm against register_model_adapter.
+register_model_adapter(QwenModel)
diff --git a/intel_extension_for_transformers/neural_chat/requirements_cpu.txt b/intel_extension_for_transformers/neural_chat/requirements_cpu.txt
@@ -1,7 +1,7 @@
 transformers>=4.32.0
 peft
 fschat
-intel_extension_for_pytorch
+intel_extension_for_pytorch==2.0.100
 num2words
 speechbrain
 paddlepaddle
@@ -35,6 +35,7 @@ openpyxl
 numpy==1.23.5
 tiktoken==0.4.0
 lm_eval
+transformers_stream_generator==0.0.4
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch==2.0.1
 torchaudio==2.0.2
diff --git a/intel_extension_for_transformers/neural_chat/requirements_xpu.txt b/intel_extension_for_transformers/neural_chat/requirements_xpu.txt
@@ -28,4 +28,5 @@ rouge_score
 openpyxl
 numpy==1.23.5
 tiktoken==0.4.0
+transformers_stream_generator==0.0.4
 cchardet