[NeuralChat] Support microsoft/biogpt model as per the request (#1327)
* Support microsoft/biogpt model

* add dependency

---------

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
lvliang-intel authored Feb 29, 2024
1 parent de88006 commit 3e7e353
Showing 9 changed files with 75 additions and 14 deletions.
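After this change, BioGPT serves through the regular NeuralChat pipeline. A minimal usage sketch, mirroring the unit test added in this commit but assuming the public microsoft/biogpt checkpoint from the Hugging Face Hub rather than the test's internal path:

from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig

# Build a chatbot around BioGPT; "microsoft/biogpt" is assumed here,
# while the repo's own test points at an internal CI mirror instead.
config = PipelineConfig(model_name_or_path="microsoft/biogpt")
chatbot = build_chatbot(config=config)

# BioGPT is a plain causal LM with no chat template, so the reply
# continues the prompt verbatim.
result = chatbot.predict("COVID-19 is ")
print(result)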
@@ -170,7 +170,8 @@ def predict_stream(self, query, origin_query="", config=None):
if (self.conv_template.roles[0] in query and self.conv_template.roles[1] in query) or \
"starcoder" in self.model_name.lower() or "codellama" in self.model_name.lower() or \
"codegen" in self.model_name.lower() or "magicoder" in self.model_name.lower() or \
"phi-2" in self.model_name.lower() or "sqlcoder" in self.model_name.lower() or self.hf_client:
"phi-2" in self.model_name.lower() or "sqlcoder" in self.model_name.lower() or \
"biogpt" in self.model_name.lower() or self.hf_client:
query_include_prompt = True

# plugin pre actions
@@ -293,7 +294,7 @@ def predict(self, query, origin_query="", config=None):
if (self.conv_template.roles[0] in query and self.conv_template.roles[1] in query) or \
"starcoder" in self.model_name.lower() or "codellama" in self.model_name.lower() or \
"codegen" in self.model_name.lower() or "magicoder" in self.model_name.lower() or \
"sqlcoder" in self.model_name.lower() or self.hf_client:
"sqlcoder" in self.model_name.lower() or "biogpt" in self.model_name.lower() or self.hf_client:
query_include_prompt = True

# plugin pre actions
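Both hunks extend the same pattern: the model name is lower-cased and probed for known completion-style families, and a match sets query_include_prompt. For reference, the growing or-chain is equivalent to a membership test over a keyword tuple; an illustrative refactor (not part of this commit), using the keyword list from the streaming path (the non-streaming check omits "phi-2"):

def query_includes_prompt(model_name: str, hf_client) -> bool:
    # Completion-style families whose queries already carry the prompt;
    # "biogpt" is the entry this commit adds.
    keywords = ("starcoder", "codellama", "codegen", "magicoder",
                "phi-2", "sqlcoder", "biogpt")
    return any(k in model_name.lower() for k in keywords) or bool(hf_client)

# query_includes_prompt("microsoft/biogpt", hf_client=False) -> True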
41 changes: 29 additions & 12 deletions intel_extension_for_transformers/neural_chat/models/model_utils.py
@@ -498,10 +498,14 @@ def load_model(
MODELS[model_name]["assistant_model"] = None

try:
config = AutoConfig.from_pretrained(model_name, use_auth_token=hf_access_token, trust_remote_code=True \
if (re.search("chatglm", model_name, re.IGNORECASE) or \
re.search("qwen", model_name, re.IGNORECASE) or \
re.search("deci", model_name, re.IGNORECASE)) else False)
if re.search("biogpt", model_name, re.IGNORECASE):
from transformers import BioGptConfig
config = BioGptConfig.from_pretrained(model_name, use_auth_token=hf_access_token)
else:
config = AutoConfig.from_pretrained(model_name, use_auth_token=hf_access_token, trust_remote_code=True \
if (re.search("chatglm", model_name, re.IGNORECASE) or \
re.search("qwen", model_name, re.IGNORECASE) or \
re.search("deci", model_name, re.IGNORECASE)) else False)
except ValueError as e:
logging.error(f"Exception: {e}")
if "Unrecognized model in" in str(e):
@@ -524,14 +528,18 @@ def load_model(
MODELS[model_name]["model_type"] = config.model_type

try:
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name,
use_fast=False if (re.search("llama", model_name, re.IGNORECASE)
or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)) else True,
use_auth_token=hf_access_token,
trust_remote_code=True if (re.search("qwen", model_name, re.IGNORECASE) or \
re.search("chatglm", model_name, re.IGNORECASE) or gguf_model_path) else False,
)
if config.model_type == "biogpt":
from transformers import BioGptTokenizer
tokenizer = BioGptTokenizer.from_pretrained(tokenizer_name)
else:
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name,
use_fast=False if (re.search("llama", model_name, re.IGNORECASE)
or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)) else True,
use_auth_token=hf_access_token,
trust_remote_code=True if (re.search("qwen", model_name, re.IGNORECASE) or \
re.search("chatglm", model_name, re.IGNORECASE) or gguf_model_path) else False,
)
except EnvironmentError as e:
logging.error(f"Exception: {e}")
if "not a local folder and is not a valid model identifier" in str(e):
@@ -617,6 +625,15 @@ def load_model(
trust_remote_code=True if (config.model_type == "qwen" or config.model_type == "phi" or \
re.search("codegen", model_name, re.IGNORECASE) or config.model_type == "deci") else False
)
elif config.model_type == "biogpt":
from transformers import BioGptForCausalLM
with smart_context_manager(use_deepspeed=use_deepspeed):
model = BioGptForCausalLM.from_pretrained(
model_name,
use_auth_token=hf_access_token,
torch_dtype=torch_dtype,
low_cpu_mem_usage=True,
quantization_config=bitsandbytes_quant_config)
elif (
(config.model_type == "gpt_bigcode"
or config.model_type == "llama"
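Stripped of the NeuralChat plumbing, the three new branches above dispatch to the stock transformers BioGPT classes. A standalone sketch of what they resolve to, assuming the public microsoft/biogpt checkpoint and no quantization:

import torch
from transformers import BioGptConfig, BioGptForCausalLM, BioGptTokenizer

# The dedicated classes the new "biogpt" branches select.
config = BioGptConfig.from_pretrained("microsoft/biogpt")
tokenizer = BioGptTokenizer.from_pretrained("microsoft/biogpt")  # requires sacremoses
model = BioGptForCausalLM.from_pretrained(
    "microsoft/biogpt",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

inputs = tokenizer("COVID-19 is ", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))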
@@ -23,6 +23,7 @@ pymysql
python-dotenv
python-multipart
rouge_score
sacremoses
shortuuid
starlette
tensorflow>=2.13.0
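sacremoses enters each of the touched requirements files for the same reason: transformers' BioGptTokenizer tokenizes with Moses rules and imports sacremoses at runtime, so loading BioGPT fails when the package is missing.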
@@ -22,6 +22,7 @@ pymysql
python-dotenv
python-multipart
rouge_score
sacremoses
shortuuid
starlette
tiktoken==0.4.0
@@ -17,6 +17,7 @@ pymysql
python-dotenv
python-multipart
rouge_score
sacremoses
shortuuid
starlette
transformers>=4.35.2
@@ -20,6 +20,7 @@ pymysql
python-dotenv
python-multipart
rouge_score
sacremoses
shortuuid
starlette
torch==2.2.0
@@ -17,6 +17,7 @@ pymysql
python-dotenv
python-multipart
rouge_score
sacremoses
shortuuid
starlette
torch==2.1.0a0
@@ -0,0 +1,37 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
import unittest

class TestBioGPTModel(unittest.TestCase):
def setUp(self):
return super().setUp()

def tearDown(self) -> None:
return super().tearDown()

def test_run_inference(self):
config = PipelineConfig(
model_name_or_path="/tf_dataset2/models/nlp_toolkit/biogpt")
chatbot = build_chatbot(config=config)
result = chatbot.predict("COVID-19 is ")
print(result)
        self.assertIn('COVID-19 is', str(result))
        self.assertIn('pandemic', str(result))

if __name__ == "__main__":
unittest.main()
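Note that the test loads the checkpoint from an internal CI mirror (/tf_dataset2/models/nlp_toolkit/biogpt); outside that environment the same test should pass with model_name_or_path="microsoft/biogpt", the public checkpoint named in the commit title.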
@@ -60,6 +60,7 @@ qdrant-client
rank_bm25
resampy==0.3.1
rouge_score
sacremoses
safetensors
scikit-image==0.19.3
scikit-learn