[NeuralChat] Support Gaudi model parallelism serving (#802)
lvliang-intel authored Nov 29, 2023
1 parent fd74a9a commit 7f0090e
Showing 18 changed files with 534 additions and 15 deletions.
1 change: 1 addition & 0 deletions intel_extension_for_transformers/neural_chat/config.py
@@ -402,6 +402,7 @@ class LoadingModelConfig:
use_hpu_graphs: bool = False
use_cache: bool = True
use_deepspeed: bool = False
world_size: int = 1
ipex_int8: bool = False
use_llm_runtime: bool = False
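
The new `world_size` field controls how many Gaudi cards DeepSpeed shards the model across when `use_deepspeed` is enabled. A minimal sketch of building a loading config with it, assuming the dataclass fields shown in the diff above (the surrounding usage is illustrative, not from this commit):

```python
from intel_extension_for_transformers.neural_chat.config import LoadingModelConfig

# Illustrative: request DeepSpeed model parallelism across 8 Gaudi cards.
loading_config = LoadingModelConfig(
    use_deepspeed=True,
    world_size=8,  # new field added in this commit; defaults to 1 (no sharding)
)
```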

@@ -0,0 +1,47 @@
This README is intended to guide you through setting up the backend for a text chatbot using the NeuralChat framework. You can deploy this text chatbot on various platforms, including Intel Xeon Scalable Processors, Habana's Gaudi processors (HPU), Intel Data Center and Client GPUs, and Nvidia Data Center and Client GPUs.

This README shows how to deploy the chatbot backend on Habana's Gaudi processors (HPU).

# Setup Conda

First, you need to install and configure the Conda environment:

```shell
# Download and install Miniconda
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda*.sh
source ~/.bashrc
```
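
Optionally, create and activate a dedicated environment for NeuralChat; the environment name and Python version below are illustrative choices, not requirements stated by this commit:

```shell
# Create and activate an isolated environment (name/version are examples)
conda create -n neuralchat python=3.9 -y
conda activate neuralchat
```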

# Install Python dependencies

Install the Python dependencies using pip:

>**Note**: Please make sure the transformers version is 4.34.1.
```bash
pip install -r ../../../../../requirements_hpu.txt
pip install transformers==4.34.1
```
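
A quick sanity check after installation; the `habana_frameworks` import assumes the Habana PyTorch bridge (SynapseAI stack) is already installed on the Gaudi machine:

```bash
# Confirm the pinned transformers version
python -c "import transformers; print(transformers.__version__)"  # expect 4.34.1
# Confirm the Habana PyTorch bridge is importable
python -c "import habana_frameworks.torch.core"
```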

# Configure the textbot.yaml

You can customize the configuration file `textbot.yaml` to match your environment setup. Here's a table to help you understand the configurable options, followed by a sample configuration:

| Item | Value |
| ------------------- | --------------------------------------- |
| host | 127.0.0.1 |
| port | 8000 |
| model_name_or_path | "meta-llama/Llama-2-7b-chat-hf" |
| device | "hpu" |
| use_deepspeed | true |
| world_size | 8 |
| tasks_list | ['textchat'] |
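
Put together, a `textbot.yaml` for an 8-card Gaudi deployment would look like this (values taken from the table above):

```yaml
host: 127.0.0.1
port: 8000

model_name_or_path: "meta-llama/Llama-2-7b-chat-hf"
device: "hpu"
use_deepspeed: true
world_size: 8

tasks_list: ['textchat']
```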



# Run the TextChat server
To start the TextChat server, use the following command:

```shell
nohup python run_text_chat.py &
```
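
Once the server is up, you can smoke-test it over HTTP. The request below is illustrative: it assumes an OpenAI-style chat completion route, so adjust the path and payload to whatever the deployed NeuralChat RESTful API actually exposes:

```bash
# Illustrative request; the endpoint path is an assumption, not taken from this commit
curl -X POST http://127.0.0.1:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "meta-llama/Llama-2-7b-chat-hf", "messages": [{"role": "user", "content": "Tell me about Intel Gaudi."}]}'
```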
@@ -0,0 +1,32 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This is the parameter configuration file for NeuralChat Serving.

#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8000

model_name_or_path: "Phind/Phind-CodeLlama-34B-v2"
device: "hpu"
use_deepspeed: true
world_size: 8

# task choices = ['textchat', 'voicechat', 'retrieval', 'text2image', 'finetune']
tasks_list: ['textchat']
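
As a rough sizing check (an estimate, not part of this commit): a 34B-parameter model in bf16 needs about 34e9 × 2 bytes ≈ 68 GB for weights alone, more than the 32 GB of HBM on a first-generation Gaudi card, which is why `use_deepspeed: true` with `world_size: 8` shards the model across 8 cards (roughly 8.5 GB of weights per card).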
@@ -1,5 +1,6 @@
This README is intended to guide you through setting up the backend for a text chatbot using the NeuralChat framework. You can deploy this text chatbot on various platforms, including Intel Xeon Scalable Processors, Habana's Gaudi processors (HPU), Intel Data Center and Client GPUs, and Nvidia Data Center and Client GPUs.

This README shows how to deploy the chatbot backend on Intel Xeon Scalable Processors.

# Setup Conda

@@ -0,0 +1,26 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from intel_extension_for_transformers.neural_chat import NeuralChatServerExecutor

def main():
    # Start the NeuralChat server with the settings in textbot.yaml,
    # writing server logs to textbot.log.
    server_executor = NeuralChatServerExecutor()
    server_executor(config_file="./textbot.yaml", log_file="./textbot.log")

if __name__ == "__main__":
    main()
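
Assuming this is the `run_text_chat.py` that the Gaudi README above launches, running it reads `./textbot.yaml`, starts the configured `textchat` task on the given host and port, and writes server logs to `./textbot.log`.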