enable dolly-v2-3b cpp model (#105)

Signed-off-by: intellinjun <jun.lin@intel.com> Co-authored-by: Dong, Bo <bo1.dong@intel.com> Co-authored-by: VincyZhang <wenxin.zhang@intel.com>
intel · Aug 9, 2023 · 96d4245 · 96d4245
1 parent 8b2dcb6
commit 96d4245
Show file tree

Hide file tree

Showing 7 changed files with 942 additions and 0 deletions.
diff --git a/intel_extension_for_transformers/backends/neural_engine/graph/README.md b/intel_extension_for_transformers/backends/neural_engine/graph/README.md
@@ -38,6 +38,11 @@ python scripts/convert_gptneox.py  ${input_model_name_or_path} --outtype f32 --o
 
 ./build/bin/quant_gptneox --model_file ${output_path}/ne-f32.bin --out_file ${output_path}/ne-q4_j.bin --bits 4
 
+# convert the pytorch dolly-v2-3b model to llama.cpp format
+python scripts/convert_dolly.py  ${input_model_name_or_path} --outtype f32 --outfile ${output_path}
+
+./build/bin/quant_dolly --model_file ${output_path}/ne-f32.bin --out_file ${output_path}/ne-q4_j.bin --bits 4
+
 # convert the pytorch mpt model to llama.cpp format
 python scripts/convert_mpt.py ${input_model_name_or_path} --outtype f32 --outfile ${output_path}
 
@@ -74,3 +79,7 @@ cp scripts/gptj_binding.py build
 cd build
 python gptj_binding.py
 ```
+
+### Supported models
+We now support [GPT-NeoX](https://github.com/EleutherAI/gpt-neox), [LLaMA](https://github.com/facebookresearch/llama), [Dolly-v2-3b](https://huggingface.co/databricks/dolly-v2-3b), [MPT](https://huggingface.co/mosaicml/mpt-7b), [FALCON](https://huggingface.co/tiiuae/falcon-7b), [STARCODER](https://huggingface.co/bigcode/starcoder), [GPT-J](https://huggingface.co/docs/transformers/model_doc/gptj).
+
diff --git a/intel_extension_for_transformers/backends/neural_engine/graph/application/CMakeLists.txt b/intel_extension_for_transformers/backends/neural_engine/graph/application/CMakeLists.txt
@@ -33,3 +33,4 @@ add_subdirectory(ChatMPT)
 add_subdirectory(ChatFALCON)
 add_subdirectory(ChatSTARCODER)
 add_subdirectory(ChatGPTJ)
+add_subdirectory(ChatDOLLY)
diff --git a/...ension_for_transformers/backends/neural_engine/graph/application/ChatDOLLY/CMakeLists.txt b/...ension_for_transformers/backends/neural_engine/graph/application/ChatDOLLY/CMakeLists.txt
@@ -0,0 +1,29 @@
+#  Copyright (c) 2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+# Dolly executables: main_dolly and quant_dolly (the quantizer invoked in the
+# README). Each is built from <name>.cpp with identical link and compile
+# settings, so they are declared in a single loop instead of duplicated stanzas.
+foreach(dolly_exe IN ITEMS main_dolly quant_dolly)
+  add_executable_w_warning(${dolly_exe} ${dolly_exe}.cpp)
+  # An executable has no downstream consumers, so its link dependencies are
+  # PRIVATE rather than PUBLIC.
+  target_link_libraries(${dolly_exe} PRIVATE ne_layers gptneox common ${CMAKE_THREAD_LIBS_INIT})
+  target_compile_features(${dolly_exe} PRIVATE cxx_std_11)
+  # BUILD_INFO is optional: only order the build after it when that target exists.
+  if(TARGET BUILD_INFO)
+    add_dependencies(${dolly_exe} BUILD_INFO)
+  endif()
+endforeach()