[Backend] Support IPU in Paddle Inference backend. (#437)
* feat(ipu): add ipu support for paddle_infer backend.

* fix(): remove unused env.

* fix(ipu): simplify user API for IPU.

* fix(cmake): fix merge conflict error in CMakeLists.txt.

Co-authored-by: Jason <jiangjiajun@baidu.com>
czr-gc and jiangjiajun authored Oct 30, 2022
1 parent ee2c613 commit ede59af
Showing 23 changed files with 457 additions and 115 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
@@ -52,6 +52,7 @@ endif()
############################# Basic Options for FastDeploy ################################
option(ENABLE_PADDLE_FRONTEND "Whether to enable the PaddlePaddle frontend to support loading Paddle models in FastDeploy." ON)
option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-inference-gpu/poros-gpu" OFF)
option(WITH_IPU "Whether WITH_IPU=ON, will enable paddle-inference-ipu" OFF)
option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF)
option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF)
option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF)
@@ -323,6 +324,10 @@ else()
set(BUILD_CUDA_SRC OFF)
endif()

if(WITH_IPU)
add_definitions(-DWITH_IPU)
endif()

if(ENABLE_TRT_BACKEND)
if(APPLE OR ANDROID OR IOS)
message(FATAL_ERROR "Cannot enable tensorrt backend in mac/ios/android os, please set -DENABLE_TRT_BACKEND=OFF.")
104 changes: 52 additions & 52 deletions README_CN.md

Large diffs are not rendered by default.

104 changes: 52 additions & 52 deletions README_EN.md

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions cmake/paddle_inference.cmake
@@ -54,7 +54,7 @@ if(WIN32)
set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-${PADDLEINFERENCE_VERSION}.zip")
else()
set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-${PADDLEINFERENCE_VERSION}.zip")
endif()
endif()
elseif(APPLE)
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
message(FATAL_ERROR "Paddle Backend doesn't support Mac OSX with Arm64 now.")
@@ -71,6 +71,9 @@ else()
if(WITH_GPU)
set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-gpu-trt-${PADDLEINFERENCE_VERSION}.tgz")
endif()
if(WITH_IPU)
set(PADDLEINFERENCE_FILE "paddle_inference-linux-x64-ipu-${PADDLEINFERENCE_VERSION}.tgz")
endif()
endif()
endif()
set(PADDLEINFERENCE_URL "${PADDLEINFERENCE_URL_BASE}${PADDLEINFERENCE_FILE}")
@@ -85,7 +88,7 @@ ExternalProject_Add(
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${PADDLEINFERENCE_SOURCE_DIR} ${PADDLEINFERENCE_INSTALL_DIR}
${CMAKE_COMMAND} -E copy_directory ${PADDLEINFERENCE_SOURCE_DIR} ${PADDLEINFERENCE_INSTALL_DIR}
BUILD_BYPRODUCTS ${PADDLEINFERENCE_COMPILE_LIB})

if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
1 change: 1 addition & 0 deletions docs/README_CN.md
@@ -7,6 +7,7 @@
- [Download and Install Prebuilt Libraries](cn/build_and_install/download_prebuilt_libraries.md)
- [Build and Install on GPU Platform](cn/build_and_install/gpu.md)
- [Build and Install on CPU Platform](cn/build_and_install/cpu.md)
- [Build and Install on IPU Platform](cn/build_and_install/ipu.md)
- [Build and Install on Nvidia Jetson Platform](cn/build_and_install/jetson.md)
- [Build and Install on Android Platform](cn/build_and_install/android.md)

3 changes: 2 additions & 1 deletion docs/README_EN.md
@@ -1,4 +1,4 @@
[简体中文](README_CN.md)| English
[简体中文](README_CN.md)| English

# Tutorials

@@ -7,6 +7,7 @@
- [How to Install FastDeploy Prebuilt Libraries](en/build_and_install/download_prebuilt_libraries.md)
- [How to Build and Install FastDeploy Library on GPU Platform](en/build_and_install/gpu.md)
- [How to Build and Install FastDeploy Library on CPU Platform](en/build_and_install/cpu.md)
- [How to Build and Install FastDeploy Library on IPU Platform](en/build_and_install/ipu.md)
- [How to Build and Install FastDeploy Library on Nvidia Jetson Platform](en/build_and_install/jetson.md)
- [How to Build and Install FastDeploy Library on Android Platform](en/build_and_install/android.md)

51 changes: 51 additions & 0 deletions docs/cn/build_and_install/ipu.md
@@ -0,0 +1,51 @@

# How to Build the IPU Deployment Library

On IPU, FastDeploy currently supports inference only through the Paddle Inference backend on Linux.

## Building and Installing the C++ SDK

Building on Linux requires:
- gcc/g++ >= 5.4 (8.2 recommended)
- cmake >= 3.16.0, < 3.23.0
- popart >= 3.0.0

```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
cmake .. -DENABLE_PADDLE_BACKEND=ON \
         -DWITH_IPU=ON \
         -DCMAKE_INSTALL_PREFIX=${PWD}/compiled_fastdeploy_sdk \
         -DENABLE_VISION=ON
make -j8
make install
```

Once the build finishes, the C++ inference library is generated under the directory specified by `CMAKE_INSTALL_PREFIX`.


## Building and Installing the Python SDK

Building on Linux likewise requires:
- gcc/g++ >= 5.4 (8.2 recommended)
- cmake >= 3.16.0, < 3.23.0
- popart >= 3.0.0
- python >= 3.6

All build options are passed in through environment variables:

```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
export ENABLE_VISION=ON
export ENABLE_PADDLE_BACKEND=ON
export WITH_IPU=ON

python setup.py build
python setup.py bdist_wheel
```

Once the build finishes, the compiled `wheel` package is generated in the `FastDeploy/python/dist` directory and can be installed directly with pip.

If you change any build option, delete the `build` and `.setuptools-cmake-build` subdirectories under `FastDeploy/python` before rebuilding, to avoid stale-cache effects.
52 changes: 52 additions & 0 deletions docs/en/build_and_install/ipu.md
@@ -0,0 +1,52 @@

# How to Build IPU Deployment Environment

In the IPU environment, FastDeploy currently supports inference only through the Paddle Inference backend on Linux.

## How to Build and Install C++ SDK

Prerequisites for compiling on Linux:

- gcc/g++ >= 5.4 (8.2 is recommended)
- cmake >= 3.16.0, < 3.23.0
- popart >= 3.0.0

```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
cmake .. -DENABLE_PADDLE_BACKEND=ON \
-DWITH_IPU=ON \
-DCMAKE_INSTALL_PREFIX=${PWD}/compiled_fastdeploy_sdk \
-DENABLE_VISION=ON
make -j8
make install
```

Once the build finishes, the C++ inference library is generated under the directory specified by `CMAKE_INSTALL_PREFIX`.
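
As a quick smoke test of the freshly installed SDK, the IPU can be selected through `RuntimeOption::UseIpu()`, the API added in this commit. The sketch below is illustrative, not part of the commit: it assumes the stock FastDeploy runtime entry point (`fastdeploy/runtime.h`, `SetModelPath`, `Runtime::Init`) and uses placeholder model paths.

```cpp
// Minimal IPU smoke test -- a sketch, not code from this commit.
// "model.pdmodel"/"model.pdiparams" are placeholders for a real Paddle model.
#include <iostream>
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholder paths
  option.UseIpu();  // run on IPU via the Paddle Inference backend

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    std::cerr << "Failed to initialize the runtime on IPU." << std::endl;
    return -1;
  }
  std::cout << "IPU runtime initialized." << std::endl;
  return 0;
}
```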

## How to Build and Install Python SDK

Prerequisites for compiling on Linux:

- gcc/g++ >= 5.4 (8.2 is recommended)
- cmake >= 3.16.0, < 3.23.0
- popart >= 3.0.0
- python >= 3.6

All build options are passed in through environment variables:

```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
export ENABLE_VISION=ON
export ENABLE_PADDLE_BACKEND=ON
export WITH_IPU=ON

python setup.py build
python setup.py bdist_wheel
```

Once the build finishes, the compiled `wheel` package is generated in the `FastDeploy/python/dist` directory and can be installed directly with pip.

If you change any build option, delete the `build` and `.setuptools-cmake-build` subdirectories under `FastDeploy/python` before recompiling, to avoid stale-cache effects.
4 changes: 4 additions & 0 deletions examples/runtime/python/infer_paddle_paddle_inference.py
@@ -33,6 +33,10 @@
# To use the GPU, uncomment the following line
# option.use_gpu(0)

# **** IPU configuration ****
# To use the IPU, uncomment the following line
# option.use_ipu()

# Initialize and construct the runtime
runtime = fd.Runtime(option)

28 changes: 28 additions & 0 deletions examples/vision/classification/paddleclas/cpp/infer.cc
@@ -70,6 +70,32 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << res.Str() << std::endl;
}

void IpuInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "inference.pdmodel";
auto params_file = model_dir + sep + "inference.pdiparams";
auto config_file = model_dir + sep + "inference_cls.yaml";

auto option = fastdeploy::RuntimeOption();
option.UseIpu();
auto model = fastdeploy::vision::classification::PaddleClasModel(
model_file, params_file, config_file, option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}

auto im = cv::imread(image_file);

fastdeploy::vision::ClassifyResult res;
if (!model.Predict(&im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}

// Print the classification result
std::cout << res.Str() << std::endl;
}

void TrtInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "inference.pdmodel";
auto params_file = model_dir + sep + "inference.pdiparams";
@@ -113,6 +139,8 @@ int main(int argc, char* argv[]) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
TrtInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 3) {
IpuInfer(argv[1], argv[2]);
}
return 0;
}
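
Following the convention of the other modes in this example, passing `3` as the third command-line argument now dispatches to `IpuInfer`, e.g. `./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 3` (the binary and model names here follow the example's README and are illustrative).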
2 changes: 2 additions & 0 deletions examples/vision/classification/paddleclas/python/README.md
@@ -23,6 +23,8 @@ python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg -
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --topk 1
# Use TensorRT inference on the GPU (note: the first TensorRT run serializes the model, which takes some time; please be patient)
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --use_trt True --topk 1
# IPU inference (note: the first IPU run serializes the model, which takes some time; please be patient)
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device ipu --topk 1
```

After the run completes, the returned result is as follows
5 changes: 4 additions & 1 deletion examples/vision/classification/paddleclas/python/infer.py
@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu'.")
help="Type of inference device, support 'cpu' or 'gpu' or 'ipu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -32,6 +32,9 @@ def build_option(args):
if args.device.lower() == "gpu":
option.use_gpu()

if args.device.lower() == "ipu":
option.use_ipu()

if args.use_trt:
option.use_trt_backend()
return option
15 changes: 15 additions & 0 deletions fastdeploy/backends/paddle/paddle_backend.cc
@@ -42,6 +42,21 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so will fallback to GPU with Paddle Inference Backend." << std::endl;
#endif
}
} else if (option.use_ipu) {
#ifdef WITH_IPU
config_.EnableIpu(option.ipu_option.ipu_device_num,
option.ipu_option.ipu_micro_batch_size,
option.ipu_option.ipu_enable_pipelining,
option.ipu_option.ipu_batches_per_step);
config_.SetIpuConfig(option.ipu_option.ipu_enable_fp16,
option.ipu_option.ipu_replica_num,
option.ipu_option.ipu_available_memory_proportion,
option.ipu_option.ipu_enable_half_partial);
#else
FDWARNING << "The FastDeploy is not compiled with IPU backend, so will "
"fallback to CPU with Paddle Inference Backend."
<< std::endl;
#endif
} else {
config_.DisableGpu();
if (option.enable_mkldnn) {
18 changes: 18 additions & 0 deletions fastdeploy/backends/paddle/paddle_backend.h
@@ -31,6 +31,17 @@

namespace fastdeploy {

struct IpuOption {
int ipu_device_num;
int ipu_micro_batch_size;
bool ipu_enable_pipelining;
int ipu_batches_per_step;
bool ipu_enable_fp16;
int ipu_replica_num;
float ipu_available_memory_proportion;
bool ipu_enable_half_partial;
};

struct PaddleBackendOption {
#ifdef WITH_GPU
bool use_gpu = true;
@@ -47,6 +58,13 @@ struct PaddleBackendOption {
bool collect_shape = false;
#endif

#ifdef WITH_IPU
bool use_ipu = true;
IpuOption ipu_option;
#else
bool use_ipu = false;
#endif

int mkldnn_cache_size = 1;
int cpu_thread_num = 8;
// initialize memory size (MB) for GPU
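
Since `IpuOption` is a plain aggregate with no in-class defaults, any code that builds a `PaddleBackendOption` directly (rather than going through `RuntimeOption`) has to fill in every field itself. A sketch with illustrative single-IPU values — the numbers below are assumptions for demonstration, not defaults taken from FastDeploy:

```cpp
// Illustrative only: populate IpuOption by hand for a direct backend setup.
// Header path assumed from this diff's file layout.
#include "fastdeploy/backends/paddle/paddle_backend.h"

fastdeploy::PaddleBackendOption MakeSingleIpuOption() {
  fastdeploy::PaddleBackendOption opt;
#ifdef WITH_IPU
  opt.ipu_option.ipu_device_num = 1;                     // one IPU
  opt.ipu_option.ipu_micro_batch_size = 1;               // micro batch per run
  opt.ipu_option.ipu_enable_pipelining = false;          // no pipeline parallelism
  opt.ipu_option.ipu_batches_per_step = 1;               // batches per device step
  opt.ipu_option.ipu_enable_fp16 = false;                // keep fp32 weights
  opt.ipu_option.ipu_replica_num = 1;                    // no data-parallel replicas
  opt.ipu_option.ipu_available_memory_proportion = 1.0f; // memory for matmul/conv
  opt.ipu_option.ipu_enable_half_partial = false;        // fp32 partials in matmuls
#endif
  return opt;
}
```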
3 changes: 3 additions & 0 deletions fastdeploy/core/fd_type.cc
@@ -53,6 +53,9 @@ std::string Str(const Device& d) {
case Device::GPU:
out = "Device::GPU";
break;
case Device::IPU:
out = "Device::IPU";
break;
default:
out = "Device::UNKOWN";
}
7 changes: 4 additions & 3 deletions fastdeploy/core/fd_type.h
@@ -22,7 +22,7 @@

namespace fastdeploy {

enum FASTDEPLOY_DECL Device { CPU, GPU };
enum FASTDEPLOY_DECL Device { CPU, GPU, IPU };

FASTDEPLOY_DECL std::string Str(const Device& d);

@@ -51,9 +51,10 @@ enum FASTDEPLOY_DECL FDDataType {
INT8
};

FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,const Device& d);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d);

FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,const FDDataType& fdt);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
const FDDataType& fdt);

FASTDEPLOY_DECL std::string Str(const FDDataType& fdt);
