# CMakeLists.txt — tiny-tensorrt infer_demo build configuration
# (web-scrape chrome and line-number residue removed; file content begins below)
# cmake_minimum_required() must be the first command executed: it establishes
# policy defaults for everything that follows. In the original file it ran
# AFTER the message() calls below, which is a latent bug.
cmake_minimum_required(VERSION 3.0)
project(infer_demo VERSION 7.2.2)

# Request C++11 through the portable abstraction instead of overwriting
# CMAKE_CXX_FLAGS (the old `set(CMAKE_CXX_FLAGS "-std=c++11")` discarded any
# flags supplied by the user on the command line).
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

option(BUILD_TEST "compile test" ON)
# NOTE(review): the usage banner mentions -DBUILD_PYTHON, but no BUILD_PYTHON
# option is declared in this file — confirm it is defined in an included
# .cmake file or add an option() for it here.

message(WARNING "
=============> SM VERSION SETTING <===============
By default tiny-tensorrt will compile with compatibility with nvidia's 10x and 20x device(1080ti, 2080 etc, SM_60-SM75), so it might cause compile error under other device like Jetson Nano(SM_53)
It would be better that you specify with your device's sm version as shown below, usage:
cmake -DSM_VERSION=53 ..
Fermi (CUDA 3.2 until CUDA 8) (deprecated from CUDA 9):
SM20 or SM_20, compute_20 – Older cards such as GeForce 400, 500, 600, GT-630
Kepler (CUDA 5 and later):
SM30 or SM_30, compute_30 – Kepler architecture (generic – Tesla K40/K80, GeForce 700, GT-730)
Adds support for unified memory programming
SM35 or SM_35, compute_35 – More specific Tesla K40
Adds support for dynamic parallelism. Shows no real benefit over SM30 in my experience.
SM37 or SM_37, compute_37 – More specific Tesla K80
Adds a few more registers. Shows no real benefit over SM30 in my experience
Maxwell (CUDA 6 and later):
SM50 or SM_50, compute_50 – Tesla/Quadro M series
SM52 or SM_52, compute_52 – Quadro M6000 , GeForce 900, GTX-970, GTX-980, GTX Titan X
SM53 or SM_53, compute_53 – Tegra (Jetson) TX1 / Tegra X1, Drive CX, Drive PX, Jetson Nano
Pascal (CUDA 8 and later)
SM60 or SM_60, compute_60 – Quadro GP100, Tesla P100, DGX-1 (Generic Pascal)
SM61 or SM_61, compute_61 – GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
SM62 or SM_62, compute_62 – Integrated GPU on the NVIDIA Drive PX2, Tegra (Jetson) TX2
Volta (CUDA 9 and later)
SM70 or SM_70, compute_70 – DGX-1 with Volta, Tesla V100, GTX 1180 (GV104), Titan V, Quadro GV100
SM72 or SM_72, compute_72 – Jetson AGX Xavier, Drive AGX Pegasus, Xavier NX
Turing (CUDA 10 and later)
SM75 or SM_75, compute_75 – GTX/RTX Turing – GTX 1660 Ti, RTX 2060, RTX 2070, RTX 2080, Titan RTX, Quadro RTX 4000, Quadro RTX 5000, Quadro RTX 6000, Quadro RTX 8000, Quadro T1000/T2000, Tesla T4
Ampere (CUDA 11 and later)
SM80 or SM_80, compute_80 – Tesla A100 (GA100), NVIDIA DGX-A100, RTX Ampere – RTX 3080
SM86 or SM_86, compute_86 – (from CUDA 11.1 onwards) Tesla GA10x cards, RTX Ampere – RTX 3080, GA102 – RTX 3090, RTX A6000, RTX A40
Hopper – NVIDIA H100 (GH100)
======================================================
")
message(STATUS "
=============> USAGE <===============
cmake -DSM_VERSION=xx -DBUILD_PYTHON=ON/OFF -DBUILD_TEST=ON/OFF ..
=====================================
")
# CUDA ----------------------------------------------------------------------
find_package(CUDA REQUIRED)
include(cmake/CUDA_utils.cmake)

# Optional user override of the target GPU architecture, e.g. -DSM_VERSION=53.
# (The original cache entry carried the placeholder docstring "Description".)
set(SM_VERSION "" CACHE STRING "Target CUDA SM architecture (e.g. 53, 61, 75); empty = auto-detect")
if(SM_VERSION)
  # User pinned an architecture: build gencode flags for exactly that arch.
  set(CUDA_targeted_archs ${SM_VERSION})
  CUDA_get_gencode_args(CUDA_gencode_flags ${CUDA_targeted_archs})
else()
  # Discover which architectures this nvcc supports.
  # NOTE(review): CUDA_known_archs and CUDA_TARGET_ARCHS are expected to be
  # defined by cmake/CUDA_utils.cmake — they are not set anywhere in this
  # file; confirm against that module.
  CUDA_find_supported_arch_values(CUDA_supported_archs ${CUDA_known_archs})
  message(STATUS "CUDA supported archs: ${CUDA_supported_archs}")
  set(CUDA_TARGET_ARCHS_SORTED ${CUDA_TARGET_ARCHS})
  list(SORT CUDA_TARGET_ARCHS_SORTED)
  # Keep only the requested archs that this nvcc can actually emit code for.
  CUDA_find_supported_arch_values(CUDA_targeted_archs ${CUDA_TARGET_ARCHS_SORTED})
  message(STATUS "CUDA targeted archs: ${CUDA_targeted_archs}")
  if(NOT CUDA_targeted_archs)
    message(FATAL_ERROR "None of the provided CUDA architectures ({${CUDA_TARGET_ARCHS}}) is supported by nvcc, use one or more of: ${CUDA_supported_archs}")
  endif()
  CUDA_get_gencode_args(CUDA_gencode_flags ${CUDA_targeted_archs})
endif()
# Append the generated -gencode flags (PTX + cubin) for nvcc.
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} ${CUDA_gencode_flags}")
# Project-local header search paths (directory-scoped: every target declared
# below inherits them).
include_directories(
    include
    ./plugin
    include/spdlog
)

# Zlib
find_package(ZLIB REQUIRED)
include_directories(${ZLIB_INCLUDE_DIRS})
# TensorRT -------------------------------------------------------------------
# Cache variable so the install location can be overridden on the command line
# (-DTENSORRT_ROOT=...) instead of being hardwired to one machine's path.
set(TENSORRT_ROOT /usr/local/TensorRT-7.2.1.6 CACHE PATH "Root directory of the TensorRT installation")
find_path(TENSORRT_INCLUDE_DIR NvInfer.h
  HINTS ${TENSORRT_ROOT} ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES include)
message(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
if(TENSORRT_INCLUDE_DIR)
  # BUG FIX: the headers were located above but never added to the include
  # path, so a TensorRT installed outside the default search dirs would not
  # have been found at compile time.
  include_directories(${TENSORRT_INCLUDE_DIR})
endif()

# OpenCV
# NOTE(review): the PATHS hint points at one developer's home directory.
# NO_DEFAULT_PATH was dropped so the standard system locations are searched
# as well and the build also works on machines without that checkout —
# confirm the intended OpenCV is still picked up on the original machine.
find_package(OpenCV PATHS /home/nvidia/data/zhangbo/library/opencv-4.5.1)
# BUG FIX: the original `include_directories(OpenCV_INCLUDE_DIRS)` was missing
# the ${} expansion and added a literal directory named "OpenCV_INCLUDE_DIRS".
include_directories(${OpenCV_INCLUDE_DIRS})

# add tinytrt library (prebuilt binary expected in ./lib)
link_directories(./lib)
## build plugin
# NOTE(review): file(GLOB_RECURSE) only re-scans at configure time, so newly
# added plugin sources require re-running cmake; an explicit source list (or
# CONFIGURE_DEPENDS, CMake >= 3.12) would avoid stale builds.
file(GLOB_RECURSE trt_source
  plugin/*.cu
  plugin/*.cpp
)
cuda_add_library(plugin SHARED ${trt_source})
# PRIVATE instead of PUBLIC: these are build options for this library itself,
# not usage requirements its consumers must inherit (infer_demo already sets
# the identical flags on itself, so behavior is unchanged).
target_compile_options(plugin PRIVATE -std=c++11 -Wall -Wfloat-conversion)
set_target_properties(plugin PROPERTIES POSITION_INDEPENDENT_CODE ON)
# NOTE(review): PUBLIC_HEADER is a bare "Trt.h" — confirm the header sits next
# to this CMakeLists.txt, otherwise install() of the public header will fail.
set_target_properties(plugin PROPERTIES PUBLIC_HEADER Trt.h)
## custom test
if(BUILD_TEST)
  # The demo has exactly one source file; list it explicitly instead of
  # globbing (glob results are only refreshed at configure time).
  add_executable(infer_demo test.cpp)
  target_compile_options(infer_demo PRIVATE -std=c++11 -Wall -Wfloat-conversion)
  # One call with an explicit PRIVATE keyword replaces eight keyword-less
  # legacy-signature calls: all of these libraries are implementation details
  # of the executable, and nothing links against infer_demo itself.
  target_link_libraries(infer_demo PRIVATE
    tinytrt
    plugin
    nvinfer
    nvinfer_plugin
    nvparsers
    nvonnxparser
    ${ZLIB_LIBRARIES}
    ${OpenCV_LIBS}
  )
endif()