zhiqwang · liuzhuang1024 · Jan 30, 2022 · Jan 31, 2022 · Jan 31, 2022
diff --git a/tools/tensorrt_int8_calibrating.py b/tools/tensorrt_int8_calibrating.py
@@ -0,0 +1,332 @@
+#
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+import sys
+
+sys.path.append("./")
+
+import argparse
+import logging
+import traceback
+
+import numpy as np
+import pycuda.autoinit  # noqa
+import pycuda.driver as cuda
+import tensorrt as trt
+from yolort.v5.utils.datasets import LoadImages
+
+# Root logging is configured at INFO; this script logs through the
+# "EngineBuilder" logger, which is also pinned to INFO.
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("EngineBuilder")
+log.setLevel(logging.INFO)
+
+
+# Module-level settings.
+# img_size, stride and img_source are consumed by ImageBatcher below; the
+# remaining values are not read anywhere in this file (presumably kept for
+# parity with the detection pipeline -- confirm before removing).
+img_size = [320, 320]
+stride = 32
+score_thresh = 0.35
+iou_thresh = 0.45
+detections_per_img = 100
+half = False
+img_source = "val2017/"
+
+
+class ImageBatcher:
+    """
+    Wraps a ``LoadImages`` dataset and describes the calibration batch layout.
+
+    The images are read from the module-level ``img_source`` and loaded with
+    the module-level ``img_size`` / ``stride`` settings.
+
+    NOTE(review): ``img_size`` is presumably expected to match the spatial size
+    of the network input described by ``calib_shape`` -- confirm.
+    """
+
+    def __init__(self, calib_shape=None, calib_dtype=None) -> None:
+        """
+
+        Args:
+            calib_shape: Shape of the calibration input. Only the last two
+                entries (height, width) are used, so both ``(H, W)`` and a full
+                ``(N, C, H, W)`` network input shape are accepted. Defaults to
+                the module-level ``img_size`` when omitted.
+            calib_dtype: The numpy dtype of the calibration input buffer.
+        """
+        self.dataset = LoadImages(img_source, img_size=img_size, stride=stride, auto=False)
+        self.dtype = calib_dtype
+        self.batch_size = 1
+        if calib_shape is None:
+            calib_shape = img_size
+        # Keep only the spatial dims: the caller may hand over the whole network
+        # input shape, and prepending (batch, 3) to that would describe a 6-D
+        # tensor and inflate the device buffer sized from ``self.shape``.
+        height, width = tuple(calib_shape)[-2:]
+        self.shape = (self.batch_size, 3, height, width)
+        self.num_images = len(self.dataset)
+        self.image_index = 0
+
+    def get_batch(
+        self,
+    ):
+        """
+        Start a fresh pass over the dataset.
+
+        Returns:
+            An iterator over ``self.dataset``.
+        """
+        return iter(self.dataset)
+
+
+class EngineCalibrator(trt.IInt8EntropyCalibrator2):
+    """
+    Implements the INT8 Entropy Calibrator 2.
+
+    Calibration images are pulled one at a time from an ``ImageBatcher`` and
+    copied into a single reusable device buffer. When only a calibration cache
+    is used, no image batcher has to be attached.
+    """
+
+    def __init__(self, cache_file):
+        """
+
+        Args:
+            cache_file: The location of the cache file.
+        """
+        super().__init__()
+        self.cache_file = cache_file
+        self.image_batcher: ImageBatcher = None
+        self.batch_allocation = None
+        self.batch_generator = None
+        # Size in bytes of ``batch_allocation``; used to guard host-to-device copies.
+        self.batch_nbytes = 0
+
+    def set_image_batcher(self, image_batcher: ImageBatcher):
+        """
+        Define the image batcher to use, if any. If using only the cache file, an
+        image batcher doesn't need to be defined.
+
+        Allocates a device buffer sized from the batcher's ``dtype`` and ``shape``.
+
+        Args:
+            image_batcher: The ImageBatcher object
+        """
+        self.image_batcher = image_batcher
+        self.batch_nbytes = int(np.dtype(image_batcher.dtype).itemsize * np.prod(image_batcher.shape))
+        self.batch_allocation = cuda.mem_alloc(self.batch_nbytes)
+        self.batch_generator = image_batcher.get_batch()
+
+    def get_batch_size(self):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Get the batch size to use for calibration.
+
+        Returns:
+            Batch size.
+        """
+        if self.image_batcher:
+            return self.image_batcher.batch_size
+        return 1
+
+    def get_batch(self, names):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Get the next batch to use for calibration, as a list of device memory pointers.
+
+        Args:
+            names: The names of the inputs, if useful to define the order of inputs.
+
+        Returns:
+            A list of int-casted memory pointers, or None when no image batcher
+            is attached, the images are exhausted, or loading/copying failed.
+        """
+        if not self.image_batcher:
+            return None
+
+        log.info("Calibrating image ...")
+        try:
+            # Only the preprocessed image of the yielded 5-tuple is needed.
+            _, image, _, _, _ = next(self.batch_generator)
+            # Add the batch axis and cast to the dtype the device buffer was
+            # sized for, so the copy below transfers the actual pixel data.
+            # NOTE(review): values are copied as produced by the dataset (only
+            # cast); if the network expects normalized input (e.g. scaled to
+            # [0, 1]) that scaling has to be added here -- confirm.
+            batch = np.ascontiguousarray(
+                image[np.newaxis, :, :, :], dtype=np.dtype(self.image_batcher.dtype)
+            )
+            if batch.nbytes > self.batch_nbytes:
+                # Never write past the end of the device allocation.
+                raise ValueError(
+                    "Calibration batch of {} bytes does not fit the {} byte device buffer".format(
+                        batch.nbytes, self.batch_nbytes
+                    )
+                )
+            self.image_batcher.image_index += 1
+
+            log.info(
+                "Calibrating image {} / {}".format(
+                    self.image_batcher.image_index, self.image_batcher.num_images
+                )
+            )
+            cuda.memcpy_htod(self.batch_allocation, batch)
+            return [int(self.batch_allocation)]
+        except StopIteration:
+            log.info("Finished calibration batches")
+            return None
+        except Exception:
+            # Best effort: report the failure and end calibration with the
+            # batches gathered so far.
+            traceback.print_exc()
+            return None
+
+    def read_calibration_cache(self):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Read the calibration cache file stored on disk, if it exists.
+
+        Returns:
+            The contents of the cache file, or None if the file does not exist.
+        """
+        if os.path.exists(self.cache_file):
+            with open(self.cache_file, "rb") as f:
+                log.info("Using calibration cache file: {}".format(self.cache_file))
+                return f.read()
+        return None
+
+    def write_calibration_cache(self, cache):
+        """
+        Overrides from trt.IInt8EntropyCalibrator2.
+        Store the calibration cache to a file on disk.
+
+        Args:
+            cache: The contents of the calibration cache to store.
+        """
+        with open(self.cache_file, "wb") as f:
+            log.info("Writing calibration cache data to: {}".format(self.cache_file))
+            f.write(cache)
+
+
+class EngineBuilder:
+    """
+    Parses an ONNX graph and builds a TensorRT engine from it.
+    """
+
+    def __init__(self, verbose=False):
+        """
+
+        Args:
+            verbose: If enabled, a higher verbosity level will be set on the TensorRT logger.
+        """
+        self.trt_logger = trt.Logger(trt.Logger.INFO)
+        if verbose:
+            self.trt_logger.min_severity = trt.Logger.Severity.VERBOSE
+
+        trt.init_libnvinfer_plugins(self.trt_logger, namespace="")
+
+        self.builder = trt.Builder(self.trt_logger)
+        self.config = self.builder.create_builder_config()
+        self.config.max_workspace_size = 8 * (2 ** 30)  # 8 GB
+
+        # Populated by create_network().
+        self.batch_size = None
+        self.network = None
+        self.parser = None
+
+    def create_network(self, onnx_path):
+        """
+        Parse the ONNX graph and create the corresponding TensorRT network definition.
+
+        Exits the process with status 1 if the ONNX file cannot be parsed.
+
+        Args:
+            onnx_path: The path to the ONNX graph to load.
+        """
+        network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
+
+        self.network = self.builder.create_network(network_flags)
+        self.parser = trt.OnnxParser(self.network, self.trt_logger)
+
+        onnx_path = os.path.realpath(onnx_path)
+        with open(onnx_path, "rb") as f:
+            if not self.parser.parse(f.read()):
+                log.error("Failed to load ONNX file: {}".format(onnx_path))
+                for error in range(self.parser.num_errors):
+                    log.error(self.parser.get_error(error))
+                sys.exit(1)
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+        outputs = [self.network.get_output(i) for i in range(self.network.num_outputs)]
+
+        log.info("Network Description")
+        for net_input in inputs:
+            # The batch size of the last input wins when there are several.
+            self.batch_size = net_input.shape[0]
+            log.info(
+                "Input '{}' with shape {} and dtype {}".format(
+                    net_input.name, net_input.shape, net_input.dtype
+                )
+            )
+        for net_output in outputs:
+            log.info(
+                "Output '{}' with shape {} and dtype {}".format(
+                    net_output.name, net_output.shape, net_output.dtype
+                )
+            )
+        assert (
+            self.batch_size is not None and self.batch_size > 0
+        ), "The network needs at least one input with a positive batch dimension"
+        self.builder.max_batch_size = self.batch_size
+
+    def create_engine(
+        self,
+        engine_path,
+        precision,
+        calib_input=None,
+        calib_cache=None,
+        calib_num_images=25000,
+        calib_batch_size=8,
+        calib_preprocessor=None,
+    ):
+        """
+        Build the TensorRT engine and serialize it to disk.
+
+        Exits the process with status 1 if TensorRT fails to build the engine.
+
+        Args:
+            engine_path: The path where to serialize the engine to.
+            precision: The datatype to use for the engine, either 'fp32', 'fp16' or 'int8'.
+            calib_input: The path to a directory holding the calibration images.
+                Currently not forwarded to the ImageBatcher, which reads from the
+                module-level ``img_source``; a warning is logged if it is set.
+            calib_cache: The path where to write the calibration cache to, or if it already
+                exists, load it from.
+            calib_num_images: The maximum number of images to use for calibration.
+                Currently unused.
+            calib_batch_size: The batch size to use for the calibration process. Only used
+                as the leading entry of the shape handed to the ImageBatcher.
+            calib_preprocessor: The ImageBatcher preprocessor algorithm to use.
+                Currently unused.
+        """
+        engine_path = os.path.realpath(engine_path)
+        engine_dir = os.path.dirname(engine_path)
+        os.makedirs(engine_dir, exist_ok=True)
+        log.info("Building {} Engine in {}".format(precision, engine_path))
+
+        inputs = [self.network.get_input(i) for i in range(self.network.num_inputs)]
+
+        if precision == "fp16":
+            if not self.builder.platform_has_fast_fp16:
+                log.warning("FP16 is not supported natively on this platform/device")
+            else:
+                self.config.set_flag(trt.BuilderFlag.FP16)
+        elif precision == "int8":
+            if not self.builder.platform_has_fast_int8:
+                log.warning("INT8 is not supported natively on this platform/device")
+            else:
+                self.config.set_flag(trt.BuilderFlag.INT8)
+                self.config.int8_calibrator = EngineCalibrator(calib_cache)
+                # Images are only needed when there is no cache to load from.
+                if not os.path.exists(calib_cache):
+                    if calib_input:
+                        log.warning(
+                            "calib_input '{}' is ignored; calibration images are read from '{}'".format(
+                                calib_input, img_source
+                            )
+                        )
+                    calib_shape = [calib_batch_size] + list(inputs[0].shape[1:])
+                    calib_dtype = trt.nptype(inputs[0].dtype)
+                    self.config.int8_calibrator.set_image_batcher(ImageBatcher(calib_shape, calib_dtype))
+
+        engine = self.builder.build_engine(self.network, self.config)
+        if engine is None:
+            # A failed build yields no engine; report it instead of failing
+            # later with an unrelated AttributeError in the ``with`` statement.
+            log.error("Failed to create engine")
+            sys.exit(1)
+
+        with engine, open(engine_path, "wb") as f:
+            log.info("Serializing engine to file: {:}".format(engine_path))
+            f.write(engine.serialize())
+
+
+def main(args):
+    """
+    Build a TensorRT engine from the parsed command-line arguments.
+
+    Args:
+        args: Namespace providing ``verbose``, ``onnx``, ``engine``, ``precision``
+            and the ``calib_*`` options.
+    """
+    engine_builder = EngineBuilder(args.verbose)
+    engine_builder.create_network(args.onnx)
+    engine_builder.create_engine(
+        engine_path=args.engine,
+        precision=args.precision,
+        calib_input=args.calib_input,
+        calib_cache=args.calib_cache,
+        calib_num_images=args.calib_num_images,
+        calib_batch_size=args.calib_batch_size,
+        calib_preprocessor=args.calib_preprocessor,
+    )
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options, validate them, build the engine.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-o", "--onnx", help="The input ONNX model file to load")
+    parser.add_argument("-e", "--engine", help="The output path for the TRT engine")
+    parser.add_argument(
+        "-p",
+        "--precision",
+        default="fp16",
+        choices=["fp32", "fp16", "int8"],
+        help="The precision mode to build in, either 'fp32', 'fp16' or 'int8', default: 'fp16'",
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Enable more verbose log output")
+    parser.add_argument("--calib_input", help="The directory holding images to use for calibration")
+    parser.add_argument(
+        "--calib_cache",
+        default="./calibration.cache",
+        help="The file path for INT8 calibration cache to use, default: ./calibration.cache",
+    )
+    parser.add_argument(
+        "--calib_num_images",
+        default=10,
+        type=int,
+        # The help text previously advertised 25000, which is not the default used here.
+        help="The maximum number of images to use for calibration, default: 10",
+    )
+    parser.add_argument(
+        "--calib_batch_size",
+        default=1,
+        type=int,
+        help="The batch size for the calibration process, default: 1",
+    )
+    parser.add_argument(
+        "--calib_preprocessor",
+        default="V2",
+        choices=["V1", "V1MS", "V2"],
+        help="Set the calibration image preprocessor to use, either 'V2', 'V1' or 'V1MS', default: V2",
+    )
+    args = parser.parse_args()
+    # --onnx and --engine are validated by hand so that the full help is printed
+    # and the process exits with status 1 when either is missing.
+    if not all([args.onnx, args.engine]):
+        parser.print_help()
+        log.error("These arguments are required: --onnx and --engine")
+        sys.exit(1)
+    # INT8 needs calibration data from images or from an existing/new cache file.
+    if args.precision == "int8" and not any([args.calib_input, args.calib_cache]):
+        parser.print_help()
+        log.error("When building in int8 precision, either --calib_input or --calib_cache are required")
+        sys.exit(1)
+    main(args)