
Commit

Optimize parameters
lindsayshuo committed May 15, 2024
1 parent 7f3c224 commit 7c1c303
Showing 3 changed files with 23 additions and 15 deletions.
15 changes: 10 additions & 5 deletions yolov8/yolov8_det_trt.py
@@ -16,6 +16,9 @@

CONF_THRESH = 0.5
IOU_THRESHOLD = 0.4
POSE_NUM = 17 * 3
DET_NUM = 6
SEG_NUM = 32


def get_img_path_batches(batch_size, img_dir):
@@ -121,6 +124,7 @@ def __init__(self, engine_file_path):
self.cuda_outputs = cuda_outputs
self.bindings = bindings
self.batch_size = engine.max_batch_size
self.det_output_length = host_outputs[0].shape[0]

def infer(self, raw_image_generator):
threading.Thread.__init__(self)
@@ -129,7 +133,6 @@ def infer(self, raw_image_generator):
# Restore
stream = self.stream
context = self.context
engine = self.engine # noqa: F841
host_inputs = self.host_inputs
cuda_inputs = self.cuda_inputs
host_outputs = self.host_outputs
@@ -167,7 +170,8 @@ def infer(self, raw_image_generator):
# Do postprocess
for i in range(self.batch_size):
result_boxes, result_scores, result_classid = self.post_process(
output[i * 89001: (i + 1) * 89001], batch_origin_h[i], batch_origin_w[i]
output[i * self.det_output_length: (i + 1) * self.det_output_length], batch_origin_h[i],
batch_origin_w[i]
)
# Draw rectangles and labels on the original image
for j in range(len(result_boxes)):
@@ -287,12 +291,12 @@ def post_process(self, output, origin_h, origin_w):
result_scores: final scores, a numpy array; each element is the score corresponding to a box
result_classid: final class ids, a numpy array; each element is the class id corresponding to a box
"""
num_values_per_detection = DET_NUM + SEG_NUM + POSE_NUM
# Get the num of boxes detected
num = int(output[0])
# Reshape to a two-dimensional ndarray
# pred = np.reshape(output[1:], (-1, 38))[:num, :]

pred = np.reshape(output[1:], (-1, 89))[:num, :]
pred = np.reshape(output[1:], (-1, num_values_per_detection))[:num, :]
# Do nms
boxes = self.non_max_suppression(pred, origin_h, origin_w, conf_thres=CONF_THRESH, nms_thres=IOU_THRESHOLD)
result_boxes = boxes[:, :4] if len(boxes) else np.array([])
@@ -327,7 +331,8 @@ def bbox_iou(self, box1, box2, x1y1x2y2=True):
inter_rect_x2 = np.minimum(b1_x2, b2_x2)
inter_rect_y2 = np.minimum(b1_y2, b2_y2)
# Intersection area
inter_area = np.clip(inter_rect_x2 - inter_rect_x1 + 1, 0, None) * np.clip(inter_rect_y2 - inter_rect_y1 + 1, 0, None) # noqa: E501
inter_area = np.clip(inter_rect_x2 - inter_rect_x1 + 1, 0, None) * np.clip(inter_rect_y2 - inter_rect_y1 + 1, 0,
None) # noqa: E501
# Union Area
b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)
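
The key change in yolov8_det_trt.py is deriving both the per-image slice length (from host_outputs[0].shape[0]) and the per-row value count (from the new named constants) instead of hard-coding 89001 and 89, so the script no longer needs to know the engine's compiled-in maximum box count. A minimal sketch of the layout arithmetic, assuming the detection output is packed as one count value followed by a fixed maximum number of rows; the 1000-box maximum and the helper function below are illustrative assumptions, not part of the commit:

    # Per-row layout used by the Python post-processing in this commit:
    #   DET_NUM  = 6  detection values (cx, cy, w, h, confidence, class id)
    #   SEG_NUM  = 32 segmentation mask coefficients
    #   POSE_NUM = 51 pose values (17 keypoints * 3)
    DET_NUM = 6
    SEG_NUM = 32
    POSE_NUM = 17 * 3
    num_values_per_detection = DET_NUM + SEG_NUM + POSE_NUM   # 89

    # Assumed maximum number of output boxes per image; with 1000 the buffer
    # length matches the previously hard-coded 89001 (1 count value + 1000 * 89).
    max_output_bbox = 1000
    det_output_length = 1 + max_output_bbox * num_values_per_detection
    assert det_output_length == 89001

    # infer() now slices the flat host buffer per image with this length
    # instead of the literal 89001:
    def split_batch_output(output, batch_size, det_output_length):
        return [output[i * det_output_length:(i + 1) * det_output_length]
                for i in range(batch_size)]
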
15 changes: 8 additions & 7 deletions yolov8/yolov8_pose_trt.py
@@ -14,10 +14,11 @@
import pycuda.driver as cuda
import tensorrt as trt


CONF_THRESH = 0.5
IOU_THRESHOLD = 0.4

POSE_NUM = 17 * 3
DET_NUM = 6
SEG_NUM = 32
keypoint_pairs = [
(0, 1), (0, 2), (0, 5), (0, 6), (1, 2),
(1, 3), (2, 4), (5, 6), (5, 7), (5, 11),
@@ -128,7 +129,7 @@ def __init__(self, engine_file_path):
self.cuda_outputs = cuda_outputs
self.bindings = bindings
self.batch_size = engine.max_batch_size
self.det_output_size = 89001
self.det_output_size = host_outputs[0].shape[0]

def infer(self, raw_image_generator):
threading.Thread.__init__(self)
@@ -329,7 +330,7 @@ def post_process(self, output, origin_h, origin_w):
each element represents keypoints for a box, shaped as (#keypoints, 3)
"""
# Number of values per detection: 38 base values + 17 keypoints * 3 values each
num_values_per_detection = 38 + 17 * 3
num_values_per_detection = DET_NUM + SEG_NUM + POSE_NUM
# Get the number of boxes detected
num = int(output[0])
# Reshape to a two-dimensional ndarray with the full detection shape
@@ -344,7 +345,7 @@
result_boxes = boxes[:, :4] if len(boxes) else np.array([])
result_scores = boxes[:, 4] if len(boxes) else np.array([])
result_classid = boxes[:, 5] if len(boxes) else np.array([])
result_keypoints = boxes[:, -51:] if len(boxes) else np.array([])
result_keypoints = boxes[:, -POSE_NUM:] if len(boxes) else np.array([])

# Return the post-processed results including keypoints
return result_boxes, result_scores, result_classid, result_keypoints
@@ -404,11 +405,11 @@ def non_max_suppression(self, prediction, origin_h, origin_w, conf_thres=0.5, nm
# Transform bbox from [center_x, center_y, w, h] to [x1, y1, x2, y2]
res_array = np.copy(boxes)
box_pred_deep_copy = np.copy(boxes[:, :4])
keypoints_pred_deep_copy = np.copy(boxes[:, -51:])
keypoints_pred_deep_copy = np.copy(boxes[:, -POSE_NUM:])
res_box, res_keypoints = self.xywh2xyxy_with_keypoints(
origin_h, origin_w, box_pred_deep_copy, keypoints_pred_deep_copy)
res_array[:, :4] = res_box
res_array[:, -51:] = res_keypoints
res_array[:, -POSE_NUM:] = res_keypoints
# clip the coordinates
res_array[:, 0] = np.clip(res_array[:, 0], 0, origin_w - 1)
res_array[:, 2] = np.clip(res_array[:, 2], 0, origin_w - 1)
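
In yolov8_pose_trt.py the hard-coded buffer size and the -51 keypoint slices are replaced by the same named constants. A small sketch of how the keypoints sit at the tail of each row and how they can be pulled out, under the same row-layout assumption as above; the placeholder array and reshape are illustrative only:

    import numpy as np

    DET_NUM, SEG_NUM, POSE_NUM = 6, 32, 17 * 3
    num_values_per_detection = DET_NUM + SEG_NUM + POSE_NUM   # 89

    # Two placeholder rows that survived NMS; each row is laid out as
    # [x1, y1, x2, y2, conf, class_id, 32 mask coefficients, 51 keypoint values].
    boxes = np.zeros((2, num_values_per_detection), dtype=np.float32)

    # The last POSE_NUM values of every row are the keypoints ...
    result_keypoints = boxes[:, -POSE_NUM:] if len(boxes) else np.array([])

    # ... and each row can be viewed as 17 keypoints with 3 values each,
    # matching the (#keypoints, 3) shape described in the docstring.
    keypoints_per_box = result_keypoints.reshape(-1, 17, 3)
    print(keypoints_per_box.shape)  # (2, 17, 3)
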
8 changes: 5 additions & 3 deletions yolov8/yolov8_seg_trt.py
@@ -16,6 +16,9 @@

CONF_THRESH = 0.5
IOU_THRESHOLD = 0.4
POSE_NUM = 17 * 3
DET_NUM = 6
SEG_NUM = 32


def get_img_path_batches(batch_size, img_dir):
Expand Down Expand Up @@ -128,7 +131,7 @@ def __init__(self, engine_file_path):
self.seg_w = int(self.input_w / 4)
self.seg_h = int(self.input_h / 4)
self.seg_c = int(self.seg_output_length / (self.seg_w * self.seg_w))
self.det_row_output_length = self.seg_c + 6 + 51
self.det_row_output_length = self.seg_c + DET_NUM + POSE_NUM

# Draw mask
self.colors_obj = Colors()
@@ -140,7 +143,6 @@ def infer(self, raw_image_generator):
# Restore
stream = self.stream
context = self.context
engine = self.engine # noqa: F841
host_inputs = self.host_inputs
cuda_inputs = self.cuda_inputs
host_outputs = self.host_outputs
@@ -321,7 +323,7 @@ def post_process(self, output, origin_h, origin_w):
result_boxes = boxes[:, :4] if len(boxes) else np.array([])
result_scores = boxes[:, 4] if len(boxes) else np.array([])
result_classid = boxes[:, 5] if len(boxes) else np.array([])
result_proto_coef = boxes[:, 6:38] if len(boxes) else np.array([])
result_proto_coef = boxes[:, DET_NUM:int(DET_NUM + SEG_NUM)] if len(boxes) else np.array([])
return result_boxes, result_scores, result_classid, result_proto_coef

def bbox_iou(self, box1, box2, x1y1x2y2=True):
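
In yolov8_seg_trt.py the constants drive both the derived row length in __init__ and the slice that extracts the 32 prototype coefficients in post_process. A sketch of that arithmetic, assuming a square prototype mask at a quarter of the input resolution; the 640x640 input size and the proto buffer length here are illustrative values, and the variable names simply mirror the diff:

    DET_NUM, SEG_NUM, POSE_NUM = 6, 32, 17 * 3

    # __init__: derive the prototype-mask shape and the per-row length.
    input_w = input_h = 640                        # illustrative input size
    seg_w, seg_h = input_w // 4, input_h // 4      # 160 x 160 prototype masks
    seg_output_length = SEG_NUM * seg_w * seg_h    # illustrative proto buffer length
    seg_c = seg_output_length // (seg_w * seg_h)   # 32 prototype channels
    det_row_output_length = seg_c + DET_NUM + POSE_NUM   # 32 + 6 + 51 = 89

    # post_process: the SEG_NUM mask coefficients sit right after the DET_NUM
    # detection values in every row, hence the new slice
    #     result_proto_coef = boxes[:, DET_NUM:DET_NUM + SEG_NUM]
    print(det_row_output_length)  # 89
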
