fixed divergent head pose when at the extremes of the image field of view
This now requires the head pose estimation to publish in the image coordinate system. It also requires the gaze estimation to know the ros_frame, since the head pose is now estimated in the image coordinate system and the phi and theta angles fed to the gaze estimation network must be derived relative to that frame.

The launch files have been modified accordingly.
ahmed-alhindawi committed Nov 1, 2019
1 parent 059c8ac commit 9cde812
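
For context, a minimal sketch (not part of this commit) of the two conventions involved: the image, or camera-optical, frame has x right, y down, z forward (as in /kinect2_link), while the ROS convention has x forward, y left, z up (as in /kinect2_ros_frame). The hand-coded axis swap removed from extract_landmarks_node.py below is exactly this rotation; the example point is made up:

    # Illustrative only: map a point from the camera-optical frame to the ROS convention.
    import numpy as np

    # rotation taking optical coordinates (x right, y down, z forward)
    # to ROS coordinates (x forward, y left, z up)
    R_ros_optical = np.array([[0, 0, 1],
                              [-1, 0, 0],
                              [0, -1, 0]])

    p_optical = np.array([0.1, 0.2, 1.5])  # head slightly right of, below, and 1.5 m in front of the camera
    p_ros = R_ros_optical.dot(p_optical)   # -> [1.5, -0.1, -0.2], i.e. [z, -x, -y]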
Showing 6 changed files with 31 additions and 25 deletions.
rt_gene/launch/estimate_gaze.launch: 12 additions & 10 deletions
@@ -1,16 +1,17 @@
 <launch>
 <arg name="pytorch_num_threads" default="8" />
 <arg name="use_face_encoding_tracker" default="True" />
-<arg name="face_encoding_threshold" default="0.6"/>
-<arg name="start_rviz" default="False"/>
+<arg name="face_encoding_threshold" default="0.6" />
+<arg name="start_rviz" default="False" />
 <arg name="visualise_headpose" default="True" />
 <arg name="visualise_eyepose" default="True" />
 
-<arg name="video_namespace" default="/kinect2/hd"/>
-<arg name="video_image_topic" default="image_color_rect"/>
-<arg name="video_info_topic" default="camera_info"/>
+<arg name="video_namespace" default="/kinect2/hd" />
+<arg name="video_image_topic" default="image_color_rect" />
+<arg name="video_info_topic" default="camera_info" />
 
-<arg name="ros_tf_frame" default="/kinect2_nonrotated_link"/>
+<arg name="camera_frame" default="/kinect2_link" />
+<arg name="ros_frame" default="/kinect2_ros_frame" />
 
 <arg name="group_name" default="gaze" />
 <arg name="tf_prefix" default="$(arg group_name)" />
@@ -26,7 +27,7 @@
 <param name="device_id_facedetection" value="$(arg device_id_facedetection)" />
 <param name="use_face_encoding_tracker" value="$(arg use_face_encoding_tracker)" />
 <param name="face_encoding_threshold" value="$(arg face_encoding_threshold)" />
-<param name="ros_tf_frame" value="$(arg ros_tf_frame)" />
+<param name="camera_frame" value="$(arg camera_frame)" />
 <param name="tf_prefix" value="$(arg tf_prefix)" />
 <param name="visualise_headpose" value="$(arg visualise_headpose)" />
 
@@ -36,12 +37,13 @@
 
 <node pkg="rt_gene" type="estimate_gaze.py" name="estimate_gaze_twoeyes" output="screen">
 <rosparam param="model_files">['model_nets/Model_allsubjects1.h5']</rosparam>
-<!-- rosparam param="model_files">['model_nets/all_subjects_mpii_prl_utmv_0_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_1_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_2_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_3_02.h5']</rosparam-->
+<!-- rosparam param="model_files">['model_nets/all_subjects_mpii_prl_utmv_0_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_1_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_2_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_3_02.h5']</rosparam -->
 <param name="tf_prefix" value="$(arg tf_prefix)" />
-<param name="ros_tf_frame" value="$(arg ros_tf_frame)" />
+<param name="ros_tf_frame" value="$(arg ros_frame)" />
 <param name="device_id_gazeestimation" value="$(arg device_id_gazeestimation)" />
 <param name="visualise_eyepose" value="$(arg visualise_eyepose)" />
-</node>
+</node>
 
 </group>
 
+<node pkg="rviz" type="rviz" name="rviz_test" args="-d $(find rt_gene)/rviz_cfg/gaze_following.rviz" if="$(arg start_rviz)" />
rt_gene/launch/start_kinect.launch: 1 addition & 1 deletion
@@ -7,6 +7,6 @@
 </include>
 <!-- publish nonrotated link for Kinect -->
 <node pkg="tf2_ros" type="static_transform_publisher" name="kinect2_link_broadcaster"
-args="0.0 0.0 0.0 -0.5 0.5 -0.5 0.5 /kinect2_nonrotated_link /kinect2_link" />
+args="0.0 0.0 0.0 -0.5 0.5 -0.5 0.5 /kinect2_ros_frame /kinect2_link" />
 </launch>
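
As a sanity check (not part of the commit), the quaternion in the static transform above is this same optical-to-ROS rotation; a quick sketch with tf.transformations, assuming a standard ROS Python environment:

    # Verify that qx, qy, qz, qw = -0.5, 0.5, -0.5, 0.5 maps the optical axes
    # of /kinect2_link onto the ROS axes of /kinect2_ros_frame.
    import numpy as np
    from tf import transformations

    q = [-0.5, 0.5, -0.5, 0.5]  # x, y, z, w, as passed to static_transform_publisher
    R = transformations.quaternion_matrix(q)[:3, :3]

    print(R.dot([0, 0, 1]))  # optical z (forward) -> [1.  0.  0.] (ROS x, forward)
    print(R.dot([1, 0, 0]))  # optical x (right)   -> [0. -1.  0.] (ROS -y, right)
    print(R.dot([0, 1, 0]))  # optical y (down)    -> [0.  0. -1.] (ROS -z, down)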

rt_gene/launch/start_webcam.launch: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 <arg name="video_info_topic" default="camera_info"/>
 
 <arg name="rgb_frame_id" default="/kinect2_link"/>
-<arg name="rgb_frame_id_ros" default="/kinect2_nonrotated_link"/> <!-- must match ros_tf_frame in estimate_gaze.launch -->
+<arg name="rgb_frame_id_ros" default="/kinect2_ros_frame"/>
 <arg name="publish_ros_static_frame" default="True"/>
 
 <node pkg="tf2_ros" type="static_transform_publisher" name="ros_static_transform_publisher"
rt_gene/scripts/estimate_gaze.py: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ def __init__(self, device_id_gaze, model_files):
 
         self.tf_prefix = rospy.get_param("~tf_prefix", "gaze")
         self.headpose_frame = self.tf_prefix + "/head_pose_estimated"
-        self.ros_tf_frame = rospy.get_param("~ros_tf_frame", "/kinect2_nonrotated_link")
+        self.ros_tf_frame = rospy.get_param("~ros_tf_frame", "/kinect2_ros_frame")
 
         self.image_subscriber = rospy.Subscriber("/subjects/images", MSG_SubjectImagesList, self.image_callback, queue_size=3, buff_size=2**24)
         self.subjects_gaze_img = rospy.Publisher("/subjects/gazeimages", Image, queue_size=3)
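
To see why estimate_gaze.py needs ros_tf_frame, here is a hypothetical sketch of reading the published head pose back relative to that frame and reducing it to the phi/theta inputs of the gaze network; the node name, the subject index and the yaw/pitch-to-phi/theta mapping are assumptions for illustration, not the node's actual code:

    # Hypothetical: look up a head pose relative to ros_tf_frame and extract
    # Euler angles; rt_gene's real node does more bookkeeping than this.
    import rospy
    import tf
    from tf import transformations

    rospy.init_node("headpose_angle_sketch")
    listener = tf.TransformListener()
    ros_tf_frame = "/kinect2_ros_frame"
    headpose_frame = "gaze/head_pose_estimated0"  # assumed: tf_prefix "gaze", subject id 0

    listener.waitForTransform(ros_tf_frame, headpose_frame, rospy.Time(0), rospy.Duration(5.0))
    trans, rot = listener.lookupTransform(ros_tf_frame, headpose_frame, rospy.Time(0))
    _roll, theta, phi = transformations.euler_from_quaternion(rot)  # assumed mapping to the network's angles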
rt_gene/scripts/extract_landmarks_node.py: 16 additions & 10 deletions
@@ -39,7 +39,7 @@ def __init__(self, img_proc=None):
         self.bridge = CvBridge()
         self.__subject_bridge = SubjectListBridge()
 
-        self.ros_tf_frame = rospy.get_param("~ros_tf_frame", "/kinect2_nonrotated_link")
+        self.camera_frame = rospy.get_param("~camera_frame", "/kinect2_link")
 
         self.tf_broadcaster = TransformBroadcaster()
         self.tf_prefix = rospy.get_param("~tf_prefix", default="gaze")
@@ -145,10 +145,11 @@ def get_head_pose(self, landmarks, subject_id):
         dist_coeffs = self.img_proc.distortionCoeffs()
 
         try:
-            success, rotation_vector_unstable, translation_vector_unstable = cv2.solvePnP(self.model_points,
-                                                                                          landmarks.reshape(len(self.model_points), 1, 2),
-                                                                                          cameraMatrix=camera_matrix,
-                                                                                          distCoeffs=dist_coeffs, flags=cv2.SOLVEPNP_DLS)
+            success, rodrigues_rotation, translation_vector, _ = cv2.solvePnPRansac(self.model_points,
+                                                                                    landmarks.reshape(len(self.model_points), 1, 2),
+                                                                                    cameraMatrix=camera_matrix,
+                                                                                    distCoeffs=dist_coeffs, flags=cv2.SOLVEPNP_DLS)
+
         except cv2.error as e:
             print('Could not estimate head pose', e)
             return False, None, None
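
One part of the fix appears to be the switch from cv2.solvePnP to cv2.solvePnPRansac, which lets RANSAC reject outlier landmark correspondences that can make the pose diverge near the edge of the field of view. A minimal sketch of the new call's shape with synthetic inputs (the extra fourth return value is the array of inlier indices):

    # Synthetic data; only the call signature mirrors the commit.
    import cv2
    import numpy as np

    object_points = np.random.rand(10, 3).astype(np.float32)            # 3D model points
    image_points = (np.random.rand(10, 1, 2) * 100).astype(np.float32)  # 2D landmarks
    camera_matrix = np.array([[500., 0., 320.], [0., 500., 240.], [0., 0., 1.]])
    dist_coeffs = np.zeros(5)

    success, rvec, tvec, inliers = cv2.solvePnPRansac(object_points, image_points,
                                                      cameraMatrix=camera_matrix,
                                                      distCoeffs=dist_coeffs,
                                                      flags=cv2.SOLVEPNP_DLS)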
@@ -157,12 +158,17 @@
             print('Could not estimate head pose')
             return False, None, None
 
-        rotation_vector, translation_vector = self.apply_kalman_filter_head_pose(subject_id, rotation_vector_unstable, translation_vector_unstable)
+        # this is a generic point stabiliser, the underlying representation doesn't matter
+        rotation_vector, translation_vector = self.apply_kalman_filter_head_pose(subject_id, rodrigues_rotation, translation_vector / 1000.0)
 
-        translation_vector = np.array([translation_vector[2], -translation_vector[0], -translation_vector[1]]) / 1000.0
-        rotation_vector_swapped = [-rotation_vector[2], -rotation_vector[0] + self.head_pitch, rotation_vector[1] + np.pi]
+        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
+        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
+        _m = np.zeros((4, 4))
+        _m[:3, :3] = _rotation_matrix
+        _m[3, 3] = 1
+        _rpy_rotation = np.array(transformations.euler_from_matrix(_m)).reshape(rodrigues_rotation.shape)
 
-        return success, rotation_vector_swapped, translation_vector
+        return success, _rpy_rotation, translation_vector
 
     def apply_kalman_filter_head_pose(self, subject_id, rotation_vector_unstable, translation_vector_unstable):
         stable_pose = []
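
The replacement above goes Rodrigues vector -> rotation matrix -> axis remap -> Euler angles, instead of the old hand-tuned component swap. The same chain, condensed, on a made-up input (assuming ROS's tf.transformations is available):

    # Illustrative input; shape (3, 1) matches what solvePnPRansac returns.
    import cv2
    import numpy as np
    from tf import transformations

    rodrigues_rotation = np.array([[0.1], [-0.2], [0.05]])

    rotation_matrix, _ = cv2.Rodrigues(rodrigues_rotation)   # 3x3, camera-optical axes
    remap = np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]])    # re-express the head axes
    homogeneous = np.eye(4)
    homogeneous[:3, :3] = np.matmul(rotation_matrix, remap)
    roll, pitch, yaw = transformations.euler_from_matrix(homogeneous)  # what publish_pose receives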
@@ -185,7 +191,7 @@ def publish_subject_list(self, timestamp, subjects):
     def publish_pose(self, timestamp, nose_center_3d_tf, head_rpy, subject_id):
         self.tf_broadcaster.sendTransform(nose_center_3d_tf, transformations.quaternion_from_euler(*head_rpy), timestamp,
                                           self.tf_prefix + "/head_pose_estimated" + str(subject_id),
-                                          self.ros_tf_frame)
+                                          self.camera_frame)
 
     def update_subject_tracker(self, color_img):
         faceboxes = self.get_face_bb(color_img)
rt_gene/src/rt_gene/extract_landmarks_method_base.py: 0 additions & 2 deletions
@@ -66,8 +66,6 @@ def get_full_model_points(self, model_points_file=None):
                 raw_value.append(line)
         model_points = np.array(raw_value, dtype=np.float32)
         model_points = np.reshape(model_points, (3, -1)).T
-        # model_points *= 4
-        model_points[:, -1] *= -1
 
         # index the expansion of the model based.
         model_points = model_points * (self.interpupillary_distance * self.model_size_rescale)
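
The two deleted lines look like the counterpart of the old convention: the z column of the 3D face model was flipped to suit the previous pose frame, presumably unnecessary now that the solver works directly in the camera-optical frame. For illustration, on made-up rows:

    # What the removed line did (made-up (x, y, z) model rows):
    import numpy as np

    model_points = np.array([[0.0, 170.0, -135.0],
                             [0.0, -330.0, -65.0]])
    model_points[:, -1] *= -1  # flip z; dropped by this commit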
