fixed divergent head pose when at the extremes of the image field of view
This now requires the head pose estimation to publish in the image coordinate system. It also requires the gaze estimation to know the ros_frame, since the head pose is now estimated in the image coordinate system and the phi and theta angles fed to the gaze estimation network must be derived relative to that frame.

The launch files have been modified accordingly.
ahmed-alhindawi committed Nov 1, 2019
1 parent 059c8ac commit 9cde812
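
For context, a minimal sketch (not part of this commit) of the two conventions involved: the image, or camera-optical, frame has x right, y down, z forward (as in /kinect2_link), while the ROS convention has x forward, y left, z up (as in /kinect2_ros_frame). The hand-coded axis swap removed from extract_landmarks_node.py below is exactly this rotation; the example point is made up:

    # Illustrative only: map a point from the camera-optical frame to the ROS convention.
    import numpy as np

    # rotation taking optical coordinates (x right, y down, z forward)
    # to ROS coordinates (x forward, y left, z up)
    R_ros_optical = np.array([[0, 0, 1],
                              [-1, 0, 0],
                              [0, -1, 0]])

    p_optical = np.array([0.1, 0.2, 1.5])  # head slightly right of, below, and 1.5 m in front of the camera
    p_ros = R_ros_optical.dot(p_optical)   # -> [1.5, -0.1, -0.2], i.e. [z, -x, -y]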
Showing 6 changed files with 31 additions and 25 deletions.
rt_gene/launch/estimate_gaze.launch: 12 additions & 10 deletions
@@ -1,16 +1,17 @@
 <launch>
 <arg name="pytorch_num_threads" default="8" />
 <arg name="use_face_encoding_tracker" default="True" />
-<arg name="face_encoding_threshold" default="0.6"/>
-<arg name="start_rviz" default="False"/>
+<arg name="face_encoding_threshold" default="0.6" />
+<arg name="start_rviz" default="False" />
 <arg name="visualise_headpose" default="True" />
 <arg name="visualise_eyepose" default="True" />
 
-<arg name="video_namespace" default="/kinect2/hd"/>
-<arg name="video_image_topic" default="image_color_rect"/>
-<arg name="video_info_topic" default="camera_info"/>
+<arg name="video_namespace" default="/kinect2/hd" />
+<arg name="video_image_topic" default="image_color_rect" />
+<arg name="video_info_topic" default="camera_info" />
 
-<arg name="ros_tf_frame" default="/kinect2_nonrotated_link"/>
+<arg name="camera_frame" default="/kinect2_link" />
+<arg name="ros_frame" default="/kinect2_ros_frame" />
 
 <arg name="group_name" default="gaze" />
 <arg name="tf_prefix" default="$(arg group_name)" />
@@ -26,7 +27,7 @@
 <param name="device_id_facedetection" value="$(arg device_id_facedetection)" />
 <param name="use_face_encoding_tracker" value="$(arg use_face_encoding_tracker)" />
 <param name="face_encoding_threshold" value="$(arg face_encoding_threshold)" />
-<param name="ros_tf_frame" value="$(arg ros_tf_frame)" />
+<param name="camera_frame" value="$(arg camera_frame)" />
 <param name="tf_prefix" value="$(arg tf_prefix)" />
 <param name="visualise_headpose" value="$(arg visualise_headpose)" />
 
@@ -36,12 +37,13 @@
 
 <node pkg="rt_gene" type="estimate_gaze.py" name="estimate_gaze_twoeyes" output="screen">
 <rosparam param="model_files">['model_nets/Model_allsubjects1.h5']</rosparam>
-<!-- rosparam param="model_files">['model_nets/all_subjects_mpii_prl_utmv_0_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_1_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_2_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_3_02.h5']</rosparam-->
+<!-- rosparam param="model_files">['model_nets/all_subjects_mpii_prl_utmv_0_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_1_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_2_02.h5', 'model_nets/all_subjects_mpii_prl_utmv_3_02.h5']</rosparam -->
 <param name="tf_prefix" value="$(arg tf_prefix)" />
-<param name="ros_tf_frame" value="$(arg ros_tf_frame)" />
+<param name="ros_tf_frame" value="$(arg ros_frame)" />
 <param name="device_id_gazeestimation" value="$(arg device_id_gazeestimation)" />
 <param name="visualise_eyepose" value="$(arg visualise_eyepose)" />
-</node>
+</node>
 
 </group>
 
+<node pkg="rviz" type="rviz" name="rviz_test" args="-d $(find rt_gene)/rviz_cfg/gaze_following.rviz" if="$(arg start_rviz)" />
rt_gene/launch/start_kinect.launch: 1 addition & 1 deletion
@@ -7,6 +7,6 @@
 </include>
 <!-- publish nonrotated link for Kinect -->
 <node pkg="tf2_ros" type="static_transform_publisher" name="kinect2_link_broadcaster"
-args="0.0 0.0 0.0 -0.5 0.5 -0.5 0.5 /kinect2_nonrotated_link /kinect2_link" />
+args="0.0 0.0 0.0 -0.5 0.5 -0.5 0.5 /kinect2_ros_frame /kinect2_link" />
 </launch>
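
As a sanity check (not part of the commit), the quaternion in the static transform above is this same optical-to-ROS rotation; a quick sketch with tf.transformations, assuming a standard ROS Python environment:

    # Verify that qx, qy, qz, qw = -0.5, 0.5, -0.5, 0.5 maps the optical axes
    # of /kinect2_link onto the ROS axes of /kinect2_ros_frame.
    import numpy as np
    from tf import transformations

    q = [-0.5, 0.5, -0.5, 0.5]  # x, y, z, w, as passed to static_transform_publisher
    R = transformations.quaternion_matrix(q)[:3, :3]

    print(R.dot([0, 0, 1]))  # optical z (forward) -> [1.  0.  0.] (ROS x, forward)
    print(R.dot([1, 0, 0]))  # optical x (right)   -> [0. -1.  0.] (ROS -y, right)
    print(R.dot([0, 1, 0]))  # optical y (down)    -> [0.  0. -1.] (ROS -z, down)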

rt_gene/launch/start_webcam.launch: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 <arg name="video_info_topic" default="camera_info"/>
 
 <arg name="rgb_frame_id" default="/kinect2_link"/>
-<arg name="rgb_frame_id_ros" default="/kinect2_nonrotated_link"/> <!-- must match ros_tf_frame in estimate_gaze.launch -->
+<arg name="rgb_frame_id_ros" default="/kinect2_ros_frame"/>
 <arg name="publish_ros_static_frame" default="True"/>
 
 <node pkg="tf2_ros" type="static_transform_publisher" name="ros_static_transform_publisher"
rt_gene/scripts/estimate_gaze.py: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ def __init__(self, device_id_gaze, model_files):
 
         self.tf_prefix = rospy.get_param("~tf_prefix", "gaze")
         self.headpose_frame = self.tf_prefix + "/head_pose_estimated"
-        self.ros_tf_frame = rospy.get_param("~ros_tf_frame", "/kinect2_nonrotated_link")
+        self.ros_tf_frame = rospy.get_param("~ros_tf_frame", "/kinect2_ros_frame")
 
         self.image_subscriber = rospy.Subscriber("/subjects/images", MSG_SubjectImagesList, self.image_callback, queue_size=3, buff_size=2**24)
         self.subjects_gaze_img = rospy.Publisher("/subjects/gazeimages", Image, queue_size=3)
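
To see why estimate_gaze.py needs ros_tf_frame, here is a hypothetical sketch of reading the published head pose back relative to that frame and reducing it to the phi/theta inputs of the gaze network; the node name, the subject index and the yaw/pitch-to-phi/theta mapping are assumptions for illustration, not the node's actual code:

    # Hypothetical: look up a head pose relative to ros_tf_frame and extract
    # Euler angles; rt_gene's real node does more bookkeeping than this.
    import rospy
    import tf
    from tf import transformations

    rospy.init_node("headpose_angle_sketch")
    listener = tf.TransformListener()
    ros_tf_frame = "/kinect2_ros_frame"
    headpose_frame = "gaze/head_pose_estimated0"  # assumed: tf_prefix "gaze", subject id 0

    listener.waitForTransform(ros_tf_frame, headpose_frame, rospy.Time(0), rospy.Duration(5.0))
    trans, rot = listener.lookupTransform(ros_tf_frame, headpose_frame, rospy.Time(0))
    _roll, theta, phi = transformations.euler_from_quaternion(rot)  # assumed mapping to the network's angles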
rt_gene/scripts/extract_landmarks_node.py: 16 additions & 10 deletions
@@ -39,7 +39,7 @@ def __init__(self, img_proc=None):
         self.bridge = CvBridge()
         self.__subject_bridge = SubjectListBridge()
 
-        self.ros_tf_frame = rospy.get_param("~ros_tf_frame", "/kinect2_nonrotated_link")
+        self.camera_frame = rospy.get_param("~camera_frame", "/kinect2_link")
 
         self.tf_broadcaster = TransformBroadcaster()
         self.tf_prefix = rospy.get_param("~tf_prefix", default="gaze")
@@ -145,10 +145,11 @@ def get_head_pose(self, landmarks, subject_id):
         dist_coeffs = self.img_proc.distortionCoeffs()
 
         try:
-            success, rotation_vector_unstable, translation_vector_unstable = cv2.solvePnP(self.model_points,
-                                                                                          landmarks.reshape(len(self.model_points), 1, 2),
-                                                                                          cameraMatrix=camera_matrix,
-                                                                                          distCoeffs=dist_coeffs, flags=cv2.SOLVEPNP_DLS)
+            success, rodrigues_rotation, translation_vector, _ = cv2.solvePnPRansac(self.model_points,
+                                                                                    landmarks.reshape(len(self.model_points), 1, 2),
+                                                                                    cameraMatrix=camera_matrix,
+                                                                                    distCoeffs=dist_coeffs, flags=cv2.SOLVEPNP_DLS)
+
         except cv2.error as e:
             print('Could not estimate head pose', e)
             return False, None, None
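
One part of the fix appears to be the switch from cv2.solvePnP to cv2.solvePnPRansac, which lets RANSAC reject outlier landmark correspondences that can make the pose diverge near the edge of the field of view. A minimal sketch of the new call's shape with synthetic inputs (the extra fourth return value is the array of inlier indices):

    # Synthetic data; only the call signature mirrors the commit.
    import cv2
    import numpy as np

    object_points = np.random.rand(10, 3).astype(np.float32)            # 3D model points
    image_points = (np.random.rand(10, 1, 2) * 100).astype(np.float32)  # 2D landmarks
    camera_matrix = np.array([[500., 0., 320.], [0., 500., 240.], [0., 0., 1.]])
    dist_coeffs = np.zeros(5)

    success, rvec, tvec, inliers = cv2.solvePnPRansac(object_points, image_points,
                                                      cameraMatrix=camera_matrix,
                                                      distCoeffs=dist_coeffs,
                                                      flags=cv2.SOLVEPNP_DLS)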
@@ -157,12 +158,17 @@
             print('Could not estimate head pose')
             return False, None, None
 
-        rotation_vector, translation_vector = self.apply_kalman_filter_head_pose(subject_id, rotation_vector_unstable, translation_vector_unstable)
+        # this is a generic point stabiliser, the underlying representation doesn't matter
+        rotation_vector, translation_vector = self.apply_kalman_filter_head_pose(subject_id, rodrigues_rotation, translation_vector / 1000.0)
 
-        translation_vector = np.array([translation_vector[2], -translation_vector[0], -translation_vector[1]]) / 1000.0
-        rotation_vector_swapped = [-rotation_vector[2], -rotation_vector[0] + self.head_pitch, rotation_vector[1] + np.pi]
+        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
+        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
+        _m = np.zeros((4, 4))
+        _m[:3, :3] = _rotation_matrix
+        _m[3, 3] = 1
+        _rpy_rotation = np.array(transformations.euler_from_matrix(_m)).reshape(rodrigues_rotation.shape)
 
-        return success, rotation_vector_swapped, translation_vector
+        return success, _rpy_rotation, translation_vector
 
     def apply_kalman_filter_head_pose(self, subject_id, rotation_vector_unstable, translation_vector_unstable):
         stable_pose = []
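
The replacement above goes Rodrigues vector -> rotation matrix -> axis remap -> Euler angles, instead of the old hand-tuned component swap. The same chain, condensed, on a made-up input (assuming ROS's tf.transformations is available):

    # Illustrative input; shape (3, 1) matches what solvePnPRansac returns.
    import cv2
    import numpy as np
    from tf import transformations

    rodrigues_rotation = np.array([[0.1], [-0.2], [0.05]])

    rotation_matrix, _ = cv2.Rodrigues(rodrigues_rotation)   # 3x3, camera-optical axes
    remap = np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]])    # re-express the head axes
    homogeneous = np.eye(4)
    homogeneous[:3, :3] = np.matmul(rotation_matrix, remap)
    roll, pitch, yaw = transformations.euler_from_matrix(homogeneous)  # what publish_pose receives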
@@ -185,7 +191,7 @@ def publish_subject_list(self, timestamp, subjects):
     def publish_pose(self, timestamp, nose_center_3d_tf, head_rpy, subject_id):
         self.tf_broadcaster.sendTransform(nose_center_3d_tf, transformations.quaternion_from_euler(*head_rpy), timestamp,
                                           self.tf_prefix + "/head_pose_estimated" + str(subject_id),
-                                          self.ros_tf_frame)
+                                          self.camera_frame)
 
     def update_subject_tracker(self, color_img):
         faceboxes = self.get_face_bb(color_img)
rt_gene/src/rt_gene/extract_landmarks_method_base.py: 0 additions & 2 deletions
@@ -66,8 +66,6 @@ def get_full_model_points(self, model_points_file=None):
                 raw_value.append(line)
         model_points = np.array(raw_value, dtype=np.float32)
         model_points = np.reshape(model_points, (3, -1)).T
-        # model_points *= 4
-        model_points[:, -1] *= -1
 
         # index the expansion of the model based.
         model_points = model_points * (self.interpupillary_distance * self.model_size_rescale)
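
The two deleted lines look like the counterpart of the old convention: the z column of the 3D face model was flipped to suit the previous pose frame, presumably unnecessary now that the solver works directly in the camera-optical frame. For illustration, on made-up rows:

    # What the removed line did (made-up (x, y, z) model rows):
    import numpy as np

    model_points = np.array([[0.0, 170.0, -135.0],
                             [0.0, -330.0, -65.0]])
    model_points[:, -1] *= -1  # flip z; dropped by this commit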
