diff --git a/README.md b/README.md
index bc76251..b54eff1 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,4 @@
 # posenetv2-pythontf
 This is a Python and TensorFlow implementation of PoseNet v2, released by Google in TensorFlow.js.
+
+
diff --git a/TFJS Model URL b/TFJS Model URL
new file mode 100644
index 0000000..170f2dc
--- /dev/null
+++ b/TFJS Model URL
@@ -0,0 +1,14 @@
+MOBILENET FULL PRECISION STRIDE 16 MODEL
+# MODEL LINK
+https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100/model-stride16.json
+
+MOBILENET FULL PRECISION STRIDE 8 MODEL
+https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100/model-stride8.json
+
+# MODEL WEIGHTS
+https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100/group1-shard1of4.bin
+https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100/group1-shard2of4.bin
+https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100/group1-shard3of4.bin
+https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100/group1-shard4of4.bin
+
+# MORE LINKS WILL BE UPDATED SOON
\ No newline at end of file
diff --git a/images/0002_c3s1_068642_02.jpg b/images/0002_c3s1_068642_02.jpg
new file mode 100644
index 0000000..634e2af
Binary files /dev/null and b/images/0002_c3s1_068642_02.jpg differ
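The files listed in `TFJS Model URL` can be fetched directly. A minimal sketch, assuming the URLs stay valid and a plain HTTP download is acceptable:

```python
# Minimal sketch: fetch the stride-16 MobileNet model definition and the
# four weight shards listed in the TFJS Model URL file above.
import urllib.request

BASE = 'https://storage.googleapis.com/tfjs-models/savedmodel/posenet/mobilenet/float/100/'
FILES = ['model-stride16.json'] + ['group1-shard%dof4.bin' % i for i in range(1, 5)]

for name in FILES:
    print('Downloading', name)
    urllib.request.urlretrieve(BASE + name, name)
```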
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..4b1ec6d
--- /dev/null
+++ b/main.py
@@ -0,0 +1,108 @@
+import tensorflow as tf
+import cv2
+import time
+import argparse
+import os
+
+import posenet
+
+
+MODEL_DIR = './models'
+DEBUG_OUTPUT = False
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', type=str, default='model-mobilenet_v2')
+parser.add_argument('--output_stride', type=int, default=16)
+parser.add_argument('--scale_factor', type=float, default=1.0)
+parser.add_argument('--notxt', action='store_true')
+parser.add_argument('--image_dir', type=str, default='./images')
+parser.add_argument('--output_dir', type=str, default='./output')
+args = parser.parse_args()
+
+
+def load_model(model_name, sess, model_dir=MODEL_DIR):
+    model_path = os.path.join(model_dir, '%s.pb' % model_name)
+    if not os.path.exists(model_path):
+        raise FileNotFoundError('Cannot find model file %s' % model_path)
+
+    with tf.gfile.GFile(model_path, 'rb') as f:
+        graph_def = tf.GraphDef()
+        graph_def.ParseFromString(f.read())
+        sess.graph.as_default()
+        tf.import_graph_def(graph_def, name='')
+
+    if DEBUG_OUTPUT:
+        graph_nodes = [n for n in graph_def.node]
+        names = []
+        for t in graph_nodes:
+            names.append(t.name)
+            print('Loaded graph node:', t.name)
+
+    # For the MobileNet version
+    offsets = sess.graph.get_tensor_by_name('MobilenetV1/offset_2/BiasAdd:0')
+    displacement_fwd = sess.graph.get_tensor_by_name('MobilenetV1/displacement_fwd_2/BiasAdd:0')
+    displacement_bwd = sess.graph.get_tensor_by_name('MobilenetV1/displacement_bwd_2/BiasAdd:0')
+    heatmaps = sess.graph.get_tensor_by_name('MobilenetV1/heatmap_2/BiasAdd:0')
+    # For the ResNet50 version
+    # offsets = sess.graph.get_tensor_by_name('float_short_offsets:0')
+    # displacement_fwd = sess.graph.get_tensor_by_name('resnet_v1_50/displacement_fwd_2/BiasAdd:0')
+    # displacement_bwd = sess.graph.get_tensor_by_name('resnet_v1_50/displacement_bwd_2/BiasAdd:0')
+    # heatmaps = sess.graph.get_tensor_by_name('float_heatmaps:0')
+
+    return [heatmaps, offsets, displacement_fwd, displacement_bwd]
+
+
+def main():
+    with tf.Session() as sess:
+        model_outputs = load_model(args.model, sess)
+        output_stride = args.output_stride  # e.g. 16; must match the loaded model
+
+        if args.output_dir:
+            if not os.path.exists(args.output_dir):
+                os.makedirs(args.output_dir)
+
+        filenames = [
+            f.path for f in os.scandir(args.image_dir)
+            if f.is_file() and f.path.endswith(('.png', '.jpg'))]
+
+        start = time.time()
+        for f in filenames:
+            input_image, draw_image, output_scale = posenet.read_imgfile(
+                f, scale_factor=args.scale_factor, output_stride=output_stride)
+
+            heatmaps_result, offsets_result, displacement_fwd_result, displacement_bwd_result = sess.run(
+                model_outputs,
+                feed_dict={'sub_2:0': input_image}
+            )
+
+            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
+                heatmaps_result.squeeze(axis=0),
+                offsets_result.squeeze(axis=0),
+                displacement_fwd_result.squeeze(axis=0),
+                displacement_bwd_result.squeeze(axis=0),
+                output_stride=output_stride,
+                max_pose_detections=10,
+                min_pose_score=0.25)
+
+            keypoint_coords *= output_scale
+
+            if args.output_dir:
+                draw_image = posenet.draw_skel_and_kp(
+                    draw_image, pose_scores, keypoint_scores, keypoint_coords,
+                    min_pose_score=0.25, min_part_score=0.25)
+
+                cv2.imwrite(os.path.join(args.output_dir, os.path.relpath(f, args.image_dir)), draw_image)
+
+            if not args.notxt:
+                print()
+                print("Results for image: %s" % f)
+                for pi in range(len(pose_scores)):
+                    if pose_scores[pi] == 0.:
+                        break
+                    print('Pose #%d, score = %f' % (pi, pose_scores[pi]))
+                    for ki, (s, c) in enumerate(zip(keypoint_scores[pi, :], keypoint_coords[pi, :, :])):
+                        print('Keypoint %s, score = %f, coord = %s' % (posenet.PART_NAMES[ki], s, c))
+
+        print('Average FPS:', len(filenames) / (time.time() - start))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/models/model-mobilenet_v2.pb b/models/model-mobilenet_v2.pb
new file mode 100644
index 0000000..2033403
Binary files /dev/null and b/models/model-mobilenet_v2.pb differ
diff --git a/output/0002_c3s1_068642_02.jpg b/output/0002_c3s1_068642_02.jpg
new file mode 100644
index 0000000..af4cf80
Binary files /dev/null and b/output/0002_c3s1_068642_02.jpg differ
diff --git a/posenet/__init__.py b/posenet/__init__.py
new file mode 100644
index 0000000..e1c8ad6
--- /dev/null
+++ b/posenet/__init__.py
@@ -0,0 +1,3 @@
+from posenet.constants import *
+from posenet.decode_multi import decode_multiple_poses
+from posenet.utils import *
diff --git a/posenet/__pycache__/__init__.cpython-37.pyc b/posenet/__pycache__/__init__.cpython-37.pyc
new file mode 100644
index 0000000..366b5ba
Binary files /dev/null and b/posenet/__pycache__/__init__.cpython-37.pyc differ
diff --git a/posenet/__pycache__/constants.cpython-37.pyc b/posenet/__pycache__/constants.cpython-37.pyc
new file mode 100644
index 0000000..1d00aa2
Binary files /dev/null and b/posenet/__pycache__/constants.cpython-37.pyc differ
diff --git a/posenet/__pycache__/decode.cpython-37.pyc b/posenet/__pycache__/decode.cpython-37.pyc
new file mode 100644
index 0000000..b3baf63
Binary files /dev/null and b/posenet/__pycache__/decode.cpython-37.pyc differ
diff --git a/posenet/__pycache__/decode_multi.cpython-37.pyc b/posenet/__pycache__/decode_multi.cpython-37.pyc
new file mode 100644
index 0000000..a96b76c
Binary files /dev/null and b/posenet/__pycache__/decode_multi.cpython-37.pyc differ
diff --git a/posenet/__pycache__/utils.cpython-37.pyc b/posenet/__pycache__/utils.cpython-37.pyc
new file mode 100644
index 0000000..95a8194
Binary files /dev/null and b/posenet/__pycache__/utils.cpython-37.pyc differ
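`load_model()` in main.py hard-codes the MobileNet output tensor names and only comments out the ResNet50 ones. When adapting the script to a different frozen graph, a sketch like the following can help locate the right names (assumes TensorFlow 1.x, matching the code above; the model path is the .pb shipped in this diff):

```python
# Minimal sketch for listing node names in a frozen TF1 graph, useful when
# switching between the MobileNet and ResNet output tensors in load_model().
import tensorflow as tf

def list_graph_nodes(model_path='./models/model-mobilenet_v2.pb'):
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(model_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    return [n.name for n in graph_def.node]

if __name__ == '__main__':
    for name in list_graph_nodes():
        print(name)
```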
diff --git a/posenet/constants.py b/posenet/constants.py
new file mode 100644
index 0000000..bae1ed3
--- /dev/null
+++ b/posenet/constants.py
@@ -0,0 +1,63 @@
+
+PART_NAMES = [
+    "nose", "leftEye", "rightEye", "leftEar", "rightEar", "leftShoulder",
+    "rightShoulder", "leftElbow", "rightElbow", "leftWrist", "rightWrist",
+    "leftHip", "rightHip", "leftKnee", "rightKnee", "leftAnkle", "rightAnkle"
+]
+
+NUM_KEYPOINTS = len(PART_NAMES)
+
+PART_IDS = {pn: pid for pid, pn in enumerate(PART_NAMES)}
+
+CONNECTED_PART_NAMES = [
+    ("leftHip", "leftShoulder"), ("leftElbow", "leftShoulder"),
+    ("leftElbow", "leftWrist"), ("leftHip", "leftKnee"),
+    ("leftKnee", "leftAnkle"), ("rightHip", "rightShoulder"),
+    ("rightElbow", "rightShoulder"), ("rightElbow", "rightWrist"),
+    ("rightHip", "rightKnee"), ("rightKnee", "rightAnkle"),
+    ("leftShoulder", "rightShoulder"), ("leftHip", "rightHip")
+]
+
+CONNECTED_PART_INDICES = [(PART_IDS[a], PART_IDS[b]) for a, b in CONNECTED_PART_NAMES]
+
+LOCAL_MAXIMUM_RADIUS = 1
+
+POSE_CHAIN = [
+    ("nose", "leftEye"), ("leftEye", "leftEar"), ("nose", "rightEye"),
+    ("rightEye", "rightEar"), ("nose", "leftShoulder"),
+    ("leftShoulder", "leftElbow"), ("leftElbow", "leftWrist"),
+    ("leftShoulder", "leftHip"), ("leftHip", "leftKnee"),
+    ("leftKnee", "leftAnkle"), ("nose", "rightShoulder"),
+    ("rightShoulder", "rightElbow"), ("rightElbow", "rightWrist"),
+    ("rightShoulder", "rightHip"), ("rightHip", "rightKnee"),
+    ("rightKnee", "rightAnkle")
+]
+
+PARENT_CHILD_TUPLES = [(PART_IDS[parent], PART_IDS[child]) for parent, child in POSE_CHAIN]
+
+PART_CHANNELS = [
+    'left_face',
+    'right_face',
+    'right_upper_leg_front',
+    'right_lower_leg_back',
+    'right_upper_leg_back',
+    'left_lower_leg_front',
+    'left_upper_leg_front',
+    'left_upper_leg_back',
+    'left_lower_leg_back',
+    'right_feet',
+    'right_lower_leg_front',
+    'left_feet',
+    'torso_front',
+    'torso_back',
+    'right_upper_arm_front',
+    'right_upper_arm_back',
+    'right_lower_arm_back',
+    'left_lower_arm_front',
+    'left_upper_arm_front',
+    'left_upper_arm_back',
+    'left_lower_arm_back',
+    'right_hand',
+    'right_lower_arm_front',
+    'left_hand'
+]
\ No newline at end of file
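A quick illustration of how these lookup tables fit together, using only names defined in constants.py: a skeleton edge given by part names resolves to the same index pair stored in `CONNECTED_PART_INDICES`.

```python
from posenet.constants import PART_IDS, PART_NAMES, CONNECTED_PART_INDICES

# leftElbow -> leftWrist is one of the drawable skeleton edges
edge = (PART_IDS['leftElbow'], PART_IDS['leftWrist'])
assert edge in CONNECTED_PART_INDICES
print('Edge %s connects %s -> %s' % (edge, PART_NAMES[edge[0]], PART_NAMES[edge[1]]))
```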
diff --git a/posenet/decode.py b/posenet/decode.py
new file mode 100644
index 0000000..920ebf5
--- /dev/null
+++ b/posenet/decode.py
@@ -0,0 +1,69 @@
+import numpy as np
+
+from posenet.constants import *
+
+
+def traverse_to_targ_keypoint(
+        edge_id, source_keypoint, target_keypoint_id, scores, offsets, output_stride, displacements
+):
+    height = scores.shape[0]
+    width = scores.shape[1]
+
+    source_keypoint_indices = np.clip(
+        np.round(source_keypoint / output_stride), a_min=0, a_max=[height - 1, width - 1]).astype(np.int32)
+
+    displaced_point = source_keypoint + displacements[
+        source_keypoint_indices[0], source_keypoint_indices[1], edge_id]
+
+    displaced_point_indices = np.clip(
+        np.round(displaced_point / output_stride), a_min=0, a_max=[height - 1, width - 1]).astype(np.int32)
+
+    score = scores[displaced_point_indices[0], displaced_point_indices[1], target_keypoint_id]
+
+    image_coord = displaced_point_indices * output_stride + offsets[
+        displaced_point_indices[0], displaced_point_indices[1], target_keypoint_id]
+
+    return score, image_coord
+
+
+def decode_pose(
+        root_score, root_id, root_image_coord,
+        scores,
+        offsets,
+        output_stride,
+        displacements_fwd,
+        displacements_bwd
+):
+    num_parts = scores.shape[2]
+    num_edges = len(PARENT_CHILD_TUPLES)
+
+    instance_keypoint_scores = np.zeros(num_parts)
+    instance_keypoint_coords = np.zeros((num_parts, 2))
+    instance_keypoint_scores[root_id] = root_score
+    instance_keypoint_coords[root_id] = root_image_coord
+
+    for edge in reversed(range(num_edges)):
+        target_keypoint_id, source_keypoint_id = PARENT_CHILD_TUPLES[edge]
+        if (instance_keypoint_scores[source_keypoint_id] > 0.0 and
+                instance_keypoint_scores[target_keypoint_id] == 0.0):
+            score, coords = traverse_to_targ_keypoint(
+                edge,
+                instance_keypoint_coords[source_keypoint_id],
+                target_keypoint_id,
+                scores, offsets, output_stride, displacements_bwd)
+            instance_keypoint_scores[target_keypoint_id] = score
+            instance_keypoint_coords[target_keypoint_id] = coords
+
+    for edge in range(num_edges):
+        source_keypoint_id, target_keypoint_id = PARENT_CHILD_TUPLES[edge]
+        if (instance_keypoint_scores[source_keypoint_id] > 0.0 and
+                instance_keypoint_scores[target_keypoint_id] == 0.0):
+            score, coords = traverse_to_targ_keypoint(
+                edge,
+                instance_keypoint_coords[source_keypoint_id],
+                target_keypoint_id,
+                scores, offsets, output_stride, displacements_fwd)
+            instance_keypoint_scores[target_keypoint_id] = score
+            instance_keypoint_coords[target_keypoint_id] = coords
+
+    return instance_keypoint_scores, instance_keypoint_coords
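The coordinate arithmetic inside `traverse_to_targ_keypoint()` reduces to: image position = heatmap cell * output_stride + the predicted sub-stride offset for that cell. A toy example with made-up numbers:

```python
# Worked sketch of the coordinate math used in traverse_to_targ_keypoint.
# The cell position and offset values here are invented for illustration.
import numpy as np

output_stride = 16
cell = np.array([3, 5])          # heatmap position (y, x)
offset = np.array([4.2, -1.7])   # predicted (dy, dx) for this cell/keypoint
image_coord = cell * output_stride + offset
print(image_coord)               # -> [52.2 78.3]
```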
diff --git a/posenet/decode_multi.py b/posenet/decode_multi.py
new file mode 100644
index 0000000..cdcb60e
--- /dev/null
+++ b/posenet/decode_multi.py
@@ -0,0 +1,154 @@
+from posenet.decode import *
+from posenet.constants import *
+import time
+import scipy.ndimage as ndi
+
+
+def within_nms_radius(poses, squared_nms_radius, point, keypoint_id):
+    for _, _, pose_coord in poses:
+        if np.sum((pose_coord[keypoint_id] - point) ** 2) <= squared_nms_radius:
+            return True
+    return False
+
+
+def within_nms_radius_fast(pose_coords, squared_nms_radius, point):
+    if not pose_coords.shape[0]:
+        return False
+    return np.any(np.sum((pose_coords - point) ** 2, axis=1) <= squared_nms_radius)
+
+
+def get_instance_score(
+        existing_poses, squared_nms_radius,
+        keypoint_scores, keypoint_coords):
+    not_overlapped_scores = 0.
+    for keypoint_id in range(len(keypoint_scores)):
+        if not within_nms_radius(
+                existing_poses, squared_nms_radius,
+                keypoint_coords[keypoint_id], keypoint_id):
+            not_overlapped_scores += keypoint_scores[keypoint_id]
+    return not_overlapped_scores / len(keypoint_scores)
+
+
+def get_instance_score_fast(
+        exist_pose_coords,
+        squared_nms_radius,
+        keypoint_scores, keypoint_coords):
+
+    if exist_pose_coords.shape[0]:
+        s = np.sum((exist_pose_coords - keypoint_coords) ** 2, axis=2) > squared_nms_radius
+        not_overlapped_scores = np.sum(keypoint_scores[np.all(s, axis=0)])
+    else:
+        not_overlapped_scores = np.sum(keypoint_scores)
+    return not_overlapped_scores / len(keypoint_scores)
+
+
+def score_is_max_in_local_window(keypoint_id, score, hmy, hmx, local_max_radius, scores):
+    height = scores.shape[0]
+    width = scores.shape[1]
+
+    y_start = max(hmy - local_max_radius, 0)
+    y_end = min(hmy + local_max_radius + 1, height)
+    x_start = max(hmx - local_max_radius, 0)
+    x_end = min(hmx + local_max_radius + 1, width)
+
+    for y in range(y_start, y_end):
+        for x in range(x_start, x_end):
+            if scores[y, x, keypoint_id] > score:
+                return False
+    return True
+
+
+def build_part_with_score(score_threshold, local_max_radius, scores):
+    parts = []
+    height = scores.shape[0]
+    width = scores.shape[1]
+    num_keypoints = scores.shape[2]
+
+    for hmy in range(height):
+        for hmx in range(width):
+            for keypoint_id in range(num_keypoints):
+                score = scores[hmy, hmx, keypoint_id]
+                if score < score_threshold:
+                    continue
+                if score_is_max_in_local_window(keypoint_id, score, hmy, hmx,
+                                                local_max_radius, scores):
+                    parts.append((
+                        score, keypoint_id, np.array((hmy, hmx))
+                    ))
+    return parts
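`build_part_with_score_fast()` below replaces these nested Python loops with `scipy.ndimage.maximum_filter`. A toy demonstration of that peak-finding trick on made-up scores: a cell is a peak when it equals the local maximum of its window and clears the threshold.

```python
# Toy demonstration of the maximum_filter trick used below: peaks are
# cells equal to the local max within a (2r+1)-sized window and above
# the score threshold. Scores here are invented for illustration.
import numpy as np
import scipy.ndimage as ndi

scores = np.array([
    [0.1, 0.2, 0.1],
    [0.2, 0.9, 0.2],
    [0.1, 0.2, 0.6],
])
max_vals = ndi.maximum_filter(scores, size=3, mode='constant')
peaks = np.argwhere((scores == max_vals) & (scores > 0.5))
print(peaks)  # -> [[1 1]]; the 0.6 at (2, 2) is suppressed by its 0.9 neighbour
```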
+
+def build_part_with_score_fast(score_threshold, local_max_radius, scores):
+    parts = []
+    num_keypoints = scores.shape[2]
+    lmd = 2 * local_max_radius + 1
+
+    # NOTE it seems faster to iterate over the keypoints and perform maximum_filter
+    # on each subarray vs doing the op on the full score array with size=(lmd, lmd, 1)
+    for keypoint_id in range(num_keypoints):
+        kp_scores = scores[:, :, keypoint_id].copy()
+        kp_scores[kp_scores < score_threshold] = 0.
+        max_vals = ndi.maximum_filter(kp_scores, size=lmd, mode='constant')
+        max_loc = np.logical_and(kp_scores == max_vals, kp_scores > 0)
+        max_loc_idx = max_loc.nonzero()
+        for y, x in zip(*max_loc_idx):
+            parts.append((
+                scores[y, x, keypoint_id],
+                keypoint_id,
+                np.array((y, x))
+            ))
+
+    return parts
+
+
+def decode_multiple_poses(
+        scores, offsets, displacements_fwd, displacements_bwd, output_stride,
+        max_pose_detections=10, score_threshold=0.5, nms_radius=20, min_pose_score=0.5):
+
+    pose_count = 0
+    pose_scores = np.zeros(max_pose_detections)
+    pose_keypoint_scores = np.zeros((max_pose_detections, NUM_KEYPOINTS))
+    pose_keypoint_coords = np.zeros((max_pose_detections, NUM_KEYPOINTS, 2))
+
+    squared_nms_radius = nms_radius ** 2
+
+    scored_parts = build_part_with_score_fast(score_threshold, LOCAL_MAXIMUM_RADIUS, scores)
+    scored_parts = sorted(scored_parts, key=lambda x: x[0], reverse=True)
+
+    # change dimensions from (h, w, 2 * k) to (h, w, k, 2) to allow return of complete coord array
+    height = scores.shape[0]
+    width = scores.shape[1]
+    offsets = offsets.reshape(height, width, 2, -1).swapaxes(2, 3)
+    displacements_fwd = displacements_fwd.reshape(height, width, 2, -1).swapaxes(2, 3)
+    displacements_bwd = displacements_bwd.reshape(height, width, 2, -1).swapaxes(2, 3)
+
+    for root_score, root_id, root_coord in scored_parts:
+        root_image_coords = root_coord * output_stride + offsets[
+            root_coord[0], root_coord[1], root_id]
+
+        if within_nms_radius_fast(
+                pose_keypoint_coords[:pose_count, root_id, :], squared_nms_radius, root_image_coords):
+            continue
+
+        keypoint_scores, keypoint_coords = decode_pose(
+            root_score, root_id, root_image_coords,
+            scores, offsets, output_stride,
+            displacements_fwd, displacements_bwd)
+
+        pose_score = get_instance_score_fast(
+            pose_keypoint_coords[:pose_count, :, :], squared_nms_radius, keypoint_scores, keypoint_coords)
+
+        # NOTE this isn't in the original implementation, but because parts are
+        # ordered by score and the number of detections is capped, we could
+        # otherwise populate the returned poses with lower-scored poses than if
+        # we discard 'bad' ones and continue (higher pose scores can still come
+        # later). Set min_pose_score to 0. to revert to the original behaviour.
+        if min_pose_score == 0. or pose_score >= min_pose_score:
+            pose_scores[pose_count] = pose_score
+            pose_keypoint_scores[pose_count, :] = keypoint_scores
+            pose_keypoint_coords[pose_count, :, :] = keypoint_coords
+            pose_count += 1
+
+        if pose_count >= max_pose_detections:
+            break
+
+    return pose_scores, pose_keypoint_scores, pose_keypoint_coords
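The reshape-plus-swapaxes step in `decode_multiple_poses()` converts the packed (h, w, 2*k) offset tensor into one (y, x) pair per keypoint. A small sketch, assuming the y-block-then-x-block packing along the last axis that the reshape implies:

```python
# Sketch of the reshape used in decode_multiple_poses: the raw tensor is
# assumed to pack (y_0..y_{k-1}, x_0..x_{k-1}) per cell along the last
# axis; reshape + swapaxes yields one (y, x) pair per keypoint.
import numpy as np

h, w, k = 2, 2, 3
raw = np.arange(h * w * 2 * k).reshape(h, w, 2 * k)
per_kp = raw.reshape(h, w, 2, -1).swapaxes(2, 3)   # -> (h, w, k, 2)
print(per_kp.shape)    # (2, 2, 3, 2)
print(per_kp[0, 0, 1]) # -> [1 4], the y- and x-offset for keypoint 1 at cell (0, 0)
```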
diff --git a/posenet/utils.py b/posenet/utils.py
new file mode 100644
index 0000000..88dab53
--- /dev/null
+++ b/posenet/utils.py
@@ -0,0 +1,101 @@
+import cv2
+import numpy as np
+
+import posenet.constants
+
+
+def valid_resolution(width, height, output_stride=16):
+    target_width = (int(width) // output_stride) * output_stride + 1
+    target_height = (int(height) // output_stride) * output_stride + 1
+    return target_width, target_height
+
+
+def _process_input(source_img, scale_factor=1.0, output_stride=16):
+    target_width, target_height = valid_resolution(
+        source_img.shape[1] * scale_factor, source_img.shape[0] * scale_factor, output_stride=output_stride)
+    scale = np.array([source_img.shape[0] / target_height, source_img.shape[1] / target_width])
+
+    input_img = cv2.resize(source_img, (target_width, target_height), interpolation=cv2.INTER_LINEAR)
+    input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32)
+    input_img = input_img * (2.0 / 255.0) - 1.0
+    input_img = input_img.reshape(1, target_height, target_width, 3)
+    return input_img, source_img, scale
+
+
+def read_cap(cap, scale_factor=1.0, output_stride=16):
+    res, img = cap.read()
+    if not res:
+        raise IOError("webcam failure")
+    return _process_input(img, scale_factor, output_stride)
+
+
+def read_imgfile(path, scale_factor=1.0, output_stride=16):
+    img = cv2.imread(path)
+    return _process_input(img, scale_factor, output_stride)
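To make the resizing above concrete: `valid_resolution()` snaps a requested size down to a multiple of the output stride, plus one, which is the input geometry the PoseNet graphs expect.

```python
# Worked example of valid_resolution: sizes are snapped to
# (multiple of output_stride) + 1 in each dimension.
from posenet.utils import valid_resolution

print(valid_resolution(640, 480, output_stride=16))  # -> (641, 481)
```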
+
+def draw_keypoints(
+        img, instance_scores, keypoint_scores, keypoint_coords,
+        min_pose_confidence=0.5, min_part_confidence=0.5):
+    cv_keypoints = []
+    for ii, score in enumerate(instance_scores):
+        if score < min_pose_confidence:
+            continue
+        for ks, kc in zip(keypoint_scores[ii, :], keypoint_coords[ii, :, :]):
+            if ks < min_part_confidence:
+                continue
+            cv_keypoints.append(cv2.KeyPoint(kc[1], kc[0], 10. * ks))
+    out_img = cv2.drawKeypoints(img, cv_keypoints, outImage=np.array([]))
+    return out_img
+
+
+def get_adjacent_keypoints(keypoint_scores, keypoint_coords, min_confidence=0.1):
+    results = []
+    for left, right in posenet.CONNECTED_PART_INDICES:
+        if keypoint_scores[left] < min_confidence or keypoint_scores[right] < min_confidence:
+            continue
+        results.append(
+            np.array([keypoint_coords[left][::-1], keypoint_coords[right][::-1]]).astype(np.int32),
+        )
+    return results
+
+
+def draw_skeleton(
+        img, instance_scores, keypoint_scores, keypoint_coords,
+        min_pose_confidence=0.5, min_part_confidence=0.5):
+    out_img = img
+    adjacent_keypoints = []
+    for ii, score in enumerate(instance_scores):
+        if score < min_pose_confidence:
+            continue
+        new_keypoints = get_adjacent_keypoints(
+            keypoint_scores[ii, :], keypoint_coords[ii, :, :], min_part_confidence)
+        adjacent_keypoints.extend(new_keypoints)
+    out_img = cv2.polylines(out_img, adjacent_keypoints, isClosed=False, color=(255, 255, 0))
+    return out_img
+
+
+def draw_skel_and_kp(
+        img, instance_scores, keypoint_scores, keypoint_coords,
+        min_pose_score=0.5, min_part_score=0.5):
+    out_img = img
+    adjacent_keypoints = []
+    cv_keypoints = []
+    for ii, score in enumerate(instance_scores):
+        if score < min_pose_score:
+            continue
+
+        new_keypoints = get_adjacent_keypoints(
+            keypoint_scores[ii, :], keypoint_coords[ii, :, :], min_part_score)
+        adjacent_keypoints.extend(new_keypoints)
+
+        for ks, kc in zip(keypoint_scores[ii, :], keypoint_coords[ii, :, :]):
+            if ks < min_part_score:
+                continue
+            cv_keypoints.append(cv2.KeyPoint(kc[1], kc[0], 10. * ks))
+
+    out_img = cv2.drawKeypoints(
+        out_img, cv_keypoints, outImage=np.array([]), color=(255, 255, 0),
+        flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
+    out_img = cv2.polylines(out_img, adjacent_keypoints, isClosed=False, color=(255, 255, 0))
+    return out_img
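Finally, a hedged end-to-end sketch of how these pieces could be combined for a webcam demo. This is illustrative only: as written, importing `load_model` from main.py runs its module-level `argparse` block at import time, so in practice the function might be copied into its own module first.

```python
# Illustrative webcam loop using read_cap, decode_multiple_poses and
# draw_skel_and_kp from this diff. Assumes TF 1.x and the MobileNet model.
import cv2
import tensorflow as tf

import posenet
from main import load_model  # NOTE: triggers main.py's arg parsing at import

with tf.Session() as sess:
    outputs = load_model('model-mobilenet_v2', sess)
    cap = cv2.VideoCapture(0)
    while True:
        input_image, draw_image, output_scale = posenet.read_cap(cap, output_stride=16)
        heatmaps, offsets, dfwd, dbwd = sess.run(outputs, feed_dict={'sub_2:0': input_image})
        pose_scores, kp_scores, kp_coords = posenet.decode_multiple_poses(
            heatmaps.squeeze(axis=0), offsets.squeeze(axis=0),
            dfwd.squeeze(axis=0), dbwd.squeeze(axis=0),
            output_stride=16, min_pose_score=0.25)
        kp_coords *= output_scale  # map back to the original frame size
        overlay = posenet.draw_skel_and_kp(
            draw_image, pose_scores, kp_scores, kp_coords,
            min_pose_score=0.25, min_part_score=0.25)
        cv2.imshow('posenet', overlay)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
```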