add doc and finish school project

xinyuangui · xinyuangui · commit 5ad8bf64dbbf · 2019-04-28T22:28:09.000-04:00
diff --git a/README.md b/README.md
@@ -4,6 +4,10 @@ This project can be divided into two steps:
 1. Get static training model.
 2. Use training model to recognize names in videos.
 
+## Dockerfile
+
+* `Dockerfile.gpu` is a Dockerfile for an environment that can run the `dlib` library to detect and recognize faces.
+
 ## Training Step
 
 ### Main Idea
@@ -28,6 +32,13 @@ python3 train_cli.py train-with-video <your-video-location> <username>
 python3 recognize_cli.py recognize-faces-in-video <your-video-location>
 ```
 
-## TODO
+## Doc
+
+* A detailed report is available in the `doc` folder.
+
+## Bash script examples
 
-* Increase the processing speed to make the recognition real-time.
+* `test_experiment_cli.sh`: Detect faces in a video and save the results as images
+* `test_recognize_cli.sh`: Recognize faces in a video and output a result video
+* `test_recognize_faces_in_images.sh`: Recognize faces in images
+* `test_train_cli.sh`: Train with a video
diff --git a/apis.py b/apis.py
@@ -37,7 +37,8 @@ def _raw_face_locations(img, number_of_times_to_upsample=1, model='cnn'):
     :return: a list of dlib 'rect' objects of found face locations
     """
     if model == 'cnn':
-        return cnn_face_detector(img, number_of_times_to_upsample)
+        rectangles = cnn_face_detector(img, number_of_times_to_upsample)
+        return [rectangle.rect for rectangle in rectangles]
     else:# hog
         return face_detector(img, number_of_times_to_upsample)
 
@@ -49,11 +50,11 @@ def _raw_face_landmarks(face_image, face_locations=None, model='large'):
     :param face_image: image to search
     :param face_location: optionally provide a list of face locations to search
     :param model: 'large' (default) uses 68 points, 'small' uses 5 points which is faster
-    :return: alist of dicts of face features locations
+    :return: a list of dicts of face features locations
     """
     if face_locations == None:
         face_locations = _raw_face_locations(img=face_image)
-        face_locations = [face_location.rect for face_location in face_locations]
+        face_locations = [face_location for face_location in face_locations]
     else:
         face_locations = [_css_to_rect(face_location) for face_location in face_locations]
 
@@ -95,6 +96,7 @@ def ransac_mean(features, ratio_threshold=0.9, dist_threshold=0.4):
         pick_feature = features[pick_index]
         inliers = np.linalg.norm(features - pick_feature, axis=1) < dist_threshold
         inliers_count = np.sum(inliers)
+        print('{}, {}'.format(inliers_count, feature_count))
         if inliers_count >= ratio_threshold * feature_count:
             inliers = features[inliers]
             print('final iterations: {}, inliers_count: {}'.format(i, inliers_count))
@@ -115,7 +117,7 @@ def recognize_faces_in_images(face_image, features, dist_threshold=0.4):
         If no match, min_dist_index = None
     """
     face_locations = _raw_face_locations(img=face_image)
-    face_locations = [_rect_to_css(face_location.rect) for face_location in face_locations]
+    face_locations = [_rect_to_css(face_location) for face_location in face_locations]
 
     face_features = face_encodings(face_image, face_locations)
     result = [None] * len(face_features)
diff --git a/doc/report.pdf b/doc/report.pdf
diff --git a/experiment.py b/experiment.py
@@ -0,0 +1,92 @@
+import click
+import apis
+import cv2
+import numpy as np
+import os
+import multiprocessing
+
+@click.group()
+def experiment():
+    pass
+
+@click.command()
+@click.argument('video-location')
+@click.argument('output-location')
+@click.option('--frame-count', default=100, help='number of frames to calculate')
+def test_hog_cnn(video_location, output_location, frame_count):
+    """
+    Read one video and sample frames from it at a regular interval.
+    Detect faces in these frames with both the CNN and HOG detectors and write the annotated frames to the output folder.
+    """
+    cap = cv2.VideoCapture(video_location)
+    if not cap.isOpened():
+        click.echo('cannot open this video', err=True)
+        return
+    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) - 1)
+    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
+    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
+    frame_count = min(frame_count, video_length)
+    seperate = int(video_length / frame_count)
+    frame_list = [None] * frame_count
+    result_index = 0
+    frame_index = 0
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if frame_index % seperate == 0 and result_index < frame_count:
+            frame_list[result_index] = frame
+            result_index = result_index + 1
+        frame_index = frame_index + 1
+        # if there are no more frames left
+        if frame_index >= video_length:
+            cap.release()
+            break
+    # if the frame_list is not filled
+    if not result_index == frame_count:
+        frame_list = frame_list[:result_index]
+    # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
+    frame_list = [frame[:, :, ::-1] for frame in frame_list]
+
+    # firstly use cnn
+    print('start using cnn to detect faces')
+    cnn_faces = [detect_faces_cnn(frame, index) for index, frame in enumerate(frame_list)]
+    print('finish detecting faces in cnn')
+
+    # use hog to detect
+    print('start using hog to detect faces')
+    arguments = [[frame, index] for index, frame in enumerate(frame_list)]
+    with multiprocessing.Pool(processes=4) as pool:
+        hog_faces = pool.starmap(detect_faces_hog, arguments)
+
+    # draw cnn rectangles
+    print('begin writing to files')
+    index = 0
+    for faces in cnn_faces:
+        frame = frame_list[index][:,:,::-1]
+        for top, right, bottom, left in faces:
+            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
+        index += 1
+    index = 0
+    for faces in hog_faces:
+        frame = frame_list[index][:,:,::-1]
+        for top, right, bottom, left in faces:
+            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
+        cv2.imwrite(os.path.join(output_location, '{}.png'.format(index)), frame)
+        index += 1
+    
+
+def detect_faces_cnn(frame, index):
+    print('cnn {} begins'.format(index))
+    face_locations_rect = apis._raw_face_locations(frame, model='cnn')
+    return [apis._rect_to_css(rect) for rect in face_locations_rect]
+
+
+def detect_faces_hog(frame, index):
+    print('hog {} begins'.format(index))
+    face_locations_rect = apis._raw_face_locations(frame, model='hog')
+    return [apis._rect_to_css(rect) for rect in face_locations_rect]
+
+experiment.add_command(test_hog_cnn)
+
+
+if __name__ == "__main__":
+    experiment()
diff --git a/recognize_cli.py b/recognize_cli.py
@@ -4,6 +4,7 @@
 import numpy as np
 import os
 import glob
+import time
 
 @click.group()
 def recognize():
@@ -71,7 +72,53 @@ def get_username(full_path):
     output_movie.release()
 
 
+@click.command()
+@click.argument('image-folder')
+@click.option('--saved-feature-loc', default='data/trained_features', help='the folder to save trained feature')
+@click.option('--save-result-folder', default='data/result_images', help='the folder we save result video')
+def recognize_faces_in_images(image_folder, saved_feature_loc, save_result_folder):
+    def get_username(full_path):
+        """
+        get username from full-path
+        """
+        basename = os.path.basename(full_path)
+        return basename.split('.')[0]
+    feature_files = glob.glob(os.path.join(saved_feature_loc, '*.npy'))
+    features = [np.load(feature_file) for feature_file in feature_files]
+    usernames = [get_username(feature_file) for feature_file in feature_files]
+
+    file_formats = ('*.png', '*.jpg', '*.jpeg')
+    img_files = []
+    for file_format in file_formats:
+        img_files.extend(glob.glob(os.path.join(image_folder, file_format)))
+    for img_file in img_files:
+        recognize_faces_in_image(img_file, features, usernames, os.path.join(save_result_folder, os.path.basename(img_file)))
+
+
+def recognize_faces_in_image(image_location, features, usernames, result_location):
+    # read image
+    img = cv2.imread(image_location)
+    img = img[:,:,::-1]
+
+    start = time.time()
+    face_info_tuple = apis.recognize_faces_in_images(img, features)
+    img = img[:,:,::-1]
+    # draw rectangles
+    for top, right, bottom, left, username in face_info_tuple:
+        if username is None:
+            username = 'unknown'
+        else:
+            username = usernames[username]
+        cv2.rectangle(img, (left, top), (right, bottom), (0, 0, 255), 2)
+        font = cv2.FONT_HERSHEY_DUPLEX
+        cv2.putText(img, username, (left + 6, bottom - 6), font, 0.5, (255, 255, 255), 1)
+    end = time.time()
+    print('{} spends time: {}'.format(os.path.basename(image_location), end - start))
+    cv2.imwrite(result_location, img)
+
+
 recognize.add_command(recognize_faces_in_video)
+recognize.add_command(recognize_faces_in_images)
 
 if __name__ == "__main__":
     recognize()
diff --git a/train_cli.py b/train_cli.py
@@ -12,7 +12,7 @@ def train():
 @click.command()
 @click.argument('video-location')
 @click.argument('username')
-@click.option('--frame-count', default=100, help='number of frames to calculate')
+@click.option('--frame-count', default=200, help='number of frames to calculate')
 @click.option('--output-frame', '-o', is_flag=True, default=False, help='output frames')
 @click.option('--frame-output-loc', default='data/frames', help='the folder to output frame')
 @click.option('--saved-feature-loc', default='data/trained_features', help='the folder to save trained feature')