
Commit 89c1cef

first commit
0 parents  commit 89c1cef

12 files changed (+377, -0 lines)

10.png (61.9 KB)

3d.png (615 KB)

9.png (489 KB)

README.md (+34)
# Optical Flow based Tongue Tip Tracking in 3D

We present a novel method for tracking the tongue tip in three dimensions for medical applications. The repository also includes a report that explains the algorithm in detail and the motivation behind the project.
## Requirements

* `MATLAB 2018b` (it may also work on earlier or later versions)
* `Python3 Packages` such as opencv, matplotlib, dlib, imutils, scipy; one way to install them is shown below
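A possible install command (the PyPI package names are an assumption; the repo does not pin them):

```
pip3 install opencv-python matplotlib dlib imutils scipy numpy
```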
## Python3 Implementation

We first use CLNF models to detect faces in an image frame. The model also predicts the face orientation, which helps us evaluate a 3D box around the face; the coordinates of the box are later used for localizing the tongue in 3D.

Constrained Local Neural Fields are explained in depth in the original research article [here](https://arxiv.org/pdf/1611.08657.pdf).
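To make the 3D-box step concrete, here is a minimal sketch of projecting a head-sized box into the image with `cv2.projectPoints`; the pose (`rvec`, `tvec`) and intrinsics are illustrative assumptions, not values produced by this repo's code:

```
# Minimal sketch: project the 8 corners of a head-sized 3D box into the image,
# given an estimated head pose. rvec/tvec/camera_matrix are assumed inputs here.
import numpy as np
import cv2

# hypothetical head pose (rotation/translation) and camera intrinsics
rvec = np.zeros((3, 1))
tvec = np.array([[0.0], [0.0], [500.0]])
camera_matrix = np.array([[500.0, 0, 320], [0, 500.0, 240], [0, 0, 1]])
dist_coeffs = np.zeros((5, 1))

# a 200 mm box centred on the head origin
s = 100.0
box_3d = np.array([[x, y, z] for x in (-s, s) for y in (-s, s) for z in (-s, s)])

corners_2d, _ = cv2.projectPoints(box_3d, rvec, tvec, camera_matrix, dist_coeffs)
print(corners_2d.reshape(-1, 2))  # image coordinates of the box corners
```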
Face counting has been included in the code. The finetuned model can be downloaded from [here]()

```
python3 facial_landmarks_video.py --shape-predictor shape_predictor_68_face_landmarks_finetuned.dat
```
![Shape Detector](image.png)

A 3D visualization is shown below; these coordinates feed the later 3D localization step.

![3D](3d.png)
## MATLAB Implementation

There are two MATLAB scripts in this repo. Execute `tracking_tongue.m` first and then `tracking_in_3d.m`; change the name of the video file before running. This code is meant for video-based tongue tracking and can be extended to real-time video feeds.

![3D-R](10.png)

The 3D visualization should look something like this. These are the 3D coordinates of the tongue tip, which is tracked using Gunnar Farnebäck's optical flow method. The detected tongue tips are shown below:

![3D-R](9.png)

For any queries, please feel free to contact me at `[email protected]`

calib-camera.py (+161)
#!/usr/bin/env python

"""
From https://opencv-python-tutroals.readthedocs.org/en/latest/py_tutorials/py_calib3d/py_calibration/py_calibration.html#calibration

Calling:
cameracalib.py <folder> <image type> <num rows> <num cols> <cell dimension>

like cameracalib.py folder_name png

--h for help
"""

import numpy as np
import cv2
import glob
import sys
import argparse

#---------------------- SET THE PARAMETERS
nRows = 8
nCols = 8
dimension = 20 #- mm

workingFolder = "./camera_01"
imageType = 'jpg'
#------------------------------------------

# termination criteria for sub-pixel corner refinement
# (30 iterations or epsilon, as in the OpenCV tutorial)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((nRows*nCols, 3), np.float32)
objp[:, :2] = np.mgrid[0:nCols, 0:nRows].T.reshape(-1, 2)

# Arrays to store object points and image points from all the images.
objpoints = []  # 3d points in real world space
imgpoints = []  # 2d points in image plane

if len(sys.argv) < 6:
    print("\n Not enough inputs were provided. Using the default values.\n\n"
          " Type -h for help")
else:
    workingFolder = sys.argv[1]
    imageType = sys.argv[2]
    nRows = int(sys.argv[3])
    nCols = int(sys.argv[4])
    dimension = float(sys.argv[5])

if '-h' in sys.argv or '--h' in sys.argv:
    print("\n IMAGE CALIBRATION GIVEN A SET OF IMAGES")
    print(" call: python cameracalib.py <folder> <image type> <num rows (9)> <num cols (6)> <cell dimension (25)>")
    print("\n The script will look for every image in the provided folder and will show the pattern found."
          " The user can skip an image by pressing ESC or accept it with RETURN."
          " At the end the following files are created:"
          " - cameraDistortion.txt"
          " - cameraMatrix.txt \n\n")
    sys.exit()

# Find the image files
filename = workingFolder + "/*." + imageType
images = glob.glob(filename)

print(len(images))
if len(images) < 9:
    print("Not enough images were found: at least 9 shall be provided!!!")
    sys.exit()
else:
    nPatternFound = 0
    imgNotGood = images[1]

    for fname in images:
        if 'calibresult' in fname: continue
        #-- Read the file and convert to greyscale
        img = cv2.imread(fname)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        print("Reading image ", fname)

        # Find the chessboard corners
        ret, corners = cv2.findChessboardCorners(gray, (nCols, nRows), None)

        # If found, add object points, image points (after refining them)
        if ret == True:
            print("Pattern found! Press ESC to skip or ENTER to accept")
            #--- Corner detection can be imprecise on poor pictures, so refine to sub-pixel accuracy
            corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)

            # Draw and display the corners
            cv2.drawChessboardCorners(img, (nCols, nRows), corners2, ret)
            cv2.imshow('img', img)
            k = cv2.waitKey(0) & 0xFF
            if k == 27:  #-- ESC Button
                print("Image Skipped")
                imgNotGood = fname
                continue

            print("Image accepted")
            nPatternFound += 1
            objpoints.append(objp)
            imgpoints.append(corners2)
        else:
            imgNotGood = fname

cv2.destroyAllWindows()

if (nPatternFound > 1):
    print("Found %d good images" % (nPatternFound))
    ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)

    # Undistort an image
    img = cv2.imread(imgNotGood)
    h, w = img.shape[:2]
    print("Image to undistort: ", imgNotGood)
    newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))

    # undistort
    mapx, mapy = cv2.initUndistortRectifyMap(mtx, dist, None, newcameramtx, (w, h), 5)
    dst = cv2.remap(img, mapx, mapy, cv2.INTER_LINEAR)

    # crop the image
    x, y, w, h = roi
    dst = dst[y:y+h, x:x+w]
    print("ROI: ", x, y, w, h)

    cv2.imwrite(workingFolder + "/calibresult.png", dst)
    print("Calibrated picture saved as calibresult.png")
    print("Calibration Matrix: ")
    print(mtx)
    print("Distortion: ", dist)

    #--------- Save result
    filename = workingFolder + "/cameraMatrix.txt"
    np.savetxt(filename, mtx, delimiter=',')
    filename = workingFolder + "/cameraDistortion.txt"
    np.savetxt(filename, dist, delimiter=',')

    # Reprojection error: project the object points back and compare with detections
    mean_error = 0
    for i in range(len(objpoints)):
        imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
        error = cv2.norm(imgpoints[i], imgpoints2, cv2.NORM_L2) / len(imgpoints2)
        mean_error += error

    print("total error: ", mean_error/len(objpoints))
else:
    print("In order to calibrate you need at least 9 good pictures... try again")
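Once the script has written `cameraMatrix.txt` and `cameraDistortion.txt`, the calibration can be reused without re-running the detection loop. A minimal sketch (the test image path is an assumption):

```
# Sketch: reload the saved calibration and undistort a new image.
# File names match the script above; the sample image path is a placeholder.
import numpy as np
import cv2

mtx = np.loadtxt("./camera_01/cameraMatrix.txt", delimiter=',')
dist = np.loadtxt("./camera_01/cameraDistortion.txt", delimiter=',')

img = cv2.imread("./camera_01/sample.jpg")  # hypothetical test image
undistorted = cv2.undistort(img, mtx, dist)
cv2.imwrite("./camera_01/sample_undistorted.png", undistorted)
```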

facial_landmarks_video.py (+101)
# USAGE
# python facial_landmarks_video.py --shape-predictor shape_predictor_68_face_landmarks_finetuned.dat
from imutils import face_utils
import numpy as np
import argparse
import imutils
import dlib
import cv2

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--shape-predictor", required=True,
    help="path to facial landmark predictor")
args = vars(ap.parse_args())

# initialize dlib's face detector (HOG-based) and then create
# the facial landmark predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(args["shape_predictor"])

# arrays for accumulating the mouth-corner coordinates
mouth_array_x = []
mouth_array_y = []

cap = cv2.VideoCapture('proefpersoon 2_M.avi')
frame_count = 0
frame_count_arr = []
while True:
    # Capture image-by-image
    ret, image = cap.read()
    if not ret:  # stop cleanly at the end of the video
        break
    image = imutils.resize(image, width=500)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # detect faces in the grayscale image
    rects = detector(gray, 1)

    for (i, rect) in enumerate(rects):
        # determine the facial landmarks for the face region, then
        # convert the facial landmark (x, y)-coordinates to a NumPy array
        shape = predictor(gray, rect)
        shape = face_utils.shape_to_np(shape)
        # landmark 48 is the left mouth corner in the 68-point model
        for (x, y) in shape[48:49]:
            mouth_array_x.append(x)
            frame_count = frame_count + 1
            frame_count_arr.append(frame_count)
            mouth_array_y.append(y)

        # convert dlib's rectangle to an OpenCV-style bounding box
        # [i.e., (x, y, w, h)], then draw the face bounding box
        (x, y, w, h) = face_utils.rect_to_bb(rect)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # show the face number
        cv2.putText(image, "Face #{}".format(i + 1), (x - 10, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # loop over the (x, y)-coordinates for the facial landmarks
        # and draw them on the image
        for (x, y) in shape:
            cv2.circle(image, (x, y), 3, (0, 0, 255), -1)
    cv2.imwrite('image.png', image)
    cv2.imshow('image', image)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()

print(len(frame_count_arr))
print(len(mouth_array_x))

# Plotting the results for estimation
import matplotlib.pyplot as plt
from scipy.signal import medfilt, find_peaks

y = np.asarray(mouth_array_y)
x = np.asarray(mouth_array_x) / np.sum(mouth_array_x)
peak_estimates = find_peaks(x)

print(peak_estimates[0])
array_len = len(peak_estimates[0])

fig = plt.figure()
ax = plt.subplot(111)
ax.plot(frame_count_arr, medfilt(x), label='Relative Motion of X-Coordinates')
plt.title('Graphical Representation')
ax.legend()
fig.savefig('plot_x.png')

fig = plt.figure()
ax = plt.subplot(111)
ax.plot(frame_count_arr, medfilt(y), label='Relative Motion of Y-Coordinates')
plt.title('Graphical Representation')
ax.legend()
fig.savefig('plot_y.png')
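As a small follow-up (assuming the arrays produced by the run above), the peak indices returned by `find_peaks` can be mapped back to video frame numbers:

```
# Sketch: map each peak index in the x-coordinate signal back to its frame number
# (uses frame_count_arr and peak_estimates from the script above).
peak_frames = [frame_count_arr[i] for i in peak_estimates[0]]
print("Frames with local maxima in mouth-corner x-motion:", peak_frames)
```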

image.png (254 KB)

ipcv_report.pdf (5.39 MB, binary file not shown)

plot_x.png (28.2 KB)

plot_y.png (24.2 KB)

tracking_in_3d.m (+29)
clearvars -except cameraParams camPoses mpoints lpoints rpoints

figure;
plotCamera(camPoses, 'Size', 0.2);

% Build a point track per frame from the left/middle/right views
% and triangulate it across the three camera poses.
for i = 1:length(mpoints)
    points = [lpoints(i,1), lpoints(i,2); mpoints(i,1), mpoints(i,2); rpoints(i,1), rpoints(i,2)];
    viewIDs = [1 2 3];
    tracks(i) = pointTrack(viewIDs, points);
    xyzPoints(i,:) = triangulateMultiview(tracks(i), camPoses, cameraParams);
end
grid on

% Refine the 3-D world points and camera poses.
[xyzPoints, camPoses, reprojectionErrors] = bundleAdjustment(xyzPoints, ...
    tracks, camPoses, cameraParams);

% Display the dense 3-D world points.
pcshow(xyzPoints, 'VerticalAxis', 'y', 'VerticalAxisDir', 'down', ...
    'MarkerSize', 45);
hold on
% Specify the viewing volume.
loc1 = camPoses.Location{1};
xlim([loc1(1)-5, loc1(1)+4]);
ylim([loc1(2)-5, loc1(2)+4]);
zlim([loc1(3)-1, loc1(3)+20]);
camorbit(0, -30);

title('Dense Reconstruction');
hold off
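For reference, a small numpy sketch of the linear (DLT) triangulation that `triangulateMultiview` performs for each track above; the 3x4 projection matrices and per-view pixel observations are assumed inputs, not outputs of this repo:

```
# Sketch of linear (DLT) multi-view triangulation: stack two equations per view
# and take the null vector of the system via SVD. Inputs are assumptions.
import numpy as np

def triangulate_dlt(proj_mats, points_2d):
    """proj_mats: list of 3x4 camera projection matrices;
    points_2d: list of (x, y) observations, one per camera."""
    A = []
    for P, (x, y) in zip(proj_mats, points_2d):
        A.append(x * P[2] - P[0])
        A.append(y * P[2] - P[1])
    _, _, vt = np.linalg.svd(np.asarray(A))
    X = vt[-1]
    return X[:3] / X[3]  # dehomogenize to a 3D point
```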

tracking_tongue.m (+52)
close all
clear variables

% The video source was missing here; the file name below is a placeholder,
% change it to your own video (see README).
vidReader = VideoReader('tongue_video.avi');

opticFlow = opticalFlowFarneback;

r = [367.5 350.5 361 365]; %midtest
%r = [135.5 479.5 367 261]; %ltest
%r = [595.5 431.5 412 302]; %rtest
frameRGB = readFrame(vidReader);
frameRGB = imcrop(frameRGB, r);
frameGray = rgb2gray(frameRGB);
pointTracker = vision.PointTracker('MaxBidirectionalError', 1);

cp = 1;
rp = 1;
initialize(pointTracker, [cp rp], frameGray);
count = 1;
while hasFrame(vidReader)
    frameRGB1 = readFrame(vidReader);
    frameRGB = imcrop(frameRGB1, r);
    frameGray = rgb2gray(frameRGB);
    frameGray = imsharpen(frameGray);
    %frameGray = adapthisteq(frameGray,'clipLimit',0.01,'Distribution','rayleigh');
    flow = estimateFlow(opticFlow, frameGray);
    [row, col] = find(flow.Magnitude == max(flow.Magnitude(:)));
    imshow(frameGray)
    if max(flow.Magnitude(:)) >= 6
        % Strong motion: re-seed the point tracker at the flow maximum.
        cp = col;
        rp = row;
        release(pointTracker);
        initialize(pointTracker, [cp rp], frameGray)
    else
        % Otherwise follow the previously seeded point with the KLT tracker.
        [pt, point_validity] = pointTracker(frameGray);
        cp = pt(1);
        rp = pt(2);
    end
    hold on
    plot(cp, rp, 'ro', 'MarkerSize', 30);
    hold off
    drawnow

    mpoints(count,1) = cp;
    mpoints(count,2) = rp;
    count = count + 1;
end
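A rough Python/OpenCV analogue of the loop above, for readers without MATLAB; it is simplified (it keeps the last point instead of KLT-tracking it between motion bursts), the video file name is a placeholder, and the threshold of 6 mirrors the MATLAB script:

```
# Rough Python analogue of tracking_tongue.m: follow the pixel with the
# largest Farneback flow magnitude once it exceeds a threshold.
import numpy as np
import cv2

cap = cv2.VideoCapture('tongue.avi')  # placeholder file name
ret, prev = cap.read()
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)

point = (0, 0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
    mag = np.linalg.norm(flow, axis=2)
    if mag.max() >= 6:  # same motion threshold as the MATLAB code
        r, c = np.unravel_index(mag.argmax(), mag.shape)
        point = (int(c), int(r))  # strongest motion -> new tongue-tip estimate
    cv2.circle(frame, point, 10, (0, 0, 255), 2)
    cv2.imshow('tracking', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    prev_gray = gray

cap.release()
cv2.destroyAllWindows()
```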
