
Commit 89c1cef

first commit
0 parents  commit 89c1cef

12 files changed (+377, -0 lines)

10.png (61.9 KB)

3d.png (615 KB)

9.png (489 KB)

README.md (+34)
# Optical Flow based Tongue Tip Tracking in 3D

We present a novel method for tracking the tongue tip in three dimensions for medical applications. The repository also includes a report that explains the algorithm in detail and the motivation behind the project.
## Requirements

* `MATLAB 2018b` (it may also work on earlier or later versions)
* `Python3 Packages` such as opencv, matplotlib, dlib, imutils, scipy; one way to install them is shown below
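A possible install command (the PyPI package names are an assumption; the repo does not pin them):

```
pip3 install opencv-python matplotlib dlib imutils scipy numpy
```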
## Python3 Implementation

We first use CLNF models to detect faces in an image frame. The model also predicts the face orientation, which helps us evaluate a 3D box around the face; the coordinates of the box are later used for localizing the tongue in 3D.

Constrained Local Neural Fields are explained in depth in the original research article [here](https://arxiv.org/pdf/1611.08657.pdf).
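To make the 3D-box step concrete, here is a minimal sketch of projecting a head-sized box into the image with `cv2.projectPoints`; the pose (`rvec`, `tvec`) and intrinsics are illustrative assumptions, not values produced by this repo's code:

```
# Minimal sketch: project the 8 corners of a head-sized 3D box into the image,
# given an estimated head pose. rvec/tvec/camera_matrix are assumed inputs here.
import numpy as np
import cv2

# hypothetical head pose (rotation/translation) and camera intrinsics
rvec = np.zeros((3, 1))
tvec = np.array([[0.0], [0.0], [500.0]])
camera_matrix = np.array([[500.0, 0, 320], [0, 500.0, 240], [0, 0, 1]])
dist_coeffs = np.zeros((5, 1))

# a 200 mm box centred on the head origin
s = 100.0
box_3d = np.array([[x, y, z] for x in (-s, s) for y in (-s, s) for z in (-s, s)])

corners_2d, _ = cv2.projectPoints(box_3d, rvec, tvec, camera_matrix, dist_coeffs)
print(corners_2d.reshape(-1, 2))  # image coordinates of the box corners
```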
Face counting has been included in the code. The finetuned model can be downloaded from [here]()

```
python3 facial_landmarks_video.py --shape-predictor shape_predictor_68_face_landmarks_finetuned.dat
```
![Shape Detector](image.png)

A 3D visualization is shown below; these coordinates feed the later 3D localization step.

![3D](3d.png)
## MATLAB Implementation

There are two MATLAB scripts in this repo. Execute `tracking_tongue.m` first and then `tracking_in_3d.m`; change the name of the video file before running. This code is meant for video-based tongue tracking and can be extended to real-time video feeds.

![3D-R](10.png)

The 3D visualization should look something like this. These are the 3D coordinates of the tongue tip, which is tracked using Gunnar Farnebäck's optical flow method. The detected tongue tips are shown below:

![3D-R](9.png)

For any queries, please feel free to contact me at `[email protected]`

calib-camera.py (+161)
#!/usr/bin/env python

"""
From https://opencv-python-tutroals.readthedocs.org/en/latest/py_tutorials/py_calib3d/py_calibration/py_calibration.html#calibration

Calling:
cameracalib.py <folder> <image type> <num rows> <num cols> <cell dimension>

like cameracalib.py folder_name png

--h for help
"""

import numpy as np
import cv2
import glob
import sys
import argparse

#---------------------- SET THE PARAMETERS
nRows = 8
nCols = 8
dimension = 20 #- mm

workingFolder = "./camera_01"
imageType = 'jpg'
#------------------------------------------

# termination criteria for sub-pixel corner refinement
# (30 iterations or epsilon, as in the OpenCV tutorial)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((nRows*nCols, 3), np.float32)
objp[:, :2] = np.mgrid[0:nCols, 0:nRows].T.reshape(-1, 2)

# Arrays to store object points and image points from all the images.
objpoints = []  # 3d points in real world space
imgpoints = []  # 2d points in image plane

if len(sys.argv) < 6:
    print("\n Not enough inputs were provided. Using the default values.\n\n"
          " Type -h for help")
else:
    workingFolder = sys.argv[1]
    imageType = sys.argv[2]
    nRows = int(sys.argv[3])
    nCols = int(sys.argv[4])
    dimension = float(sys.argv[5])

if '-h' in sys.argv or '--h' in sys.argv:
    print("\n IMAGE CALIBRATION GIVEN A SET OF IMAGES")
    print(" call: python cameracalib.py <folder> <image type> <num rows (9)> <num cols (6)> <cell dimension (25)>")
    print("\n The script will look for every image in the provided folder and will show the pattern found."
          " The user can skip an image by pressing ESC or accept it with RETURN."
          " At the end the following files are created:"
          " - cameraDistortion.txt"
          " - cameraMatrix.txt \n\n")
    sys.exit()

# Find the image files
filename = workingFolder + "/*." + imageType
images = glob.glob(filename)

print(len(images))
if len(images) < 9:
    print("Not enough images were found: at least 9 shall be provided!!!")
    sys.exit()
else:
    nPatternFound = 0
    imgNotGood = images[1]

    for fname in images:
        if 'calibresult' in fname: continue
        #-- Read the file and convert to greyscale
        img = cv2.imread(fname)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        print("Reading image ", fname)

        # Find the chessboard corners
        ret, corners = cv2.findChessboardCorners(gray, (nCols, nRows), None)

        # If found, add object points, image points (after refining them)
        if ret == True:
            print("Pattern found! Press ESC to skip or ENTER to accept")
            #--- Corner detection can be imprecise on poor pictures, so refine to sub-pixel accuracy
            corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)

            # Draw and display the corners
            cv2.drawChessboardCorners(img, (nCols, nRows), corners2, ret)
            cv2.imshow('img', img)
            k = cv2.waitKey(0) & 0xFF
            if k == 27:  #-- ESC Button
                print("Image Skipped")
                imgNotGood = fname
                continue

            print("Image accepted")
            nPatternFound += 1
            objpoints.append(objp)
            imgpoints.append(corners2)
        else:
            imgNotGood = fname

cv2.destroyAllWindows()

if (nPatternFound > 1):
    print("Found %d good images" % (nPatternFound))
    ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)

    # Undistort an image
    img = cv2.imread(imgNotGood)
    h, w = img.shape[:2]
    print("Image to undistort: ", imgNotGood)
    newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1, (w, h))

    # undistort
    mapx, mapy = cv2.initUndistortRectifyMap(mtx, dist, None, newcameramtx, (w, h), 5)
    dst = cv2.remap(img, mapx, mapy, cv2.INTER_LINEAR)

    # crop the image
    x, y, w, h = roi
    dst = dst[y:y+h, x:x+w]
    print("ROI: ", x, y, w, h)

    cv2.imwrite(workingFolder + "/calibresult.png", dst)
    print("Calibrated picture saved as calibresult.png")
    print("Calibration Matrix: ")
    print(mtx)
    print("Distortion: ", dist)

    #--------- Save result
    filename = workingFolder + "/cameraMatrix.txt"
    np.savetxt(filename, mtx, delimiter=',')
    filename = workingFolder + "/cameraDistortion.txt"
    np.savetxt(filename, dist, delimiter=',')

    # Reprojection error: project the object points back and compare with detections
    mean_error = 0
    for i in range(len(objpoints)):
        imgpoints2, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
        error = cv2.norm(imgpoints[i], imgpoints2, cv2.NORM_L2) / len(imgpoints2)
        mean_error += error

    print("total error: ", mean_error/len(objpoints))
else:
    print("In order to calibrate you need at least 9 good pictures... try again")
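Once the script has written `cameraMatrix.txt` and `cameraDistortion.txt`, the calibration can be reused without re-running the detection loop. A minimal sketch (the test image path is an assumption):

```
# Sketch: reload the saved calibration and undistort a new image.
# File names match the script above; the sample image path is a placeholder.
import numpy as np
import cv2

mtx = np.loadtxt("./camera_01/cameraMatrix.txt", delimiter=',')
dist = np.loadtxt("./camera_01/cameraDistortion.txt", delimiter=',')

img = cv2.imread("./camera_01/sample.jpg")  # hypothetical test image
undistorted = cv2.undistort(img, mtx, dist)
cv2.imwrite("./camera_01/sample_undistorted.png", undistorted)
```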

facial_landmarks_video.py (+101)
# USAGE
# python facial_landmarks_video.py --shape-predictor shape_predictor_68_face_landmarks_finetuned.dat
from imutils import face_utils
import numpy as np
import argparse
import imutils
import dlib
import cv2

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--shape-predictor", required=True,
    help="path to facial landmark predictor")
args = vars(ap.parse_args())

# initialize dlib's face detector (HOG-based) and then create
# the facial landmark predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(args["shape_predictor"])

# arrays for accumulating the mouth-corner coordinates
mouth_array_x = []
mouth_array_y = []

cap = cv2.VideoCapture('proefpersoon 2_M.avi')
frame_count = 0
frame_count_arr = []
while True:
    # Capture image-by-image
    ret, image = cap.read()
    if not ret:  # stop cleanly at the end of the video
        break
    image = imutils.resize(image, width=500)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # detect faces in the grayscale image
    rects = detector(gray, 1)

    for (i, rect) in enumerate(rects):
        # determine the facial landmarks for the face region, then
        # convert the facial landmark (x, y)-coordinates to a NumPy array
        shape = predictor(gray, rect)
        shape = face_utils.shape_to_np(shape)
        # landmark 48 is the left mouth corner in the 68-point model
        for (x, y) in shape[48:49]:
            mouth_array_x.append(x)
            frame_count = frame_count + 1
            frame_count_arr.append(frame_count)
            mouth_array_y.append(y)

        # convert dlib's rectangle to an OpenCV-style bounding box
        # [i.e., (x, y, w, h)], then draw the face bounding box
        (x, y, w, h) = face_utils.rect_to_bb(rect)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # show the face number
        cv2.putText(image, "Face #{}".format(i + 1), (x - 10, y - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # loop over the (x, y)-coordinates for the facial landmarks
        # and draw them on the image
        for (x, y) in shape:
            cv2.circle(image, (x, y), 3, (0, 0, 255), -1)
    cv2.imwrite('image.png', image)
    cv2.imshow('image', image)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()

print(len(frame_count_arr))
print(len(mouth_array_x))

# Plotting the results for estimation
import matplotlib.pyplot as plt
from scipy.signal import medfilt, find_peaks

y = np.asarray(mouth_array_y)
x = np.asarray(mouth_array_x) / np.sum(mouth_array_x)
peak_estimates = find_peaks(x)

print(peak_estimates[0])
array_len = len(peak_estimates[0])

fig = plt.figure()
ax = plt.subplot(111)
ax.plot(frame_count_arr, medfilt(x), label='Relative Motion of X-Coordinates')
plt.title('Graphical Representation')
ax.legend()
fig.savefig('plot_x.png')

fig = plt.figure()
ax = plt.subplot(111)
ax.plot(frame_count_arr, medfilt(y), label='Relative Motion of Y-Coordinates')
plt.title('Graphical Representation')
ax.legend()
fig.savefig('plot_y.png')
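As a small follow-up (assuming the arrays produced by the run above), the peak indices returned by `find_peaks` can be mapped back to video frame numbers:

```
# Sketch: map each peak index in the x-coordinate signal back to its frame number
# (uses frame_count_arr and peak_estimates from the script above).
peak_frames = [frame_count_arr[i] for i in peak_estimates[0]]
print("Frames with local maxima in mouth-corner x-motion:", peak_frames)
```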

image.png (254 KB)

ipcv_report.pdf (5.39 MB, binary file not shown)

plot_x.png (28.2 KB)

plot_y.png (24.2 KB)

tracking_in_3d.m (+29)
clearvars -except cameraParams camPoses mpoints lpoints rpoints

figure;
plotCamera(camPoses, 'Size', 0.2);

% Build a point track per frame from the left/middle/right views
% and triangulate it across the three camera poses.
for i = 1:length(mpoints)
    points = [lpoints(i,1), lpoints(i,2); mpoints(i,1), mpoints(i,2); rpoints(i,1), rpoints(i,2)];
    viewIDs = [1 2 3];
    tracks(i) = pointTrack(viewIDs, points);
    xyzPoints(i,:) = triangulateMultiview(tracks(i), camPoses, cameraParams);
end
grid on

% Refine the 3-D world points and camera poses.
[xyzPoints, camPoses, reprojectionErrors] = bundleAdjustment(xyzPoints, ...
    tracks, camPoses, cameraParams);

% Display the dense 3-D world points.
pcshow(xyzPoints, 'VerticalAxis', 'y', 'VerticalAxisDir', 'down', ...
    'MarkerSize', 45);
hold on
% Specify the viewing volume.
loc1 = camPoses.Location{1};
xlim([loc1(1)-5, loc1(1)+4]);
ylim([loc1(2)-5, loc1(2)+4]);
zlim([loc1(3)-1, loc1(3)+20]);
camorbit(0, -30);

title('Dense Reconstruction');
hold off
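For reference, a small numpy sketch of the linear (DLT) triangulation that `triangulateMultiview` performs for each track above; the 3x4 projection matrices and per-view pixel observations are assumed inputs, not outputs of this repo:

```
# Sketch of linear (DLT) multi-view triangulation: stack two equations per view
# and take the null vector of the system via SVD. Inputs are assumptions.
import numpy as np

def triangulate_dlt(proj_mats, points_2d):
    """proj_mats: list of 3x4 camera projection matrices;
    points_2d: list of (x, y) observations, one per camera."""
    A = []
    for P, (x, y) in zip(proj_mats, points_2d):
        A.append(x * P[2] - P[0])
        A.append(y * P[2] - P[1])
    _, _, vt = np.linalg.svd(np.asarray(A))
    X = vt[-1]
    return X[:3] / X[3]  # dehomogenize to a 3D point
```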

tracking_tongue.m (+52)
close all
clear variables

% The video source was missing here; the file name below is a placeholder,
% change it to your own video (see README).
vidReader = VideoReader('tongue_video.avi');

opticFlow = opticalFlowFarneback;

r = [367.5 350.5 361 365]; %midtest
%r = [135.5 479.5 367 261]; %ltest
%r = [595.5 431.5 412 302]; %rtest
frameRGB = readFrame(vidReader);
frameRGB = imcrop(frameRGB, r);
frameGray = rgb2gray(frameRGB);
pointTracker = vision.PointTracker('MaxBidirectionalError', 1);

cp = 1;
rp = 1;
initialize(pointTracker, [cp rp], frameGray);
count = 1;
while hasFrame(vidReader)
    frameRGB1 = readFrame(vidReader);
    frameRGB = imcrop(frameRGB1, r);
    frameGray = rgb2gray(frameRGB);
    frameGray = imsharpen(frameGray);
    %frameGray = adapthisteq(frameGray,'clipLimit',0.01,'Distribution','rayleigh');
    flow = estimateFlow(opticFlow, frameGray);
    [row, col] = find(flow.Magnitude == max(flow.Magnitude(:)));
    imshow(frameGray)
    if max(flow.Magnitude(:)) >= 6
        % Strong motion: re-seed the point tracker at the flow maximum.
        cp = col;
        rp = row;
        release(pointTracker);
        initialize(pointTracker, [cp rp], frameGray)
    else
        % Otherwise follow the previously seeded point with the KLT tracker.
        [pt, point_validity] = pointTracker(frameGray);
        cp = pt(1);
        rp = pt(2);
    end
    hold on
    plot(cp, rp, 'ro', 'MarkerSize', 30);
    hold off
    drawnow

    mpoints(count,1) = cp;
    mpoints(count,2) = rp;
    count = count + 1;
end
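A rough Python/OpenCV analogue of the loop above, for readers without MATLAB; it is simplified (it keeps the last point instead of KLT-tracking it between motion bursts), the video file name is a placeholder, and the threshold of 6 mirrors the MATLAB script:

```
# Rough Python analogue of tracking_tongue.m: follow the pixel with the
# largest Farneback flow magnitude once it exceeds a threshold.
import numpy as np
import cv2

cap = cv2.VideoCapture('tongue.avi')  # placeholder file name
ret, prev = cap.read()
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)

point = (0, 0)
while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                        0.5, 3, 15, 3, 5, 1.2, 0)
    mag = np.linalg.norm(flow, axis=2)
    if mag.max() >= 6:  # same motion threshold as the MATLAB code
        r, c = np.unravel_index(mag.argmax(), mag.shape)
        point = (int(c), int(r))  # strongest motion -> new tongue-tip estimate
    cv2.circle(frame, point, 10, (0, 0, 255), 2)
    cv2.imshow('tracking', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
    prev_gray = gray

cap.release()
cv2.destroyAllWindows()
```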
