diff --git a/README.md b/README.md
index 6a9b43c3..7c69c721 100644
--- a/README.md
+++ b/README.md
@@ -17,8 +17,6 @@ In this final project, you will implement the missing parts in the schematic. To
   * Linux: make is installed by default on most Linux distros
   * Mac: [install Xcode command line tools to get make](https://developer.apple.com/xcode/features/)
   * Windows: [Click here for installation instructions](http://gnuwin32.sourceforge.net/packages/make.htm)
-* Git LFS
-  * Weight files are handled using [LFS](https://git-lfs.github.com/)
 * OpenCV >= 4.1
   * This must be compiled from source using the `-D OPENCV_ENABLE_NONFREE=ON` cmake flag for testing the SIFT and SURF detectors.
   * The OpenCV 4.1.0 source code can be found [here](https://github.com/opencv/opencv/tree/4.1.0)
@@ -32,4 +30,4 @@ In this final project, you will implement the missing parts in the schematic. To
 1. Clone this repo.
 2. Make a build directory in the top level project directory: `mkdir build && cd build`
 3. Compile: `cmake .. && make`
-4. Run it: `./3D_object_tracking`.
+4. Run it: `./3D_object_tracking`.
\ No newline at end of file
diff --git a/src/FinalProject_Camera.cpp b/src/FinalProject_Camera.cpp
index fd222e89..34a644e6 100644
--- a/src/FinalProject_Camera.cpp
+++ b/src/FinalProject_Camera.cpp
@@ -129,18 +129,13 @@ int main(int argc, const char *argv[])
         clusterLidarWithROI((dataBuffer.end()-1)->boundingBoxes, (dataBuffer.end() - 1)->lidarPoints, shrinkFactor, P_rect_00, R_rect_00, RT);
 
         // Visualize 3D objects
-        bVis = true;
+        bVis = false;
         if(bVis)
         {
             show3DObjects((dataBuffer.end()-1)->boundingBoxes, cv::Size(4.0, 20.0), cv::Size(2000, 2000), true);
         }
-        bVis = false;
 
         cout << "#4 : CLUSTER LIDAR POINT CLOUD done" << endl;
-
-
-        // REMOVE THIS LINE BEFORE PROCEEDING WITH THE FINAL PROJECT
-        continue; // skips directly to the next image without processing what comes beneath
 
         /* DETECT IMAGE KEYPOINTS */
 
@@ -150,7 +145,7 @@ int main(int argc, const char *argv[])
 
         // extract 2D keypoints from current image
         vector<cv::KeyPoint> keypoints; // create empty feature list for current image
-        string detectorType = "SHITOMASI";
+        string detectorType = "FAST";
 
         if (detectorType.compare("SHITOMASI") == 0)
         {
@@ -158,7 +153,7 @@ int main(int argc, const char *argv[])
         }
         else
         {
-            //...
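+            // every other detector type (FAST, BRISK, ORB, AKAZE, SIFT) is dispatched by name inside detKeypointsModern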
+            detKeypointsModern(keypoints, imgGray, detectorType, false);
         }
 
         // optional : limit number of keypoints (helpful for debugging and learning)
@@ -184,7 +179,7 @@ int main(int argc, const char *argv[])
         /* EXTRACT KEYPOINT DESCRIPTORS */
 
         cv::Mat descriptors;
-        string descriptorType = "BRISK"; // BRISK, BRIEF, ORB, FREAK, AKAZE, SIFT
+        string descriptorType = "BRIEF"; // BRISK, BRIEF, ORB, FREAK, AKAZE, SIFT
         descKeypoints((dataBuffer.end() - 1)->keypoints, (dataBuffer.end() - 1)->cameraImg, descriptors, descriptorType);
 
         // push descriptors for current frame to end of data buffer
diff --git a/src/camFusion.hpp b/src/camFusion.hpp
index eca20a51..4786a38d 100644
--- a/src/camFusion.hpp
+++ b/src/camFusion.hpp
@@ -11,11 +11,10 @@
 void clusterLidarWithROI(std::vector<BoundingBox> &boundingBoxes, std::vector<LidarPoint> &lidarPoints, float shrinkFactor, cv::Mat &P_rect_xx, cv::Mat &R_rect_xx, cv::Mat &RT);
 void clusterKptMatchesWithROI(BoundingBox &boundingBox, std::vector<cv::KeyPoint> &kptsPrev, std::vector<cv::KeyPoint> &kptsCurr, std::vector<cv::DMatch> &kptMatches);
 void matchBoundingBoxes(std::vector<cv::DMatch> &matches, std::map<int, int> &bbBestMatches, DataFrame &prevFrame, DataFrame &currFrame);
-
 void show3DObjects(std::vector<BoundingBox> &boundingBoxes, cv::Size worldSize, cv::Size imageSize, bool bWait=true);
-
 void computeTTCCamera(std::vector<cv::KeyPoint> &kptsPrev, std::vector<cv::KeyPoint> &kptsCurr, std::vector<cv::DMatch> kptMatches, double frameRate, double &TTC, cv::Mat *visImg=nullptr);
 void computeTTCLidar(std::vector<LidarPoint> &lidarPointsPrev,
                      std::vector<LidarPoint> &lidarPointsCurr, double frameRate, double &TTC);
 
 #endif /* camFusion_hpp */
+
diff --git a/src/camFusion_Student.cpp b/src/camFusion_Student.cpp
index 4e6ab1f9..0f0f3ffe 100644
--- a/src/camFusion_Student.cpp
+++ b/src/camFusion_Student.cpp
@@ -4,7 +4,6 @@
 #include <numeric>
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
-
 #include "camFusion.hpp"
 #include "dataStructures.h"
 
@@ -107,11 +106,11 @@ void show3DObjects(std::vector<BoundingBox> &boundingBoxes, cv::Size worldSize, cv::Size imageSize, bool bWait)
         cv::rectangle(topviewImg, cv::Point(left, top), cv::Point(right, bottom),cv::Scalar(0,0,0), 2);
 
         // augment object with some key data
         char str1[200], str2[200];
         sprintf(str1, "id=%d, #pts=%d", it1->boxID, (int)it1->lidarPoints.size());
-        putText(topviewImg, str1, cv::Point2f(left-250, bottom+50), cv::FONT_ITALIC, 2, currColor);
+        putText(topviewImg, str1, cv::Point2f(left-250, bottom+50), cv::FONT_ITALIC, 1, currColor);
         sprintf(str2, "xmin=%2.2f m, yw=%2.2f m", xwmin, ywmax-ywmin);
-        putText(topviewImg, str2, cv::Point2f(left-250, bottom+125), cv::FONT_ITALIC, 2, currColor);
+        putText(topviewImg, str2, cv::Point2f(left-250, bottom+125), cv::FONT_ITALIC, 1, currColor);
     }
 
     // plot distance markers
@@ -129,35 +128,202 @@ void show3DObjects(std::vector<BoundingBox> &boundingBoxes, cv::Size worldSize, cv::Size imageSize, bool bWait)
     cv::imshow(windowName, topviewImg);
 
     if(bWait)
     {
         cv::waitKey(0); // wait for key to be pressed
     }
 }
 
-
 // associate a given bounding box with the keypoints it contains
 void clusterKptMatchesWithROI(BoundingBox &boundingBox, std::vector<cv::KeyPoint> &kptsPrev, std::vector<cv::KeyPoint> &kptsCurr, std::vector<cv::DMatch> &kptMatches)
 {
-    // ...
-}
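+    // use the mean displacement of all matches inside the ROI as a reference:
+    // matches that moved more than 1.3x the mean are treated as outliers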
+    double distance_mean = 0.0;
+    double size = 0.0;
+    for (auto it = kptMatches.begin(); it != kptMatches.end(); it++)
+    {
+        cv::KeyPoint prev_point = kptsPrev[it->queryIdx];
+        cv::KeyPoint curr_point = kptsCurr[it->trainIdx];
+
+        if (boundingBox.roi.contains(curr_point.pt))
+        {
+            distance_mean += cv::norm(curr_point.pt - prev_point.pt);
+            size += 1;
+        }
+    }
+    if (size == 0)
+    {
+        return; // no matches fall inside the ROI; avoid division by zero
+    }
+    distance_mean = distance_mean / size;
+
+    // Filtering
+    for (auto it = kptMatches.begin(); it != kptMatches.end(); it++)
+    {
+        cv::KeyPoint prev_point = kptsPrev[it->queryIdx];
+        cv::KeyPoint curr_point = kptsCurr[it->trainIdx];
+
+        if (boundingBox.roi.contains(curr_point.pt))
+        {
+            double curr_distance = cv::norm(curr_point.pt - prev_point.pt);
+
+            if (curr_distance < distance_mean * 1.3)
+            {
+                boundingBox.keypoints.push_back(curr_point);
+                boundingBox.kptMatches.push_back(*it);
+            }
+        }
+    }
+}
 
 // Compute time-to-collision (TTC) based on keypoint correspondences in successive images
 void computeTTCCamera(std::vector<cv::KeyPoint> &kptsPrev, std::vector<cv::KeyPoint> &kptsCurr,
                       std::vector<cv::DMatch> kptMatches, double frameRate, double &TTC, cv::Mat *visImg)
 {
-    // ...
-}
+    // compute distance ratios between all matched keypoints
+    vector<double> distRatios; // stores the distance ratios for all keypoints between curr. and prev. frame
+    for (auto it1 = kptMatches.begin(); it1 != kptMatches.end() - 1; ++it1)
+    { // outer kpt. loop
+
+        // get current keypoint and its matched partner in the prev. frame
+        cv::KeyPoint kpOuterCurr = kptsCurr.at(it1->trainIdx);
+        cv::KeyPoint kpOuterPrev = kptsPrev.at(it1->queryIdx);
+
+        for (auto it2 = kptMatches.begin() + 1; it2 != kptMatches.end(); ++it2)
+        { // inner kpt.-loop
+
+            double minDist = 100.0; // min. required distance
+
+            // get next keypoint and its matched partner in the prev. frame
+            cv::KeyPoint kpInnerCurr = kptsCurr.at(it2->trainIdx);
+            cv::KeyPoint kpInnerPrev = kptsPrev.at(it2->queryIdx);
+
+            // compute distances and distance ratios
+            double distCurr = cv::norm(kpOuterCurr.pt - kpInnerCurr.pt);
+            double distPrev = cv::norm(kpOuterPrev.pt - kpInnerPrev.pt);
+
+            if (distPrev > std::numeric_limits<double>::epsilon() && distCurr >= minDist)
+            { // avoid division by zero
+                double distRatio = distCurr / distPrev;
+                distRatios.push_back(distRatio);
+            }
+        } // eof inner loop over all matched kpts
+    } // eof outer loop over all matched kpts
+
+    // only continue if list of distance ratios is not empty
+    if (distRatios.size() == 0)
+    {
+        TTC = NAN;
+        return;
+    }
+
+    // use the median distance ratio to suppress outlier influence
+    std::sort(distRatios.begin(), distRatios.end());
+    long medIndex = floor(distRatios.size() / 2.0);
+    double medDistRatio = distRatios.size() % 2 == 0 ? (distRatios[medIndex - 1] + distRatios[medIndex]) / 2.0 : distRatios[medIndex];
+
+    // constant-velocity model: medDistRatio = h1/h0 = d0/d1, hence TTC = -dT / (1 - medDistRatio)
+    double dT = 1 / frameRate;
+    TTC = -dT / (1 - medDistRatio);
+
+    cout << "-------------------------------" << endl;
+    cout << "CAMERA TTC" << endl;
+    cout << "Camera time to collision: " << TTC << " s" << endl;
+    cout << "-------------------------------" << endl;
+}
 
 void computeTTCLidar(std::vector<LidarPoint> &lidarPointsPrev,
                      std::vector<LidarPoint> &lidarPointsCurr, double frameRate, double &TTC)
 {
-    // ...
-}
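+    // constant-velocity model: d1 = d0 - v * dT  =>  v = (minXPrev - minXCurr) / dT
+    // so TTC = d1 / v = minXCurr * dT / (minXPrev - minXCurr)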
+    // aux variables
+    double dT = 1.0 / frameRate; // time between two measurements [s]
+
+    // use the 20th-percentile x-distance instead of the single closest point
+    // to be robust against outlier Lidar returns
+    vector<double> prev_vector;
+    for (auto it = lidarPointsPrev.begin(); it != lidarPointsPrev.end(); it++)
+    {
+        prev_vector.push_back(it->x);
+    }
+    sort(prev_vector.begin(), prev_vector.end());
+    double minXPrev = prev_vector[prev_vector.size() * 1 / 5];
+
+    vector<double> curr_vector;
+    for (auto it = lidarPointsCurr.begin(); it != lidarPointsCurr.end(); it++)
+    {
+        curr_vector.push_back(it->x);
+    }
+    sort(curr_vector.begin(), curr_vector.end());
+    double minXCurr = curr_vector[curr_vector.size() * 1 / 5];
+
+    // Compute TTC
+    TTC = minXCurr * dT / (minXPrev - minXCurr);
+
+    cout << "-------------------------------" << endl;
+    cout << "LIDAR TTC" << endl;
+    cout << "Lidar time to collision: " << TTC << " s" << endl;
+    cout << "minXPrev: " << minXPrev << " m" << endl;
+    cout << "minXCurr: " << minXCurr << " m" << endl;
+    cout << "-------------------------------" << endl;
+}
 
 void matchBoundingBoxes(std::vector<cv::DMatch> &matches, std::map<int, int> &bbBestMatches, DataFrame &prevFrame, DataFrame &currFrame)
 {
-    // ...
+    int prevBoxSize = prevFrame.boundingBoxes.size();
+    int currBoxSize = currFrame.boundingBoxes.size();
+    std::vector<std::vector<int>> prevCurrBoxScore(prevBoxSize, std::vector<int>(currBoxSize, 0));
+
+    // iterate point matches and count matching score
+    for (auto it = matches.begin(); it != matches.end(); it++)
+    {
+        // previous point
+        cv::KeyPoint prev_key_point = prevFrame.keypoints[it->queryIdx];
+        cv::Point prevPoint = cv::Point(prev_key_point.pt.x, prev_key_point.pt.y);
+
+        // current point
+        cv::KeyPoint curr_key_point = currFrame.keypoints[it->trainIdx];
+        cv::Point currPoint = cv::Point(curr_key_point.pt.x, curr_key_point.pt.y);
+
+        // get corresponding boxes
+        std::vector<int> prev_box_id_list, curr_box_id_list;
+        for (int i = 0; i < prevBoxSize; i++)
+        {
+            if (prevFrame.boundingBoxes[i].roi.contains(prevPoint))
+            {
+                prev_box_id_list.push_back(i);
+            }
+        }
+
+        for (int j = 0; j < currBoxSize; j++)
+        {
+            if (currFrame.boundingBoxes[j].roi.contains(currPoint))
+            {
+                curr_box_id_list.push_back(j);
+            }
+        }
+
+        // add count to box score
+        for (int i : prev_box_id_list)
+        {
+            for (int j : curr_box_id_list)
+            {
+                prevCurrBoxScore[i][j] += 1;
+            }
+        }
+    }
+
+    // for each previous box, pick the current box with the highest score
+    for (int i = 0; i < prevBoxSize; i++)
+    {
+        int max_score = 0;
+        int best_idx = 0;
+
+        for (int j = 0; j < currBoxSize; j++)
+        {
+            if (prevCurrBoxScore[i][j] > max_score)
+            {
+                max_score = prevCurrBoxScore[i][j];
+                best_idx = j;
+            }
+        }
+
+        bbBestMatches[i] = best_idx;
+    }
+
 }
diff --git a/src/lidarData.cpp b/src/lidarData.cpp
index a7c331ff..93b5ecc8 100644
--- a/src/lidarData.cpp
+++ b/src/lidarData.cpp
@@ -121,7 +121,7 @@ void showLidarImgOverlay(cv::Mat &img, std::vector<LidarPoint> &lidarPoints, cv::Mat &P_rect_xx, cv::Mat &R_rect_xx, cv::Mat &RT, cv::Mat *extVisImg)
         Y = P_rect_xx * R_rect_xx * RT * X;
 
         cv::Point pt;
-
+        // pixel coordinates
         pt.x = Y.at<double>(0, 0) / Y.at<double>(2, 0);
         pt.y = Y.at<double>(1, 0) / Y.at<double>(2, 0);
 
diff --git a/src/matching2D_Student.cpp b/src/matching2D_Student.cpp
index 6103686a..75ce8ecb 100644
--- a/src/matching2D_Student.cpp
+++ b/src/matching2D_Student.cpp
@@ -19,7 +19,14 @@ void matchDescriptors(std::vector<cv::KeyPoint> &kPtsSource, std::vector<cv::KeyPoint> &kPtsRef, cv::Mat &descSource, cv::Mat &descRef,
     else if (selectorType.compare("SEL_KNN") == 0)
     { // k nearest neighbors (k=2)
-        // ...
+        std::vector<std::vector<cv::DMatch>> knn_matches;
+        double t = (double)cv::getTickCount();
+        matcher->knnMatch(descSource, descRef, knn_matches, 2);
+
+        t = ((double)cv::getTickCount() - t) / cv::getTickFrequency();
+        cout << "(KNN) with n = " << knn_matches.size() << " matches in " << 1000 * t / 1.0 << " ms" << endl;
+
+        double minDescDistRatio = 0.8;
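+        // Lowe's distance ratio test: keep a match only if the best distance is
+        // clearly smaller than the second-best candidate's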
+        for (auto it = knn_matches.begin(); it != knn_matches.end(); ++it)
+        {
+            if ((*it)[0].distance < minDescDistRatio * (*it)[1].distance)
+            {
+                matches.push_back((*it)[0]);
+            }
+        }
     }
 }
 
@@ -49,11 +70,33 @@ void descKeypoints(vector<cv::KeyPoint> &keypoints, cv::Mat &img, cv::Mat &descriptors, string descriptorType)
         extractor = cv::BRISK::create(threshold, octaves, patternScale);
     }
-    else
+    else if (descriptorType.compare("AKAZE") == 0)
     {
-
-        //...
+        extractor = cv::AKAZE::create();
+    }
+    else if (descriptorType.compare("BRIEF") == 0)
+    {
+        extractor = cv::xfeatures2d::BriefDescriptorExtractor::create();
+    }
+    else if (descriptorType.compare("ORB") == 0)
+    {
+        extractor = cv::ORB::create();
+    }
+    else if (descriptorType.compare("FREAK") == 0)
+    {
+        extractor = cv::xfeatures2d::FREAK::create();
+    }
+    else if (descriptorType.compare("SIFT") == 0)
+    {
+        // note: from OpenCV 4.4 on, SIFT is also available as cv::SIFT in the core module
+        extractor = cv::xfeatures2d::SIFT::create();
     }
 
     // perform feature description
     double t = (double)cv::getTickCount();
@@ -101,4 +144,85 @@ void detKeypointsShiTomasi(vector<cv::KeyPoint> &keypoints, cv::Mat &img, bool bVis)
         imshow(windowName, visImage);
         cv::waitKey(0);
     }
-}
\ No newline at end of file
+}
+
+// detect keypoints in an image using one of the "modern" detectors (FAST, BRISK, ORB, AKAZE, SIFT)
+void detKeypointsModern(std::vector<cv::KeyPoint> &keypoints, cv::Mat &img, std::string detectorType, bool bVis)
+{
+    cv::Mat visImage;
+    double t;
+    cv::Ptr<cv::FeatureDetector> detector;
+
+    // string-based selection
+    if (detectorType.compare("AKAZE") == 0)
+    {
+        // OpenCV defaults: DESCRIPTOR_MLDB, descriptor size 0 (full), 3 channels,
+        // threshold 0.001f, 4 octave layers, KAZE::DIFF_PM_G2 diffusivity
+        detector = cv::AKAZE::create();
+    }
+    else if (detectorType.compare("BRISK") == 0)
+    {
+        int threshold = 30;        // FAST/AGAST detection threshold score
+        int octaves = 3;           // detection octaves (use 0 to do single scale)
+        float patternScale = 1.0f; // apply this scale to the pattern used for sampling the neighbourhood of a keypoint
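+        // (these are the OpenCV defaults; a higher threshold yields fewer but stronger keypoints)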
+
+        detector = cv::BRISK::create(threshold, octaves, patternScale);
+    }
+    else if (detectorType.compare("FAST") == 0)
+    {
+        int threshold = 30; // difference of intensity between the central pixel and pixels on a circle around it
+        bool bNMS = true;   // perform non-maxima suppression on keypoints
+        cv::FastFeatureDetector::DetectorType type = cv::FastFeatureDetector::TYPE_9_16;
+
+        detector = cv::FastFeatureDetector::create(threshold, bNMS, type);
+    }
+    else if (detectorType.compare("ORB") == 0)
+    {
+        detector = cv::ORB::create();
+    }
+    else if (detectorType.compare("SIFT") == 0)
+    {
+        // SIFT parameters (OpenCV defaults)
+        int nfeatures = 0;
+        int nOctaveLayers = 3;
+        double contrastThreshold = 0.04;
+        double edgeThreshold = 10;
+        double sigma = 1.6;
+
+        detector = cv::xfeatures2d::SIFT::create(nfeatures, nOctaveLayers, contrastThreshold, edgeThreshold, sigma);
+    }
+    else
+    {
+        cout << detectorType << " detector is not supported" << endl;
+        return;
+    }
+
+    t = (double)cv::getTickCount();
+    detector->detect(img, keypoints);
+    t = ((double)cv::getTickCount() - t) / cv::getTickFrequency();
+
+    cout << detectorType << " detection with n=" << keypoints.size() << " keypoints in " << 1000 * t / 1.0 << " ms" << endl;
+
+    if (bVis)
+    {
+        visImage = img.clone();
+        cv::drawKeypoints(img, keypoints, visImage, cv::Scalar::all(-1), cv::DrawMatchesFlags::DRAW_RICH_KEYPOINTS);
+        string windowName = "Detection Results";
+        cv::namedWindow(windowName, 2);
+        imshow(windowName, visImage);
+        cv::waitKey(0);
+    }
+}
\ No newline at end of file