OpenCV on iOS: False matching with SurfFeatureDetector and FlannBasedMatcher - ios

I am trying to use OpenCV's feature detection tools in order to decide whether a small sample image exists in a larger scene image or not.
I used the code from here as a reference (without the homography part).
UIImage *sceneImage, *objectImage1;
cv::Mat sceneImageMat, objectImageMat1;
cv::vector<cv::KeyPoint> sceneKeypoints, objectKeypoints1;
cv::Mat sceneDescriptors, objectDescriptors1;
cv::SurfFeatureDetector *surfDetector;
cv::FlannBasedMatcher flannMatcher;
cv::vector<cv::DMatch> matches;
int minHessian;
double minDistMultiplier;
minHessian = 400;
minDistMultiplier= 3;
surfDetector = new cv::SurfFeatureDetector(minHessian);
sceneImage = [UIImage imageNamed:#"twitter_scene.png"];
objectImage1 = [UIImage imageNamed:#"twitter.png"];
sceneImageMat = cv::Mat(sceneImage.size.height, sceneImage.size.width, CV_8UC1);
objectImageMat1 = cv::Mat(objectImage1.size.height, objectImage1.size.width, CV_8UC1);
cv::cvtColor([sceneImage CVMat], sceneImageMat, CV_RGB2GRAY);
cv::cvtColor([objectImage1 CVMat], objectImageMat1, CV_RGB2GRAY);
if (!sceneImageMat.data || !objectImageMat1.data) {
NSLog(#"NO DATA");
}
surfDetector->detect(sceneImageMat, sceneKeypoints);
surfDetector->detect(objectImageMat1, objectKeypoints1);
surfExtractor.compute(sceneImageMat, sceneKeypoints, sceneDescriptors);
surfExtractor.compute(objectImageMat1, objectKeypoints1, objectDescriptors1);
flannMatcher.match(objectDescriptors1, sceneDescriptors, matches);
double max_dist = 0; double min_dist = 100;
for( int i = 0; i < objectDescriptors1.rows; i++ )
{
double dist = matches[i].distance;
if( dist < min_dist ) min_dist = dist;
if( dist > max_dist ) max_dist = dist;
}
cv::vector<cv::DMatch> goodMatches;
for( int i = 0; i < objectDescriptors1.rows; i++ )
{
if( matches[i].distance < minDistMultiplier*min_dist )
{
goodMatches.push_back( matches[i]);
}
}
NSLog(#"Good matches found: %lu", goodMatches.size());
cv::Mat imageMatches;
cv::drawMatches(objectImageMat1, objectKeypoints1, sceneImageMat, sceneKeypoints, goodMatches, imageMatches, cv::Scalar::all(-1), cv::Scalar::all(-1),
cv::vector<char>(), cv::DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
for( int i = 0; i < goodMatches.size(); i++ )
{
//-- Get the keypoints from the good matches
obj.push_back( objectKeypoints1[ goodMatches[i].queryIdx ].pt );
scn.push_back( objectKeypoints1[ goodMatches[i].trainIdx ].pt );
}
cv::vector<uchar> outputMask;
cv::Mat homography = cv::findHomography(obj, scn, CV_RANSAC, 3, outputMask);
int inlierCounter = 0;
for (int i = 0; i < outputMask.size(); i++) {
if (outputMask[i] == 1) {
inlierCounter++;
}
}
NSLog(#"Inliers percentage: %d", (int)(((float)inlierCounter / (float)outputMask.size()) * 100));
cv::vector<cv::Point2f> objCorners(4);
objCorners[0] = cv::Point(0,0);
objCorners[1] = cv::Point( objectImageMat1.cols, 0 );
objCorners[2] = cv::Point( objectImageMat1.cols, objectImageMat1.rows );
objCorners[3] = cv::Point( 0, objectImageMat1.rows );
cv::vector<cv::Point2f> scnCorners(4);
cv::perspectiveTransform(objCorners, scnCorners, homography);
cv::line( imageMatches, scnCorners[0] + cv::Point2f( objectImageMat1.cols, 0), scnCorners[1] + cv::Point2f( objectImageMat1.cols, 0), cv::Scalar(0, 255, 0), 4);
cv::line( imageMatches, scnCorners[1] + cv::Point2f( objectImageMat1.cols, 0), scnCorners[2] + cv::Point2f( objectImageMat1.cols, 0), cv::Scalar( 0, 255, 0), 4);
cv::line( imageMatches, scnCorners[2] + cv::Point2f( objectImageMat1.cols, 0), scnCorners[3] + cv::Point2f( objectImageMat1.cols, 0), cv::Scalar( 0, 255, 0), 4);
cv::line( imageMatches, scnCorners[3] + cv::Point2f( objectImageMat1.cols, 0), scnCorners[0] + cv::Point2f( objectImageMat1.cols, 0), cv::Scalar( 0, 255, 0), 4);
[self.mainImageView setImage:[UIImage imageWithCVMat:imageMatches]];
This works, but I keep getting a significant amount of matches, even when the small image is not part of the larger one.
Here's an example for a good output:
And here's an example for a bad output:
Both outputs are the result of the same code. Only difference is the small sample image.
With results like this, it is impossible for me to know when a sample image is NOT in the larger image.
While doing my research, I found this stackoverflow question. I followed the answer given there, and tried the steps suggested in the "OpenCV 2 Computer Vision Application Programming Cookbook" book, but I wasn't able to make it work with images of different sizes (seems like a limitation of the cv::findFundamentalMat function).
What am I missing? Is there a way to use SurfFeatureDetector and FlannBasedMatcher to know when one sample image is a part of a larger image, and another sample image isn't? Is there a different method which is better for that purpose?
UPDATE:
I updated the code above to include the complete function I use, including trying to actually draw the homography. Plus, here are 3 images - 1 scene, and two small objects I'm trying to find in the scene. I'm getting better inlier percentages for the paw icon, and not the twitter icon, which is actually IN the scene. Plus, the homography is not drawn for some reason:
Twitter Icon
Paw Icon
Scene

Your matcher will always match every point from the smaller descriptor list to one of the larger list. You then have to look for yourself which of these matches make sense and which not. You can do this by discarding every match that exceeds a maximum allowed descriptor distance, or you can try to find a transformation matrix (e.g. with findHomography) and check if enough matches correspond to it.

It's a old post , but from a similar assignment I had to do for class. A way to remove the bad output is to check that most of the matching lines are parallel(relatively) to each other, and remove matches that point in wrong directions.

Related

Features2d + Homography not giving appropriate results

I am trying to detect an object using the SurfFeatureDetect and FLANN matcher. However, the code is not able to detect the image accurately. I have also posted the results in pictorial format.
Here's my code from the opencv tutorial website
int main(int argc, char** argv){
if (argc != 3){
readme(); return -1;
}
Mat img_object = imread(argv[1], CV_LOAD_IMAGE_GRAYSCALE);
Mat img_scene = imread(argv[2], CV_LOAD_IMAGE_GRAYSCALE);
if (!img_object.data || !img_scene.data)
{
std::cout << " --(!) Error reading images " << std::endl; return -1;
}
//-- Step 1: Detect the keypoints using SURF Detector
int minHessian = 100;
SurfFeatureDetector detector(minHessian);
std::vector<KeyPoint> keypoints_object, keypoints_scene;
detector.detect(img_object, keypoints_object);
detector.detect(img_scene, keypoints_scene);
//-- Step 2: Calculate descriptors (feature vectors)
SurfDescriptorExtractor extractor;
Mat descriptors_object, descriptors_scene;
extractor.compute(img_object, keypoints_object, descriptors_object);
extractor.compute(img_scene, keypoints_scene, descriptors_scene);
//-- Step 3: Matching descriptor vectors using FLANN matcher
FlannBasedMatcher matcher;
std::vector< DMatch > matches;
matcher.match(descriptors_object, descriptors_scene, matches);
double max_dist = 0; double min_dist = 100;
//-- Quick calculation of max and min distances between keypoints
for (int i = 0; i < descriptors_object.rows; i++)
{
double dist = matches[i].distance;
if (dist < min_dist) min_dist = dist;
if (dist > max_dist) max_dist = dist;
}
printf("-- Max dist : %f \n", max_dist);
printf("-- Min dist : %f \n", min_dist);
//-- Draw only "good" matches (i.e. whose distance is less than 3*min_dist )
std::vector< DMatch > good_matches;
for (int i = 0; i < descriptors_object.rows; i++)
{
if (matches[i].distance < 3 * min_dist)
{
good_matches.push_back(matches[i]);
}
}
Mat img_matches;
drawMatches(img_object, keypoints_object, img_scene, keypoints_scene,
good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
//-- Localize the object
std::vector<Point2f> obj;
std::vector<Point2f> scene;
for (int i = 0; i < good_matches.size(); i++)
{
//-- Get the keypoints from the good matches
obj.push_back(keypoints_object[good_matches[i].queryIdx].pt);
scene.push_back(keypoints_scene[good_matches[i].trainIdx].pt);
}
Mat H = findHomography(obj, scene, CV_RANSAC);
//-- Get the corners from the image_1 ( the object to be "detected" )
std::vector<Point2f> obj_corners(4);
obj_corners[0] = cvPoint(0, 0); obj_corners[1] = cvPoint(img_object.cols, 0);
obj_corners[2] = cvPoint(img_object.cols, img_object.rows); obj_corners[3] = cvPoint(0, img_object.rows);
std::vector<Point2f> scene_corners(4);
perspectiveTransform(obj_corners, scene_corners, H);
//-- Draw lines between the corners (the mapped object in the scene - image_2 )
line(img_matches, scene_corners[0] + Point2f(img_object.cols, 0), scene_corners[1] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
line(img_matches, scene_corners[1] + Point2f(img_object.cols, 0), scene_corners[2] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
line(img_matches, scene_corners[2] + Point2f(img_object.cols, 0), scene_corners[3] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
line(img_matches, scene_corners[3] + Point2f(img_object.cols, 0), scene_corners[0] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
//-- Show detected matches
imshow("Good Matches & Object detection", img_matches);
waitKey(0);
return 0;}
/** #function readme */
void readme()
{
std::cout << " Usage: ./SURF_descriptor <img1> <img2>" << std::endl;}
That is a very common failure. The problem is that the homography has 8 degree of freedom (8DOF). This means that you need at least 4 correct correspondences to calculate a good homography:
As you can see, the homography has 8 parameters (the last parameter h33 is just a scale factor).
The problem arises when other than good corrspondces (inlier) you need to filter out bad correspondences (outlier). When the are more outliers than inliers (total/outliers > 50%) the RANSAC procedure cannot find the outlier and you obtain weird results.
Solutions to this problem are not easy. You could:
Use a training image with a similar out-of-plane rotation (and a similar scale) of the object in your query image.
Or, use a transformation with less degree of freedom (such as similarity transform). In this way you will need less inliers. Altho OpenCV lacks support for this simpler transformation with a robust fitting method.

Calculate similarity score between scene and template object

How can I calculate some comparable similarity score which tells me how similar the img_scene is compared to img_object.
When I render the img_matches, the homography successfully renders the boundaries of the found object in the scene, but I need some comparable score like if (score > THRESHOLD) { /* have match */ } else { /* dont have match */ }.
Mat img_scene = srcImage;
Mat img_object = _templateImage;
//-- Step 1: Detect the keypoints using SURF Detector
SurfFeatureDetector detector(_minHessian);
std::vector<KeyPoint> keypoints_object, keypoints_scene;
detector.detect(img_object, keypoints_object);
detector.detect(img_scene, keypoints_scene);
//-- Step 2: Calculate descriptors (feature vectors)
SurfDescriptorExtractor extractor;
Mat descriptors_object, descriptors_scene;
extractor.compute(img_object, keypoints_object, descriptors_object);
extractor.compute(img_scene, keypoints_scene, descriptors_scene);
if (descriptors_object.type() != descriptors_scene.type())
return;
//-- Step 3: Matching descriptor vectors using FLANN matcher
FlannBasedMatcher matcher;
std::vector<DMatch> matches;
matcher.match(descriptors_object, descriptors_scene, matches);
double max_dist = 0; double min_dist = 100;
//-- Quick calculation of max and min distances between keypoints
for (size_t i = 0; i < (size_t)descriptors_object.rows; i++ ) {
double dist = matches[i].distance;
if (dist < min_dist) min_dist = dist;
if (dist > max_dist) max_dist = dist;
}
//-- Draw only "good" matches (i.e. whose distance is less than 3*min_dist )
std::vector<DMatch> good_matches;
for(size_t i = 0; i < (size_t)descriptors_object.rows; i++) {
if (matches[i].distance < 2 * min_dist) {
good_matches.push_back(matches[i]);
}
}
if (good_matches.size() < 4)
return;
Mat img_matches;
drawMatches(img_object, keypoints_object, img_scene, keypoints_scene,
good_matches, img_matches, Scalar::all(-1), Scalar::all(-1),
vector<char>(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS);
//-- Localize the object
std::vector<Point2f> obj;
std::vector<Point2f> scene;
for (size_t i = 0; i < (size_t)good_matches.size(); i++) {
//-- Get the keypoints from the good matches
obj.push_back(keypoints_object[(size_t)good_matches[i].queryIdx].pt);
scene.push_back(keypoints_scene[(size_t)good_matches[i].trainIdx].pt);
}
vector<uchar> mask;
Mat H = findHomography(obj, scene, CV_RANSAC, 3, mask);
//-- Get the corners from the image_1 (the object to be "detected")
std::vector<Point2f> obj_corners(4);
obj_corners[0] = cvPoint(0, 0);
obj_corners[1] = cvPoint(img_object.cols, 0);
obj_corners[2] = cvPoint(img_object.cols, img_object.rows);
obj_corners[3] = cvPoint(0, img_object.rows);
std::vector<Point2f> scene_corners(4);
perspectiveTransform(obj_corners, scene_corners, H);
//-- Draw lines between the corners (the mapped object in the scene - image_2 )
line(img_matches, scene_corners[0] + Point2f(img_object.cols, 0), scene_corners[1] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
line(img_matches, scene_corners[1] + Point2f(img_object.cols, 0), scene_corners[2] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
line(img_matches, scene_corners[2] + Point2f(img_object.cols, 0), scene_corners[3] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
line(img_matches, scene_corners[3] + Point2f(img_object.cols, 0), scene_corners[0] + Point2f(img_object.cols, 0), Scalar(0, 255, 0), 4);
UPDATE:
Here is the working solution as #mikesapi proposed:
...
//-- Draw only "good" matches (i.e. whose distance is less than 3*min_dist )
std::vector<DMatch> good_matches;
double good_matches_sum = 0.0;
for (size_t i = 0; i < matches.size(); i++ ) {
if( matches[i].distance < max(2*min_dist, 0.02) ) {
good_matches.push_back(matches[i]);
good_matches_sum += matches[i].distance;
}
}
double score = (double)good_matches_sum / (double)good_matches.size();
if (score < 0.18) {
// have match
} else {
// dont have match
}
...
A similarity score is greater if the object and scene are more similar (as opposed to a dissimilarity score, where a higher score means they are more dissimilar). Since you are using distances with FLANN (which I assume is giving you approximate euclidean distances between descriptors) a dissimilarity score is easier to generate, since euclidean distance is greater if descriptors are further apart in the descriptor space, and small if they are close together.
One simple way to generate a dissimilarity score would be to:
1. For each descriptor in the object image: calculate the minimum distance to each descriptor in the scene image.
2. Sum the (minimum) distances, and normalize by the number of descriptors in the object image.
Then you will have a single score quantifying the match between the object and the scene.

opencv, selective coloring effect

I want to make an effect on an image, such that specific color remains same, and the rest becomes grayscale. However, the output of my program is quite bad, could anyone please help me to improve my code? It breaks when the coordinates exceed 500, and the color range shown is too small. Thanks in advance
Code:
int i1=10, i2=50, a = 10;// coordinates of pixel value, whose color must remain; hue range
IplImage* GetNewImage(IplImage* frame, IplImage* imgHSV){
IplImage* imgNew = cvCreateImage(cvGetSize(frame), IPL_DEPTH_8U, 3);
IplImage* imgBW = cvCreateImage(cvGetSize(frame), IPL_DEPTH_8U, 1);
CvScalar s1;
CvScalar s2;
CvScalar s3;
s3 = cvGet2D(imgHSV, i1, i2);
cvInRangeS(imgHSV, cvScalar(s3.val[0] - 10, s3.val[1] - 50, s3.val[2] - 50), cvScalar(s3.val[0] + 10, s3.val[1] + 50, s3.val[2] + 50), imgBW);
imgNew = cvCloneImage(frame);
for (int i = 0; i < frame->height; i++) for (int j = 0; j < frame->width; j++) {
s1 = cvGet2D(imgBW, i, j);
s2 = cvGet2D(imgNew, i, j);
if (s1.val[0] == 0){
for (int k = 0; k < 3; k++){
s3.val[k] = (s2.val[0] * 0.114 + s2.val[1] * 0.587 + s2.val[2] * 0.299);//converting to grayscale
cvSet2D(imgNew, i, j, s3);
}
}
}
cvReleaseImage(&imgBW);
return imgNew;
}
drop that outdated c-api, it's dead & gone.
you already found out about inRange(), use the outcome as a mask for further processing
there's cvtColor(src,dst,CV_BGR2GRAY) already, prefer the builtin stuff to rolling your own
opencv is a computer-vision / ml library, you're abusing it as a 2nd class photoshop. - don't !

OpenCV: Retrieving color of the center of a contour

Im trying to detect the colour of a set of shapes in a black image using OpenCV, for which I use Canny detection. However the color output always comes back as black.
std::vector<std::pair<cv::Point, cv::Vec3b> > Asteroids::DetectPoints(const cv::Mat &image)
{
cv::Mat imageGray;
cv::cvtColor( image, imageGray, CV_BGR2GRAY );
cv::threshold(imageGray, imageGray, 1, 255, cv::THRESH_BINARY);
cv::Mat canny_output;
std::vector<std::vector<cv::Point> > contours;
std::vector<cv::Vec4i> hierarchy;
int thresh = 10;
// Detect edges using canny
cv::Canny( imageGray, canny_output, thresh, thresh*2, 3 );
// Find contours
cv::findContours( canny_output, contours, hierarchy, CV_RETR_LIST, CV_CHAIN_APPROX_NONE, cv::Point(0, 0) );
std::vector<std::pair<cv::Point, cv::Vec3b> > points;
for(unsigned int i = 0; i < contours.size(); i++ )
{
cv::Rect rect = cv::boundingRect(contours[i]);
std::pair<cv::Point, cv::Vec3b> posColor;
posColor.first = cv::Point( rect.tl().x + (rect.size().width / 2), rect.tl().y + (rect.size().height / 2));
posColor.second = image.at<cv::Vec3b>( posColor.first.x, posColor.first.y );
//Dont add teh entry to the list if one with the same color and position is already pressent,
//The contour detection sometimes returns duplicates
bool isInList = false;
for(unsigned int j = 0; j < points.size(); j++)
if(points[j].first == posColor.first && points[j].second == posColor.second)
isInList = true;
if(!isInList)
points.push_back( posColor );
}
return points;
}
I know it has to be an issue with the positions or something along those lines, but I cant figure out what
I might be wrong, but off the top of my head :
Shouldn't this read
posColor.second = image.at<cv::Vec3b>(posColor.first.y, posColor.first.x);
and not the other way around like you did it ?
Matrix notation, not cartesian notation ?

opencv background substraction

I have an image of the background scene and an image of the same scene with objects in front. Now I want to create a mask of the object in the foreground with background substraction. Both images are RGB.
I have already created the following code:
cv::Mat diff;
diff.create(orgImage.dims, orgImage.size, CV_8UC3);
diff = abs(orgImage-refImage);
cv::Mat mask(diff.rows, diff.cols, CV_8U, cv::Scalar(0,0,0));
//mask = (diff > 10);
for (int j=0; j<diff.rows; j++) {
// get the address of row j
//uchar* dataIn= diff.ptr<uchar>(j);
//uchar* dataOut= mask.ptr<uchar>(j);
for (int i=0; i<diff.cols; i++) {
if(diff.at<cv::Vec3b>(j,i)[0] > 30 || diff.at<cv::Vec3b>(j,i)[1] > 30 || diff.at<cv::Vec3b>(j,i)[2] > 30)
mask.at<uchar>(j,i) = 255;
}
}
I dont know if I am doing this right?
Have a look at the inRange function from OpenCV. This will allow you to set multiple thresholds at the same time for a 3 channel image.
So, to create the mask you were looking for, do the following:
inRange(diff, Scalar(30, 30, 30), Scalar(255, 255, 255), mask);
This should also be faster than trying to access each pixel yourself.
EDIT : If skin detection is what you are trying to do, I would first do skin detection, and then afterwards do background subtraction to remove the background. Otherwise, your skin detector will have to take into account the intensity shift caused by the subtraction.
Check out my other answer, about good techniques for skin detection.
EDIT :
Is this any faster?
int main(int argc, char* argv[])
{
Mat fg = imread("fg.jpg");
Mat bg = imread("bg.jpg");
cvtColor(fg, fg, CV_RGB2YCrCb);
cvtColor(bg, bg, CV_RGB2YCrCb);
Mat distance = Mat::zeros(fg.size(), CV_32F);
vector<Mat> fgChannels;
split(fg, fgChannels);
vector<Mat> bgChannels;
split(bg, bgChannels);
for(size_t i = 0; i < fgChannels.size(); i++)
{
Mat temp = abs(fgChannels[i] - bgChannels[i]);
temp.convertTo(temp, CV_32F);
distance = distance + temp;
}
Mat mask;
threshold(distance, mask, 35, 255, THRESH_BINARY);
Mat kernel5x5 = getStructuringElement(MORPH_RECT, Size(5, 5));
morphologyEx(mask, mask, MORPH_OPEN, kernel5x5);
imshow("fg", fg);
imshow("bg", bg);
imshow("mask", mask);
waitKey();
return 0;
}
This code produces this mask based on your input imagery:
Finally, here is what I get using my simple thresholding method:
Mat diff = fgYcc - bgYcc;
vector<Mat> diffChannels;
split(diff, diffChannels);
// only operating on luminance for background subtraction...
threshold(diffChannels[0], bgfgMask, 1, 255.0, THRESH_BINARY_INV);
Mat kernel5x5 = getStructuringElement(MORPH_RECT, Size(5, 5));
morphologyEx(bgfgMask, bgfgMask, MORPH_OPEN, kernel5x5);
This produce the following mask:
I think when I'm doing it like this I get the right results: (in the YCrCb colorspace) but accessing each px is slow so I need to find another algorithm
cv::Mat mask(image.rows, image.cols, CV_8U, cv::Scalar(0,0,0));
cv::Mat_<cv::Vec3b>::const_iterator itImage= image.begin<cv::Vec3b>();
cv::Mat_<cv::Vec3b>::const_iterator itend= image.end<cv::Vec3b>();
cv::Mat_<cv::Vec3b>::iterator itRef= refRoi.begin<cv::Vec3b>();
cv::Mat_<uchar>::iterator itMask= mask.begin<uchar>();
for ( ; itImage!= itend; ++itImage, ++itRef, ++itMask) {
int distance = abs((*itImage)[0]-(*itRef)[0])+
abs((*itImage)[1]-(*itRef)[1])+
abs((*itImage)[2]-(*itRef)[2]);
if(distance < 30)
*itMask = 0;
else
*itMask = 255;
}

Resources