Understanding V&J (haar.cpp) sliding window - OpenCV

I'm going over the code of haar.cpp to understand the sliding window approach. Here is the code:
for( factor = 1; ; factor *= scaleFactor )
{
CvSize winSize = { cvRound(winSize0.width*factor),
cvRound(winSize0.height*factor) };
CvSize sz = { cvRound( img->cols/factor ), cvRound( img->rows/factor ) };
CvSize sz1 = { sz.width - winSize0.width + 1, sz.height - winSize0.height + 1 };
CvRect equRect = { icv_object_win_border, icv_object_win_border,
winSize0.width - icv_object_win_border*2,
winSize0.height - icv_object_win_border*2 };
CvMat img1, sum1, sqsum1, norm1, tilted1, mask1;
CvMat* _tilted = 0;
if( sz1.width <= 0 || sz1.height <= 0 )
break;
if( winSize.width > maxSize.width || winSize.height > maxSize.height )
break;
if( winSize.width < minSize.width || winSize.height < minSize.height )
continue;
img1 = cvMat( sz.height, sz.width, CV_8UC1, imgSmall->data.ptr );
sum1 = cvMat( sz.height+1, sz.width+1, CV_32SC1, sum->data.ptr );
sqsum1 = cvMat( sz.height+1, sz.width+1, CV_64FC1, sqsum->data.ptr );
if( tilted )
{
tilted1 = cvMat( sz.height+1, sz.width+1, CV_32SC1, tilted->data.ptr );
_tilted = &tilted1;
}
norm1 = cvMat( sz1.height, sz1.width, CV_32FC1, normImg ? normImg->data.ptr : 0 );
mask1 = cvMat( sz1.height, sz1.width, CV_8UC1, temp->data.ptr );
cvResize( img, &img1, CV_INTER_LINEAR );
cvIntegral( &img1, &sum1, &sqsum1, _tilted );
int ystep = factor > 2 ? 1 : 2;
const int LOCS_PER_THREAD = 1000;
int stripCount = ((sz1.width/ystep)*(sz1.height + ystep-1)/ystep + LOCS_PER_THREAD/2)/LOCS_PER_THREAD;
stripCount = std::min(std::max(stripCount, 1), 100);
#ifdef HAVE_IPP
if( use_ipp )
{
cv::Mat fsum(sum1.rows, sum1.cols, CV_32F, sum1.data.ptr, sum1.step);
cv::Mat(&sum1).convertTo(fsum, CV_32F, 1, -(1<<24));
}
else
#endif
cvSetImagesForHaarClassifierCascade( cascade, &sum1, &sqsum1, _tilted, 1. );
cv::Mat _norm1(&norm1), _mask1(&mask1);
cv::parallel_for_(cv::Range(0, stripCount),
cv::HaarDetectObjects_ScaleImage_Invoker(cascade,
(((sz1.height + stripCount - 1)/stripCount + ystep-1)/ystep)*ystep,
factor, cv::Mat(&sum1), cv::Mat(&sqsum1), &_norm1, &_mask1,
cv::Rect(equRect), allCandidates, rejectLevels, levelWeights, outputRejectLevels, &mtx));
}
}
Now, I want to make sure I got everything right. As I understand it, we loop over the scales, and at each scale we subsample the image and try to find objects at a fixed size (20x20 for faces), going over all the x and y locations.
The pseudo-code is:
for scale=1:ScaleMax
for X=1:width
for Y=1:height
Try to detect a face at position (X,Y) with a fixed size of 20x20.
Is that accurate, or did I get something wrong?
Thanks,
Gil.

Your understanding is accurate, but not precise.
For better precision, you should read the original paper by Viola and Jones, since all the magic is in the step "Try to detect a face at position (X,Y) with a fixed size of 20x20".
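To make the structure concrete, here is a sketch of the control flow in the code you quoted (my reading of it, not the literal haar.cpp source): the detection window never grows; instead the image is shrunk by factor at every iteration, and the x/y scan step is 2 pixels at small scales and 1 pixel once factor > 2.
// Sketch only, reusing the names from the quoted code.
for( double factor = 1; ; factor *= scaleFactor )
{
    CvSize sz = { cvRound(img->cols/factor), cvRound(img->rows/factor) };
    if( sz.width < winSize0.width || sz.height < winSize0.height )
        break;                          // the fixed 20x20 window no longer fits
    // resize the image to sz and recompute the integral images here
    // (cvResize + cvIntegral in the real code)
    int ystep = factor > 2 ? 1 : 2;     // coarser scan grid at small scales
    for( int y = 0; y <= sz.height - winSize0.height; y += ystep )
        for( int x = 0; x <= sz.width - winSize0.width; x += ystep )
        {
            // "try to detect a face": evaluate the cascade on the window
            // whose top-left corner is (x, y) in the shrunken image
        }
}
So your pseudo-code is right in spirit; the precise version scans the shrunken image with a fixed window and a scale-dependent step, rather than every (X,Y) of the original image.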

Related

How to do flipping without using the inbuilt function flip in OpenCV?

Can anyone help me with this problem: how do I flip an image without using the built-in flipping function, i.e. flip(src image, destination image, 1 or 0), in C++ using OpenCV? I am new to this library, so please help.
OpenCV's flip function uses internal flipHoriz or flipVert functions.
static void
flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
{
int i, j, limit = (int)(((size.width + 1)/2)*esz);
AutoBuffer<int> _tab(size.width*esz);
int* tab = _tab;
for( i = 0; i < size.width; i++ )
for( size_t k = 0; k < esz; k++ )
tab[i*esz + k] = (int)((size.width - i - 1)*esz + k);
for( ; size.height--; src += sstep, dst += dstep )
{
for( i = 0; i < limit; i++ )
{
j = tab[i];
uchar t0 = src[i], t1 = src[j];
dst[i] = t1; dst[j] = t0;
}
}
}
static void
flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz )
{
const uchar* src1 = src0 + (size.height - 1)*sstep;
uchar* dst1 = dst0 + (size.height - 1)*dstep;
size.width *= (int)esz;
for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep,
dst0 += dstep, dst1 -= dstep )
{
int i = 0;
if( ((size_t)src0|(size_t)dst0|(size_t)src1|(size_t)dst1) % sizeof(int) == 0 )
{
for( ; i <= size.width - 16; i += 16 )
{
int t0 = ((int*)(src0 + i))[0];
int t1 = ((int*)(src1 + i))[0];
((int*)(dst0 + i))[0] = t1;
((int*)(dst1 + i))[0] = t0;
t0 = ((int*)(src0 + i))[1];
t1 = ((int*)(src1 + i))[1];
((int*)(dst0 + i))[1] = t1;
((int*)(dst1 + i))[1] = t0;
t0 = ((int*)(src0 + i))[2];
t1 = ((int*)(src1 + i))[2];
((int*)(dst0 + i))[2] = t1;
((int*)(dst1 + i))[2] = t0;
t0 = ((int*)(src0 + i))[3];
t1 = ((int*)(src1 + i))[3];
((int*)(dst0 + i))[3] = t1;
((int*)(dst1 + i))[3] = t0;
}
for( ; i <= size.width - 4; i += 4 )
{
int t0 = ((int*)(src0 + i))[0];
int t1 = ((int*)(src1 + i))[0];
((int*)(dst0 + i))[0] = t1;
((int*)(dst1 + i))[0] = t0;
}
}
for( ; i < size.width; i++ )
{
uchar t0 = src0[i];
uchar t1 = src1[i];
dst0[i] = t1;
dst1[i] = t0;
}
}
}
// you can use it with a small modification as below
void myflip( InputArray _src, OutputArray _dst, int flip_mode )
{
CV_Assert( _src.dims() <= 2 );
Size size = _src.size();
if (flip_mode < 0)
{
if (size.width == 1)
flip_mode = 0;
if (size.height == 1)
flip_mode = 1;
}
if ((size.width == 1 && flip_mode > 0) ||
(size.height == 1 && flip_mode == 0) ||
(size.height == 1 && size.width == 1 && flip_mode < 0))
{
return _src.copyTo(_dst);
}
Mat src = _src.getMat();
int type = src.type();
_dst.create( size, type );
Mat dst = _dst.getMat();
size_t esz = CV_ELEM_SIZE(type);
if( flip_mode <= 0 )
flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
else
flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
if( flip_mode < 0 )
flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz );
}
Assuming you have a good reason not to use the OpenCV flip function, you can write your own.
For this example, I'll use CV_8UC3 images. I'll point out at the end how to expand this to different formats.
Let's first see how to flip an image along the x axis (a horizontal flip), which corresponds to cv::flip(src, dst, 1). Given a src image, each dst pixel keeps the same y coordinate, while its x coordinate becomes src.cols - 1 - x. In practice:
void flip_lr(const Mat3b& src, Mat3b& dst)
{
Mat3b _dst(src.rows, src.cols);
for (int r = 0; r < _dst.rows; ++r) {
for (int c = 0; c < _dst.cols; ++c) {
_dst(r, c) = src(r, src.cols - 1 - c);
}
}
dst = _dst;
}
Then, to flip along the y axis (a vertical flip, corresponding to cv::flip(src, dst, 0)), each dst pixel keeps the same x coordinate, while its y coordinate becomes src.rows - 1 - y. However, you can reuse the function above: simply transpose the matrix, apply the flip along the x axis, and then transpose back. In practice:
dst = src.t();
flip_lr(dst, dst);
dst = dst.t();
Then, to flip both axes, corresponding to cv::flip(src, dst, -1), you simply combine the flips along the x and y axes:
flip_lr(src, dst);
dst = dst.t();
flip_lr(dst, dst);
dst = dst.t();
You can wrap this functionality in a custom flip function that takes the same parameters as cv::flip:
void custom_flip(const Mat3b& src, Mat3b& dst, int code)
{
if (code > 0)
{ // Flip x axis
flip_lr(src, dst);
}
else if (code == 0)
{
// Flip y axis
dst = src.t();
flip_lr(dst, dst);
dst = dst.t();
}
else // code < 0
{
// Flip x and y axis
flip_lr(src, dst);
dst = dst.t();
flip_lr(dst, dst);
dst = dst.t();
}
}
Note that you can adapt this to different formats by simply modifying the flip_lr function, taking care to call the appropriate version inside custom_flip, which will then accept Mat instead of Mat3b.
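As an illustration of that adaptation (this generic variant is my own sketch, not part of the original answer), flip_lr can operate on a plain Mat of any type by swapping whole pixels of elemSize() bytes; it also stays safe for in-place calls by writing into a temporary:
// Sketch: horizontal flip for any element type (needs <cstring> for std::memcpy).
void flip_lr_generic(const cv::Mat& src, cv::Mat& dst)
{
    cv::Mat tmp(src.rows, src.cols, src.type());
    size_t esz = src.elemSize();                       // bytes per pixel, any format
    for (int r = 0; r < src.rows; ++r)
    {
        const uchar* srow = src.ptr<uchar>(r);
        uchar* trow = tmp.ptr<uchar>(r);
        for (int c = 0; c < src.cols; ++c)
            std::memcpy(trow + (size_t)c * esz,
                        srow + (size_t)(src.cols - 1 - c) * esz, esz);
    }
    dst = tmp;                                         // safe even when &src == &dst
}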
Full code for reference:
void flip_lr(const Mat3b& src, Mat3b& dst)
{
Mat3b _dst(src.rows, src.cols);
for (int r = 0; r < _dst.rows; ++r) {
for (int c = 0; c < _dst.cols; ++c) {
_dst(r, c) = src(r, src.cols - 1 - c);
}
}
dst = _dst;
}
void custom_flip(const Mat3b& src, Mat3b& dst, int code)
{
if (code > 0)
{ // Flip x axis
flip_lr(src, dst);
}
else if (code == 0)
{
// Flip y axis
dst = src.t();
flip_lr(dst, dst);
dst = dst.t();
}
else // code < 0
{
// Flip x and y axis
flip_lr(src, dst);
dst = dst.t();
flip_lr(dst, dst);
dst = dst.t();
}
}
int main(void)
{
Mat3b img = imread("path_to_image");
Mat3b flipped;
flip(img, flipped, -1);
Mat3b custom;
custom_flip(img, custom, -1);
imshow("OpenCV flip", flipped);
imshow("Custom flip", custom);
waitKey();
return 0;
}

Particle Filter Model for Computer Vision Tracking

I see a lot of posts about particle filters for such purposes, but none of them talk about the steps. Most tutorials online are for kinematic models involving R, Theta movements.
I want to use a particle filter to track a simple yellow blob. It is noisy as it's underwater, and at times may be occluded. How would I implement a model for this, and what might the "move" function of the object be?
You can use optical flow in order to detect the direction of movement.
This is how I do it:
#include <stdio.h>
#include <cv.h>
#include <highgui.h>
#include <math.h>
static const double pi = 3.14159265358979323846;
inline static double square(int a)
{
return a * a;
}
inline static void allocateOnDemand( IplImage **img, CvSize size, int depth, int channels
)
{
if ( *img != NULL ) return;
*img = cvCreateImage( size, depth, channels );
if ( *img == NULL )
{
fprintf(stderr, "Error: Couldn't allocate image. Out of memory?\n");
exit(-1);
}
}
int main(void)
{
CvCapture *input_video = cvCaptureFromCAM(0);
if (input_video == NULL)
{
fprintf(stderr, "Error: Can't open video.\n");
return -1;
}
cvQueryFrame( input_video );
CvSize frame_size;
frame_size.height =
(int) cvGetCaptureProperty( input_video, CV_CAP_PROP_FRAME_HEIGHT );
frame_size.width =
(int) cvGetCaptureProperty( input_video, CV_CAP_PROP_FRAME_WIDTH );
long number_of_frames;
cvSetCaptureProperty( input_video, CV_CAP_PROP_POS_AVI_RATIO, 1. );
number_of_frames = (int) cvGetCaptureProperty( input_video, CV_CAP_PROP_POS_FRAMES );
cvSetCaptureProperty( input_video, CV_CAP_PROP_POS_FRAMES, 0. );
cvNamedWindow("Optical Flow", CV_WINDOW_AUTOSIZE);
long current_frame = 0;
while(true)
{
static IplImage *frame = NULL, *frame1 = NULL, *frame1_1C = NULL, *frame2_1C =
NULL, *eig_image = NULL, *temp_image = NULL, *pyramid1 = NULL, *pyramid2 = NULL;
cvSetCaptureProperty( input_video, CV_CAP_PROP_POS_FRAMES, current_frame );
frame = cvQueryFrame( input_video );
if (frame == NULL)
{
fprintf(stderr, "Error: Hmm. The end came sooner than we thought.\n");
return -1;
}
allocateOnDemand( &frame1_1C, frame_size, IPL_DEPTH_8U, 1 );
cvConvertImage(frame, frame1_1C, CV_CVTIMG_FLIP);
allocateOnDemand( &frame1, frame_size, IPL_DEPTH_8U, 3 );
cvConvertImage(frame, frame1, CV_CVTIMG_FLIP);
frame = cvQueryFrame( input_video );
if (frame == NULL)
{
fprintf(stderr, "Error: Hmm. The end came sooner than we thought.\n");
return -1;
}
allocateOnDemand( &frame2_1C, frame_size, IPL_DEPTH_8U, 1 );
cvConvertImage(frame, frame2_1C, CV_CVTIMG_FLIP);
allocateOnDemand( &eig_image, frame_size, IPL_DEPTH_32F, 1 );
allocateOnDemand( &temp_image, frame_size, IPL_DEPTH_32F, 1 );
CvPoint2D32f frame1_features[400];
int number_of_features;
number_of_features = 400;
cvGoodFeaturesToTrack(frame1_1C, eig_image, temp_image, frame1_features, &
number_of_features, .01, .01, NULL);
CvPoint2D32f frame2_features[400];
char optical_flow_found_feature[400];
float optical_flow_feature_error[400];
CvSize optical_flow_window = cvSize(3,3);
CvTermCriteria optical_flow_termination_criteria
= cvTermCriteria( CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, .3 );
allocateOnDemand( &pyramid1, frame_size, IPL_DEPTH_8U, 1 );
allocateOnDemand( &pyramid2, frame_size, IPL_DEPTH_8U, 1 );
cvCalcOpticalFlowPyrLK(frame1_1C, frame2_1C, pyramid1, pyramid2, frame1_features,
frame2_features, number_of_features, optical_flow_window, 5,
optical_flow_found_feature, optical_flow_feature_error,
optical_flow_termination_criteria, 0 );
for(int i = 0; i < number_of_features; i++)
{
if ( optical_flow_found_feature[i] == 0 ) continue;
int line_thickness; line_thickness = 1;
CvScalar line_color; line_color = CV_RGB(255,0,0);
CvPoint p,q;
p.x = (int) frame1_features[i].x;
p.y = (int) frame1_features[i].y;
q.x = (int) frame2_features[i].x;
q.y = (int) frame2_features[i].y;
double angle; angle = atan2( (double) p.y - q.y, (double) p.x - q.x );
double hypotenuse; hypotenuse = sqrt( square(p.y - q.y) + square(p.x - q.x) );
q.x = (int) (p.x - 3 * hypotenuse * cos(angle));
q.y = (int) (p.y - 3 * hypotenuse * sin(angle));
cvLine( frame1, p, q, line_color, line_thickness, CV_AA, 0 );
p.x = (int) (q.x + 9 * cos(angle + pi / 4));
p.y = (int) (q.y + 9 * sin(angle + pi / 4));
cvLine( frame1, p, q, line_color, line_thickness, CV_AA, 0 );
p.x = (int) (q.x + 9 * cos(angle - pi / 4));
p.y = (int) (q.y + 9 * sin(angle - pi / 4));
cvLine( frame1, p, q, line_color, line_thickness, CV_AA, 0 );
}
cvShowImage("Optical Flow", frame1);
int key_pressed;
key_pressed = cvWaitKey(0);
if (key_pressed == 'b' || key_pressed == 'B') current_frame--;
else current_frame++;
if (current_frame < 0) current_frame = 0;
if (current_frame >= number_of_frames - 1) current_frame = number_of_frames - 2;
}
}
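To tie this back to the particle filter question: one possible shape for the tracker (an untested sketch of my own, not a finished implementation) is a bootstrap filter whose "move" step is a random walk, optionally drifted by the optical-flow direction computed above, and whose weights come from how yellow the image looks around each particle:
// Sketch only (needs <vector> and the OpenCV C++ headers).
// measureYellowness is a placeholder: define your own likelihood, e.g. the
// fraction of in-range HSV pixels in a small window centred on (x, y).
float measureYellowness(const cv::Mat& hsv, float x, float y);
struct Particle { float x, y, w; };
void predict(std::vector<Particle>& ps, float dx, float dy, float sigma, cv::RNG& rng)
{
    for (size_t i = 0; i < ps.size(); i++)      // "move" step: drift + noise
    {
        ps[i].x += dx + (float)rng.gaussian(sigma);
        ps[i].y += dy + (float)rng.gaussian(sigma);
    }
}
void update(std::vector<Particle>& ps, const cv::Mat& hsv)
{
    float sum = 0.f;
    for (size_t i = 0; i < ps.size(); i++)      // weight by the measurement
    {
        ps[i].w = measureYellowness(hsv, ps[i].x, ps[i].y);
        sum += ps[i].w;
    }
    for (size_t i = 0; i < ps.size(); i++)
        ps[i].w /= (sum > 0.f ? sum : 1.f);
    // then resample proportionally to w (systematic resampling, not shown);
    // occlusion is handled naturally: weights flatten and the cloud spreads.
}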

Approximate photo of a simple drawing using lines

As an input I have a photo of a simple symbol, e.g.: https://www.dropbox.com/s/nrmsvfd0le0bkke/symbol.jpg
I would like to detect the straight lines in it, i.e. the start and end points of each line. In this case, assuming the top left of the symbol is (0,0), the lines would be defined like this:
start end (coordinates of beginning and end of a line)
1. (0,0); (0,10) (vertical line)
2. (0,10); (15, 15)
3. (15,15); (0, 20)
4. (0,20); (0,30)
How can I do it (preferably using OpenCV)? I thought about Hough lines, but they seem to work well only for perfect, thin, straight lines, which is not the case in a drawing. I'll probably work on a binarized image, too.
Give this a try:
Apply a thinning algorithm on the thresholded image.
Find contours.
Run approxPolyDP on the found contours (a minimal sketch of these last two steps follows after the references).
See some references:
approxpolydp-for-edge-maps
Creating Bounding boxes and circles for contours
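A minimal sketch of the contours + approxPolyDP part (the thinning step is omitted here, and the file name, threshold and epsilon values are assumptions to tune for your image):
// Sketch: binarize, find contours, approximate each contour by a polyline.
cv::Mat gray = cv::imread("symbol.jpg", CV_LOAD_IMAGE_GRAYSCALE);
cv::Mat bin;
cv::threshold(gray, bin, 128, 255, CV_THRESH_BINARY_INV);     // drawing becomes white
// (run your thinning/skeletonization on bin here)
std::vector<std::vector<cv::Point> > contours;
cv::findContours(bin, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE); // modifies bin
for (size_t i = 0; i < contours.size(); i++)
{
    std::vector<cv::Point> poly;
    double eps = 0.01 * cv::arcLength(contours[i], true);      // tolerance, tune it
    cv::approxPolyDP(contours[i], poly, eps, false);
    // consecutive points in poly are the start/end points of the straight segments
}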
Maybe you can work from this one. Assuming a perfect binarization:
run HoughLinesP
(not implemented) try to group the detected lines
I used this code:
int main()
{
cv::Mat image = cv::imread("HoughLinesP_perfect.png");
cv::Mat gray;
cv::cvtColor(image,gray,CV_BGR2GRAY);
cv::Mat output; image.copyTo(output);
cv::Mat g_thres = gray == 0;
std::vector<cv::Vec4i> lines;
//cv::HoughLinesP( binary, lines, 1, 2*CV_PI/180, 100, 100, 50 );
// cv::HoughLinesP( h_thres, lines, 1, CV_PI/180, 100, image.cols/2, 10 );
cv::HoughLinesP( g_thres, lines, 1, CV_PI/(4*180.0), 50, image.cols/20, 10 );
for( size_t i = 0; i < lines.size(); i++ )
{
cv::line( output, cv::Point(lines[i][0], lines[i][1]),
cv::Point(lines[i][2], lines[i][3]), cv::Scalar(155,255,155), 1, 8 );
}
cv::imshow("g thres", g_thres);
cv::imwrite("HoughLinesP_out.png", output);
cv::resize(output, output, cv::Size(), 0.5,0.5);
cv::namedWindow("output"); cv::imshow("output", output);
cv::waitKey(-1);
std::cout << "finished" << std::endl;
return 0;
}
EDIT:
updated code with simple line clustering (the minimum_distance function is taken from SO):
float minimum_distance(cv::Point2f v, cv::Point2f w, cv::Point2f p) {
// Return minimum distance between line segment vw and point p
const float l2 = cv::norm(w-v) * cv::norm(w-v); // i.e. |w-v|^2 - avoid a sqrt
if (l2 == 0.0) return cv::norm(p-v); // v == w case
// Consider the line extending the segment, parameterized as v + t (w - v).
// We find projection of point p onto the line.
// It falls where t = [(p-v) . (w-v)] / |w-v|^2
//const float t = dot(p - v, w - v) / l2;
float t = ((p-v).x * (w-v).x + (p-v).y * (w-v).y)/l2;
if (t < 0.0) return cv::norm(p-v); // Beyond the 'v' end of the segment
else if (t > 1.0) return cv::norm(p-w); // Beyond the 'w' end of the segment
const cv::Point2f projection = v + t * (w - v); // Projection falls on the segment
return cv::norm(p - projection);
}
int main()
{
cv::Mat image = cv::imread("HoughLinesP_perfect.png");
cv::Mat gray;
cv::cvtColor(image,gray,CV_BGR2GRAY);
cv::Mat output; image.copyTo(output);
cv::Mat g_thres = gray == 0;
std::vector<cv::Vec4i> lines;
cv::HoughLinesP( g_thres, lines, 1, CV_PI/(4*180.0), 50, image.cols/20, 10 );
float minDist = 100;
std::vector<cv::Vec4i> lines_filtered;
for( size_t i = 0; i < lines.size(); i++ )
{
bool keep = true;
int overwrite = -1;
cv::Point2f a(lines[i][0], lines[i][1]);
cv::Point2f b(lines[i][2], lines[i][3]);
float lengthAB = cv::norm(a-b);
for( size_t j = 0; j < lines_filtered.size(); j++ )
{
cv::Point2f c(lines_filtered[j][0], lines_filtered[j][1]);
cv::Point2f d(lines_filtered[j][2], lines_filtered[j][3]);
float distCDA = minimum_distance(c,d,a);
float distCDB = minimum_distance(c,d,b);
float lengthCD = cv::norm(c-d);
if((distCDA < minDist) && (distCDB < minDist))
{
if(lengthCD >= lengthAB)
{
keep = false;
}
else
{
overwrite = j;
}
}
}
if(keep)
{
if(overwrite >= 0)
{
lines_filtered[overwrite] = lines[i];
}
else
{
lines_filtered.push_back(lines[i]);
}
}
}
for( size_t i = 0; i < lines_filtered.size(); i++ )
{
cv::line( output, cv::Point(lines_filtered[i][0], lines_filtered[i][1]),
cv::Point(lines_filtered[i][2], lines_filtered[i][3]), cv::Scalar(155,255,155), 2, 8 );
}
cv::imshow("g thres", g_thres);
cv::imwrite("HoughLinesP_out.png", output);
cv::resize(output, output, cv::Size(), 0.5,0.5);
cv::namedWindow("output"); cv::imshow("output", output);
cv::waitKey(-1);
std::cout << "finished" << std::endl;
return 0;
}
You should try the Hough Line Transform. Here is an example from this website:
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <iostream>
using namespace cv;
using namespace std;
int main()
{
Mat src = imread("building.jpg", 0);
Mat dst, cdst;
Canny(src, dst, 50, 200, 3);
cvtColor(dst, cdst, CV_GRAY2BGR);
vector<Vec2f> lines;
// detect lines
HoughLines(dst, lines, 1, CV_PI/180, 150, 0, 0 );
// draw lines
for( size_t i = 0; i < lines.size(); i++ )
{
float rho = lines[i][0], theta = lines[i][1];
Point pt1, pt2;
double a = cos(theta), b = sin(theta);
double x0 = a*rho, y0 = b*rho;
pt1.x = cvRound(x0 + 1000*(-b));
pt1.y = cvRound(y0 + 1000*(a));
pt2.x = cvRound(x0 - 1000*(-b));
pt2.y = cvRound(y0 - 1000*(a));
line( cdst, pt1, pt2, Scalar(0,0,255), 3, CV_AA);
}
imshow("source", src);
imshow("detected lines", cdst);
waitKey();
return 0;
}
With this you should be able to tweak it and get the properties you are looking for (vertices).
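If what you are after is the segment endpoints (the "vertices" from the question) rather than infinite lines, the probabilistic variant is usually handier; here is a short sketch reusing dst and cdst from the example above (the threshold, minimum length and maximum gap values are guesses to tune):
// Sketch: HoughLinesP returns finite segments as (x1, y1, x2, y2) entries.
vector<Vec4i> segments;
HoughLinesP(dst, segments, 1, CV_PI/180, 50, 30, 10);   // threshold, minLineLength, maxLineGap
for( size_t i = 0; i < segments.size(); i++ )
{
    Point p1(segments[i][0], segments[i][1]);            // start point of the segment
    Point p2(segments[i][2], segments[i][3]);            // end point of the segment
    line( cdst, p1, p2, Scalar(0,255,0), 2, CV_AA);
}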

Online Face Recognition using OpenCV

I am trying to implement online face recognition using the webcam. I am using these two websites as references:
shervinemami.co.cc
cognotics.com
I have a few questions:
In face recognition, there are 6 steps:
Grab a frame from the camera
Detect a face within the image
Crop the frame to show just the face
Convert the frame to greyscale
Preprocess the image
Recognize the person in the image.
I am able to do the first five steps; it is the last step I am not able to do. I am not sure how to link step 5 to step 6.
I have already created the train.txt and test.txt files, which contain the information about the training and testing images, and I have already added functions such as learn() and doPCA() to the code.
But the question is how to use these functions in main to recognize an image that has already been preprocessed.
I need some help with this.
Attached the code below:
// Real-time.cpp : Defines the entry point for the console application.
#include "stdafx.h"
#include <cv.h>
#include <cxcore.h>
#include <highgui.h>
#include <cvaux.h>
IplImage ** faceImgArr = 0; // array of face images
CvMat * personNumTruthMat = 0; // array of person numbers
int nTrainFaces = 0; // the number of training images
int nEigens = 0; // the number of eigenvalues
IplImage * pAvgTrainImg = 0; // the average image
IplImage ** eigenVectArr = 0; // eigenvectors
CvMat * eigenValMat = 0; // eigenvalues
CvMat * projectedTrainFaceMat = 0; // projected training faces
IplImage* getCameraFrame(CvCapture* &camera);
IplImage* detectFaces( IplImage *img ,CvHaarClassifierCascade* facecascade,CvMemStorage* storage );
CvRect detectFaceInImage(IplImage *inputImg, CvHaarClassifierCascade* cascade);
IplImage* preprocess( IplImage* inputImg);
IplImage* resizeImage(const IplImage *origImg, int newWidth,
int newHeight, bool keepAspectRatio);
void learn();
void recognize();
void doPCA();
void storeTrainingData();
int loadTrainingData(CvMat ** pTrainPersonNumMat);
int findNearestNeighbor(float * projectedTestFace);
int loadFaceImgArray(char * filename);
int _tmain(int argc, _TCHAR* argv[])
{
CvCapture* camera = 0; // The camera device.
CvMemStorage *storage;
cvNamedWindow( "Realtime:", CV_WINDOW_AUTOSIZE);
char *faceCascadeFilename = "C:/OpenCV2.1/data/haarcascades/haarcascade_frontalface_alt.xml";
CvHaarClassifierCascade* faceCascade;
faceCascade = (CvHaarClassifierCascade*)cvLoad(faceCascadeFilename, 0, 0, 0);
storage = cvCreateMemStorage( 0 );
learn();
while ( cvWaitKey(10) != 27 ) // Quit on "Escape" key
{
IplImage *frame = getCameraFrame(camera);
//IplImage* resized=cvCreateImage(cvSize(420,240),frame->depth,3);
//cvResizeWindow( "Image:", 640, 480);
//cvResize(frame,resized);
//cvShowImage( "Realtime:", resized );
IplImage *imgA = resizeImage(frame, 420,240, true);
IplImage *frame1 = detectFaces(imgA,faceCascade,storage);
frame1 = preprocess(frame1);
}
// Free the camera.
cvReleaseCapture( &camera );
cvReleaseMemStorage( &storage );
return 0;
}
IplImage* getCameraFrame(CvCapture* &camera)
{
IplImage *frame;
int w, h;
// If the camera hasn't been initialized, then open it.
if (!camera) {
printf("Acessing the camera ...\n");
camera = cvCreateCameraCapture( 0 );
if (!camera) {
printf("Couldn't access the camera.\n");
exit(1);
}
// Try to set the camera resolution to 320 x 240.
cvSetCaptureProperty(camera, CV_CAP_PROP_FRAME_WIDTH, 320);
cvSetCaptureProperty(camera, CV_CAP_PROP_FRAME_HEIGHT, 240);
// Get the first frame, to make sure the camera is initialized.
frame = cvQueryFrame( camera );
if (frame) {
w = frame->width;
h = frame->height;
printf("Got the camera at %dx%d resolution.\n", w, h);
}
// Wait a little, so that the camera can auto-adjust its brightness.
Sleep(1000); // (in milliseconds)
}
// Wait until the next camera frame is ready, then grab it.
frame = cvQueryFrame( camera );
if (!frame) {
printf("Couldn't grab a camera frame.\n");
exit(1);
}
return frame;
}
CvRect detectFaceInImage(IplImage *inputImg, CvHaarClassifierCascade* cascade)
{
// Smallest face size.
CvSize minFeatureSize = cvSize(20, 20);
// Only search for 1 face.
int flags = CV_HAAR_FIND_BIGGEST_OBJECT | CV_HAAR_DO_ROUGH_SEARCH;
// How detailed should the search be.
float search_scale_factor = 1.1f;
IplImage *detectImg;
IplImage *greyImg = 0;
CvMemStorage* storage;
CvRect rc;
double t;
CvSeq* rects;
CvSize size;
int i, ms, nFaces;
storage = cvCreateMemStorage(0);
cvClearMemStorage( storage );
// If the image is color, use a greyscale copy of the image.
detectImg = (IplImage*)inputImg;
if (inputImg->nChannels > 1) {
size = cvSize(inputImg->width, inputImg->height);
greyImg = cvCreateImage(size, IPL_DEPTH_8U, 1 );
cvCvtColor( inputImg, greyImg, CV_BGR2GRAY );
detectImg = greyImg; // Use the greyscale image.
}
// Detect all the faces in the greyscale image.
t = (double)cvGetTickCount();
rects = cvHaarDetectObjects( detectImg, cascade, storage,
search_scale_factor, 3, flags, minFeatureSize);
t = (double)cvGetTickCount() - t;
ms = cvRound( t / ((double)cvGetTickFrequency() * 1000.0) );
nFaces = rects->total;
printf("Face Detection took %d ms and found %d objects\n", ms, nFaces);
// Get the first detected face (the biggest).
if (nFaces > 0)
rc = *(CvRect*)cvGetSeqElem( rects, 0 );
else
rc = cvRect(-1,-1,-1,-1); // Couldn't find the face.
if (greyImg)
cvReleaseImage( &greyImg );
cvReleaseMemStorage( &storage );
//cvReleaseHaarClassifierCascade( &cascade );
return rc; // Return the biggest face found, or (-1,-1,-1,-1).
}
IplImage* detectFaces( IplImage *img ,CvHaarClassifierCascade* facecascade,CvMemStorage* storage )
{
int i;
CvRect *r;
CvSeq *faces = cvHaarDetectObjects(
img,
facecascade,
storage,
1.1,
3,
0 /*CV_HAAR_DO_CANNY_PRUNNING*/,
cvSize( 40, 40 ) );
int padding_width = 30; // pixels
int padding_height = 30; // pixels
for( i = 0 ; i < ( faces ? faces->total : 0 ) ; i++ ) {
r = ( CvRect* )cvGetSeqElem( faces, i );
cvRectangle( img,
cvPoint( r->x, r->y ),
cvPoint( r->x + r->width, r->y + r->height ),
CV_RGB( 255, 0, 0 ), 1, 8, 0 );
}
cvShowImage( "Realtime:", img );
//cropping the face
cvSetImageROI(img, cvRect(r->x,r->y,r->width,r->height));
IplImage *img2 = cvCreateImage(cvGetSize(img),
img->depth,
img->nChannels);
cvCopy(img, img2, NULL);
cvResetImageROI(img);
return img;
}
IplImage* preprocess( IplImage* inputImg){
IplImage *detectImg, *greyImg = 0;
IplImage *imageProcessed;
CvSize size;
detectImg = (IplImage*)inputImg;
if (inputImg->nChannels > 1) {
size = cvSize(inputImg->width, inputImg->height);
greyImg = cvCreateImage(size, IPL_DEPTH_8U, 1 );
cvCvtColor( inputImg, greyImg, CV_BGR2GRAY );
detectImg = greyImg; // Use the greyscale image.
}
imageProcessed = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1);
cvResize(detectImg, imageProcessed, CV_INTER_LINEAR);
cvEqualizeHist(imageProcessed, imageProcessed);
return imageProcessed;
}
IplImage* resizeImage(const IplImage *origImg, int newWidth,
int newHeight, bool keepAspectRatio)
{
IplImage *outImg = 0;
int origWidth;
int origHeight;
if (origImg) {
origWidth = origImg->width;
origHeight = origImg->height;
}
if (newWidth <= 0 || newHeight <= 0 || origImg == 0
|| origWidth <= 0 || origHeight <= 0) {
//cerr << "ERROR: Bad desired image size of " << newWidth
// << "x" << newHeight << " in resizeImage().\n";
exit(1);
}
if (keepAspectRatio) {
// Resize the image without changing its aspect ratio,
// by cropping off the edges and enlarging the middle section.
CvRect r;
// input aspect ratio
float origAspect = (origWidth / (float)origHeight);
// output aspect ratio
float newAspect = (newWidth / (float)newHeight);
// crop width to be origHeight * newAspect
if (origAspect > newAspect) {
int tw = (origHeight * newWidth) / newHeight;
r = cvRect((origWidth - tw)/2, 0, tw, origHeight);
}
else { // crop height to be origWidth / newAspect
int th = (origWidth * newHeight) / newWidth;
r = cvRect(0, (origHeight - th)/2, origWidth, th);
}
IplImage *croppedImg = cropImage(origImg, r);
// Call this function again, with the new aspect ratio image.
// Will do a scaled image resize with the correct aspect ratio.
outImg = resizeImage(croppedImg, newWidth, newHeight, false);
cvReleaseImage( &croppedImg );
}
else {
// Scale the image to the new dimensions,
// even if the aspect ratio will be changed.
outImg = cvCreateImage(cvSize(newWidth, newHeight),
origImg->depth, origImg->nChannels);
if (newWidth > origImg->width && newHeight > origImg->height) {
// Make the image larger
cvResetImageROI((IplImage*)origImg);
// CV_INTER_LINEAR: good at enlarging.
// CV_INTER_CUBIC: good at enlarging.
cvResize(origImg, outImg, CV_INTER_LINEAR);
}
else {
// Make the image smaller
cvResetImageROI((IplImage*)origImg);
// CV_INTER_AREA: good at shrinking (decimation) only.
cvResize(origImg, outImg, CV_INTER_AREA);
}
}
return outImg;
}
void learn()
{
int i, offset;
// load training data
nTrainFaces = loadFaceImgArray("C:/Users/HP/Desktop/OpenCV/50_images_of_15_people.txt");
if( nTrainFaces < 2 )
{
fprintf(stderr,
"Need 2 or more training faces\n"
"Input file contains only %d\n", nTrainFaces);
return;
}
// do PCA on the training faces
doPCA();
// project the training images onto the PCA subspace
projectedTrainFaceMat = cvCreateMat( nTrainFaces, nEigens, CV_32FC1 );
offset = projectedTrainFaceMat->step / sizeof(float);
for(i=0; i<nTrainFaces; i++)
{
//int offset = i * nEigens;
cvEigenDecomposite(
faceImgArr[i],
nEigens,
eigenVectArr,
0, 0,
pAvgTrainImg,
//projectedTrainFaceMat->data.fl + i*nEigens);
projectedTrainFaceMat->data.fl + i*offset);
}
// store the recognition data as an xml file
storeTrainingData();
}
void recognize()
{
int i, nTestFaces = 0; // the number of test images
CvMat * trainPersonNumMat = 0; // the person numbers during training
float * projectedTestFace = 0;
// load test images and ground truth for person number
nTestFaces = loadFaceImgArray("C:/Users/HP/Desktop/OpenCV/test.txt");
printf("%d test faces loaded\n", nTestFaces);
// load the saved training data
if( !loadTrainingData( &trainPersonNumMat ) ) return;
// project the test images onto the PCA subspace
projectedTestFace = (float *)cvAlloc( nEigens*sizeof(float) );
for(i=0; i<nTestFaces; i++)
{
int iNearest, nearest, truth;
// project the test image onto the PCA subspace
cvEigenDecomposite(
faceImgArr[i],
nEigens,
eigenVectArr,
0, 0,
pAvgTrainImg,
projectedTestFace);
iNearest = findNearestNeighbor(projectedTestFace);
truth = personNumTruthMat->data.i[i];
nearest = trainPersonNumMat->data.i[iNearest];
printf("nearest = %d, Truth = %d\n", nearest, truth);
}
}
int loadTrainingData(CvMat ** pTrainPersonNumMat)
{
CvFileStorage * fileStorage;
int i;
// create a file-storage interface
fileStorage = cvOpenFileStorage( "facedata.xml", 0, CV_STORAGE_READ );
if( !fileStorage )
{
fprintf(stderr, "Can't open facedata.xml\n");
return 0;
}
nEigens = cvReadIntByName(fileStorage, 0, "nEigens", 0);
nTrainFaces = cvReadIntByName(fileStorage, 0, "nTrainFaces", 0);
*pTrainPersonNumMat = (CvMat *)cvReadByName(fileStorage, 0, "trainPersonNumMat", 0);
eigenValMat = (CvMat *)cvReadByName(fileStorage, 0, "eigenValMat", 0);
projectedTrainFaceMat = (CvMat *)cvReadByName(fileStorage, 0, "projectedTrainFaceMat", 0);
pAvgTrainImg = (IplImage *)cvReadByName(fileStorage, 0, "avgTrainImg", 0);
eigenVectArr = (IplImage **)cvAlloc(nTrainFaces*sizeof(IplImage *));
for(i=0; i<nEigens; i++)
{
char varname[200];
sprintf( varname, "eigenVect_%d", i );
eigenVectArr[i] = (IplImage *)cvReadByName(fileStorage, 0, varname, 0);
}
// release the file-storage interface
cvReleaseFileStorage( &fileStorage );
return 1;
}
void storeTrainingData()
{
CvFileStorage * fileStorage;
int i;
// create a file-storage interface
fileStorage = cvOpenFileStorage( "facedata.xml", 0, CV_STORAGE_WRITE );
// store all the data
cvWriteInt( fileStorage, "nEigens", nEigens );
cvWriteInt( fileStorage, "nTrainFaces", nTrainFaces );
cvWrite(fileStorage, "trainPersonNumMat", personNumTruthMat, cvAttrList(0,0));
cvWrite(fileStorage, "eigenValMat", eigenValMat, cvAttrList(0,0));
cvWrite(fileStorage, "projectedTrainFaceMat", projectedTrainFaceMat, cvAttrList(0,0));
cvWrite(fileStorage, "avgTrainImg", pAvgTrainImg, cvAttrList(0,0));
for(i=0; i<nEigens; i++)
{
char varname[200];
sprintf( varname, "eigenVect_%d", i );
cvWrite(fileStorage, varname, eigenVectArr[i], cvAttrList(0,0));
}
// release the file-storage interface
cvReleaseFileStorage( &fileStorage );
}
int findNearestNeighbor(float * projectedTestFace)
{
//double leastDistSq = 1e12;
double leastDistSq = DBL_MAX;
int i, iTrain, iNearest = 0;
for(iTrain=0; iTrain<nTrainFaces; iTrain++)
{
double distSq=0;
for(i=0; i<nEigens; i++)
{
float d_i =
projectedTestFace[i] -
projectedTrainFaceMat->data.fl[iTrain*nEigens + i];
//distSq += d_i*d_i / eigenValMat->data.fl[i]; // Mahalanobis
distSq += d_i*d_i; // Euclidean
}
if(distSq < leastDistSq)
{
leastDistSq = distSq;
iNearest = iTrain;
}
}
return iNearest;
}
void doPCA()
{
int i;
CvTermCriteria calcLimit;
CvSize faceImgSize;
// set the number of eigenvalues to use
nEigens = nTrainFaces-1;
// allocate the eigenvector images
faceImgSize.width = faceImgArr[0]->width;
faceImgSize.height = faceImgArr[0]->height;
eigenVectArr = (IplImage**)cvAlloc(sizeof(IplImage*) * nEigens);
for(i=0; i<nEigens; i++)
eigenVectArr[i] = cvCreateImage(faceImgSize, IPL_DEPTH_32F, 1);
// allocate the eigenvalue array
eigenValMat = cvCreateMat( 1, nEigens, CV_32FC1 );
// allocate the averaged image
pAvgTrainImg = cvCreateImage(faceImgSize, IPL_DEPTH_32F, 1);
// set the PCA termination criterion
calcLimit = cvTermCriteria( CV_TERMCRIT_ITER, nEigens, 1);
// compute average image, eigenvalues, and eigenvectors
cvCalcEigenObjects(
nTrainFaces,
(void*)faceImgArr,
(void*)eigenVectArr,
CV_EIGOBJ_NO_CALLBACK,
0,
0,
&calcLimit,
pAvgTrainImg,
eigenValMat->data.fl);
cvNormalize(eigenValMat, eigenValMat, 1, 0, CV_L1, 0);
}
int loadFaceImgArray(char * filename)
{
FILE * imgListFile = 0;
char imgFilename[512];
int iFace, nFaces=0;
// open the input file
if( !(imgListFile = fopen(filename, "r")) )
{
fprintf(stderr, "Can\'t open file %s\n", filename);
return 0;
}
// count the number of faces
while( fgets(imgFilename, 512, imgListFile) ) ++nFaces;
rewind(imgListFile);
// allocate the face-image array and person number matrix
faceImgArr = (IplImage **)cvAlloc( nFaces*sizeof(IplImage *) );
personNumTruthMat = cvCreateMat( 1, nFaces, CV_32SC1 );
// store the face images in an array
for(iFace=0; iFace<nFaces; iFace++)
{
// read person number and name of image file
fscanf(imgListFile,
"%d %s", personNumTruthMat->data.i+iFace, imgFilename);
// load the face image
faceImgArr[iFace] = cvLoadImage(imgFilename, CV_LOAD_IMAGE_GRAYSCALE);
if( !faceImgArr[iFace] )
{
fprintf(stderr, "Can\'t load image from %s\n", imgFilename);
return 0;
}
}
fclose(imgListFile);
return nFaces;
}
My answer may come late, but it might be useful for others. I am working on a similar project and I faced the same problem. I solved it by writing a function that saves the detected, cropped and preprocessed images to the hard disk of my computer (using CvWrite), and then feeding the paths of the saved images to the recognition part of the code. It has made my life easier. It has been a bit harder for me to pass the parameters of the rect of the region of interest directly. If you or someone else has done this, it would be great to share the code with us.
You can use the following code to save the images after resizing them to a constant size, using the resizeImage function in your code.
void saveCroppedFaces(CvSeq* tempon,IplImage* DetectedImage)
{
char* name;
int nFaces;
CvRect rect;
nFaces=tempon->total;
name =new char[nFaces];
IplImage* cropped = 0;
IplImage* croppedResized=0;
Mat croped;
for(int k=0;k<nFaces;k++)
{
itoa(k,(name+k),10);
rect = *(CvRect*)cvGetSeqElem( tempon, k );
cropped= cropImage(DetectedImage,rect);
//i can resize the cropped faces in to a fixed size here
//i can write a function to save images and call it so
//that it will save it in to hard drive
//cvNamedWindow((name+k),CV_WINDOW_AUTOSIZE);
//cvShowImage((name+k),cropped);
croppedResized=resizeImage(cropped,60,60);
croped=IplToMatConverter(croppedResized);
saveROI(croped,itoa(k,(name+k),10));
cvReleaseImage(&cropped);
}
delete[] name;
name = NULL;
}
void saveROI(Mat mat,String outputFileName)
{
string store_path("C://Users/sizusuzu/Desktop/Images/FaceDetection2/" + outputFileName + ".jpg");
bool write_success = imwrite(store_path,mat);
}
After this, you can convert the IplImage* to a Mat using:
Mat IplToMatConverter(IplImage* imageToMat)
{
Mat mat = cvarrToMat(imageToMat);
return mat;
}
And use the Mat in the FaceRecognizer API, or just do it the other/harder way.
Thanks
I just read
int _tmain(int argc, _TCHAR* argv[])
{
.......
}
part of your code. This code is used for detecting the face in the image. Let's say it is Face_x. Now extract features from Face_x and call them F_x. In your database, you should store features {F_1, F_2, ..., F_N} extracted from N different faces {Face_1, Face_2, ..., Face_N}.
A simple algorithm to recognize Face_x is to calculate the Euclidean distances between F_x and the N stored features. The minimum distance (below a threshold) gives the corresponding face. If the minimum distance is not below the threshold, then Face_x is a new face: add its feature F_x to the database. This way your database grows; you can even start with no features in the database, and it will grow with each new face.
I hope this method leads you to a solution.
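To make the link from step 5 to step 6 concrete with the functions already in the posted code, something along these lines could go in the main loop right after preprocess() — a sketch only, assuming learn()/storeTrainingData() have already produced facedata.xml and that frame1 has been resized to the same width and height as the training images:
// Sketch: project the preprocessed face onto the PCA subspace and find the
// nearest training face. Uses loadTrainingData, cvEigenDecomposite and
// findNearestNeighbor exactly as they appear in the code above.
CvMat* trainPersonNumMat = 0;
if( loadTrainingData( &trainPersonNumMat ) )        // do this once, not every frame
{
    float* projectedTestFace = (float*)cvAlloc( nEigens * sizeof(float) );
    cvEigenDecomposite( frame1,                     // preprocessed, grayscale, resized face
                        nEigens, eigenVectArr,
                        0, 0, pAvgTrainImg,
                        projectedTestFace );
    int iNearest = findNearestNeighbor( projectedTestFace );
    int person   = trainPersonNumMat->data.i[iNearest];
    printf( "Recognized person %d\n", person );
    cvFree( &projectedTestFace );
}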

How to increase haar detector's window size in OpenCV

I am using the code available on this website: http://nashruddin.com/OpenCV_Face_Detection to do face detection.
I would like to increase the size of the detected face region, but I am not sure how to do it. I need some help with it.
The code i am using is this:
//
#include "stdafx.h"
#include <stdio.h>
#include <cv.h>
#include <highgui.h>
CvHaarClassifierCascade *cascade;
CvMemStorage *storage;
void detectFaces( IplImage *img );
int main( int argc, char** argv )
{
CvCapture *capture;
IplImage *frame;
int key;
char *filename = "C:/OpenCV2.1/data/haarcascades/haarcascade_frontalface_alt.xml";
cascade = ( CvHaarClassifierCascade* )cvLoad( filename, 0, 0, 0 );
storage = cvCreateMemStorage( 0 );
capture = cvCaptureFromCAM( 0 );
assert( cascade && storage && capture );
cvNamedWindow( "video", 1 );
while( key != 'q' ) {
frame = cvQueryFrame( capture );
if( !frame ) {
fprintf( stderr, "Cannot query frame!\n" );
break;
}
cvFlip( frame, frame, -1 );
frame->origin = 0;
detectFaces( frame );
key = cvWaitKey( 10 );
}
cvReleaseCapture( &capture );
cvDestroyWindow( "video" );
cvReleaseHaarClassifierCascade( &cascade );
cvReleaseMemStorage( &storage );
return 0;
}
void detectFaces( IplImage *img )
{
int i;
CvSeq *faces = cvHaarDetectObjects(
img,
cascade,
storage,
1.1,
3,
0 /*CV_HAAR_DO_CANNY_PRUNNING*/,
cvSize( 40, 40 ) );
for( i = 0 ; i < ( faces ? faces->total : 0 ) ; i++ ) {
CvRect *r = ( CvRect* )cvGetSeqElem( faces, i );
cvRectangle( img,
cvPoint( r->x, r->y ),
cvPoint( r->x + r->width, r->y + r->height ),
CV_RGB( 255, 0, 0 ), 1, 8, 0 );
}
cvShowImage( "video", img );
}
This increases the size of the rectangle around the face. If you meant increasing the haar detector's window size, please update your question.
int padding_width = 30; // pixels
int padding_height = 30; // pixels
for( i = 0 ; i < ( faces ? faces->total : 0 ) ; i++ ) {
CvRect *r = ( CvRect* )cvGetSeqElem( faces, i );
// Yes yes, all of this could be written much more compactly.
// It was written like this for clarity.
int topleft_x = r->x - (padding_width / 2);
int topleft_y = r->y - (padding_height / 2);
if (topleft_x < 0)
topleft_x = 0;
if (topleft_y < 0)
topleft_y = 0;
int bottomright_x = r->x + r->width + (padding_width / 2);
int bottomright_y = r->y + r->height + (padding_height / 2);
if (bottomright_x >= img->width)
bottomright_x = img->width - 1;
if (bottomright_y >= img->height)
bottomright_y = img->height - 1;
cvRectangle( img,
cvPoint(topleft_x, topleft_y),
cvPoint(bottomright_x, bottomright_y),
CV_RGB( 255, 0, 0 ), 1, 8, 0 );
}
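If what you actually meant is increasing the detector's own window size (so that it only reports faces above a certain size), the last argument of cvHaarDetectObjects already controls the minimum detection window; here is a sketch with an illustrative value:
// Sketch: only search for faces at least 80x80 pixels.
CvSeq *faces = cvHaarDetectObjects(
    img,
    cascade,
    storage,
    1.1,                 // scale factor between pyramid levels
    3,                   // minimum neighbours
    0 /*CV_HAAR_DO_CANNY_PRUNNING*/,
    cvSize( 80, 80 ) );  // minimum window size, was cvSize(40, 40)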
