Embedding a video in another video using OpenCV

I wish to run another video inside the window of the main video. Here is my attempted code:
#include <cv.h>
#include <highgui.h>
#include <iostream>

using namespace std;

void OverlayImage(IplImage* src, IplImage* overlay, CvScalar S, CvScalar D)
{
    CvPoint location;
    //location.x = (0.5*(src->width))-50;
    //location.y = src->height-110;
    //cout << location.x << " " << location.y << endl;
    location.x = 100;
    location.y = 100;
    for (int i = location.y; i < (location.y + overlay->height); i++) {
        for (int j = location.x; j < (location.x + overlay->width); j++) {
            CvScalar source = cvGet2D(src, i, j);
            CvScalar over = cvGet2D(overlay, i - location.y, j - location.x);
            CvScalar merged;
            // blend each channel with the given source/overlay weights
            for (int c = 0; c < 4; c++)
                merged.val[c] = (S.val[c] * source.val[c] + D.val[c] * over.val[c]);
            // (i, j) already includes the offset, so write back at (i, j);
            // adding the offset again would place the overlay twice as far in
            cvSet2D(src, i, j, merged);
        }
    }
}
int main(int argc, char* argv[])
{
    CvCapture* capture = NULL;
    CvCapture* ad = NULL;
    capture = cvCaptureFromAVI("Cricketc11.avi");
    ad = cvCaptureFromAVI("Cricketc1.avi");
    assert(ad);
    assert(capture);
    cvNamedWindow("Video", 0);
    int fps = (int)cvGetCaptureProperty( capture, CV_CAP_PROP_FPS );
    int noOfFrames = (int)cvGetCaptureProperty( capture, CV_CAP_PROP_FRAME_COUNT );
    int height = (int)cvGetCaptureProperty( capture, CV_CAP_PROP_FRAME_HEIGHT );
    int width = (int)cvGetCaptureProperty( capture, CV_CAP_PROP_FRAME_WIDTH );
    cout << height << " " << width << endl;
    int fpsad = (int)cvGetCaptureProperty( ad, CV_CAP_PROP_FPS );
    int noOfFramesad = (int)cvGetCaptureProperty( ad, CV_CAP_PROP_FRAME_COUNT );
    int heightad = (int)cvGetCaptureProperty( ad, CV_CAP_PROP_FRAME_HEIGHT );
    int widthad = (int)cvGetCaptureProperty( ad, CV_CAP_PROP_FRAME_WIDTH );
    IplImage* tempimg = NULL;
    IplImage* tempad = NULL;
    while (capture) {
        tempimg = cvQueryFrame(capture);
        assert(tempimg);
        if (ad) {
            tempad = cvQueryFrame(ad);
            assert(tempad);
            IplImage* newimg = cvCreateImage(cvSize(100, 100), IPL_DEPTH_8U, tempad->nChannels);
            cvResize(tempad, newimg, 1);
            OverlayImage(tempimg, newimg, cvScalar(0, 0, 0, 0), cvScalar(1, 1, 1, 1));
        }
        else
            cvReleaseCapture(&ad);
        cvWaitKey(1000 / fps);
        cvShowImage("Video", tempimg);
    }
    cvReleaseCapture(&capture);
    cvDestroyAllWindows();
    return 0;
}
This code runs fine only when the two input videos are identical. If the videos have different lengths or frame rates, it gives an error after the embedded video finishes.
How can I correct that?

What happens
Each time you call cvQueryFrame(source), the internal frame counter of that source is incremented.
This is why your second movie needs to be as long (in frames) as the main movie.
As a workaround, I would suggest using an ad movie whose frame count (length * fps) is an integer ratio of the master movie's, and using temporary image buffers to hold the data you need.
An ideal solution would be to first interpolate the shorter (in frames) movie to the length of the longer one, then merge them as you do now, but temporal upsampling can be challenging to implement unless you settle for nearest-neighbour or linear interpolation.
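For the linear case, a minimal sketch (prev and next are hypothetical buffers in which you would keep copies of the two nearest decoded ad frames; alpha is the fractional position between them):

// Synthesize an intermediate ad frame by blending the two nearest
// decoded frames. prev and next must share size, depth and channels;
// alpha in [0,1] says how far the master timeline sits between them.
IplImage* interpolateFrame(const IplImage* prev, const IplImage* next, double alpha)
{
    IplImage* out = cvCreateImage(cvGetSize(prev), prev->depth, prev->nChannels);
    cvAddWeighted(prev, 1.0 - alpha, next, alpha, 0.0, out);
    return out; // caller releases with cvReleaseImage
}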
If the ad video is shorter
You can choose among several solutions:
detect that you have reached the end and stop sending an image
detect that you have reached the end and re-open the ad movie from the beginning (sketched below)
use a temporary image to always keep in memory the last valid frame from the ad movie and send this image if there is no new one
etc.
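A minimal sketch of the second option, using the same C API as your code (reopening the file is the simplest reliable way to rewind):

// Grab the next ad frame, looping back to the start when the ad ends.
IplImage* nextAdFrame(CvCapture** ad, const char* adFile)
{
    IplImage* frame = cvQueryFrame(*ad);
    if (!frame) {                        // the ad movie has ended
        cvReleaseCapture(ad);            // close it ...
        *ad = cvCaptureFromAVI(adFile);  // ... and reopen from frame 0
        frame = *ad ? cvQueryFrame(*ad) : NULL;
    }
    return frame; // owned by the capture; do not release it yourself
}

In your main loop you would then call tempad = nextAdFrame(&ad, "Cricketc1.avi"); instead of cvQueryFrame(ad).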

Related

How to increase BatchSize with Tensorflow's C++ API?

I took the code from https://gist.github.com/kyrs/9adf86366e9e4f04addb (which takes an OpenCV cv::Mat image as input and converts it to a tensor) and used it to label images with the model inception_v3_2016_08_28_frozen.pb from the TensorFlow tutorial (https://www.tensorflow.org/tutorials/image_recognition#usage_with_the_c_api). Everything worked fine with a batch size of 1. However, when I increase the batch size to 2 (or greater), the size of finalOutput (which is of type std::vector<tensorflow::Tensor>) is zero.
Here's the code to reproduce the error:
// Only for VisualStudio
#define COMPILER_MSVC
#define NOMINMAX

#include <string>
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/framework/tensor.h"

int batchSize = 2;
int height = 299;
int width = 299;
int depth = 3;
int mean = 0;
int stdev = 255;

// Set image paths
cv::String pathFilenameImg1 = "D:/IMGS/grace_hopper.jpg";
cv::String pathFilenameImg2 = "D:/IMGS/lenna.jpg";

// Set model paths
std::string graphFile = "D:/Tensorflow/models/inception_v3_2016_08_28_frozen.pb";
std::string labelfile = "D:/Tensorflow/models/imagenet_slim_labels.txt";
std::string InputName = "input";
std::string OutputName = "InceptionV3/Predictions/Reshape_1";

void read_prepare_image(cv::String pathImg, cv::Mat &imgPrepared) {
    // Read color image:
    cv::Mat imgBGR = cv::imread(pathImg);
    // Resize the image to fit the model's expected size
    // (note: cv::Size takes (width, height); both are 299 here):
    cv::Size s(height, width);
    cv::Mat imgResized;
    cv::resize(imgBGR, imgResized, s, 0, 0, cv::INTER_CUBIC);
    // Convert the image to float and normalize the data:
    imgResized.convertTo(imgPrepared, CV_32FC1);
    imgPrepared = imgPrepared - mean;
    imgPrepared = imgPrepared / stdev;
}

int main()
{
    // Read and prepare images using OpenCV:
    cv::Mat img1, img2;
    read_prepare_image(pathFilenameImg1, img1);
    read_prepare_image(pathFilenameImg2, img2);

    // Create a tensor for storing the data
    tensorflow::Tensor input_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({ batchSize, height, width, depth }));
    auto input_tensor_mapped = input_tensor.tensor<float, 4>();

    // Copy the image data into the tensor, swapping BGR to RGB:
    for (int b = 0; b < batchSize; ++b) {
        const float* source_data;
        if (b == 0)
            source_data = (float*)img1.data;
        else
            source_data = (float*)img2.data;
        for (int y = 0; y < height; ++y) {
            const float* source_row = source_data + (y * width * depth);
            for (int x = 0; x < width; ++x) {
                const float* source_pixel = source_row + (x * depth);
                const float* source_B = source_pixel + 0;
                const float* source_G = source_pixel + 1;
                const float* source_R = source_pixel + 2;
                input_tensor_mapped(b, y, x, 0) = *source_R;
                input_tensor_mapped(b, y, x, 1) = *source_G;
                input_tensor_mapped(b, y, x, 2) = *source_B;
            }
        }
    }

    // Load the graph:
    tensorflow::GraphDef graph_def;
    ReadBinaryProto(tensorflow::Env::Default(), graphFile, &graph_def);

    // Create a session with the graph
    std::unique_ptr<tensorflow::Session> session_inception(tensorflow::NewSession(tensorflow::SessionOptions()));
    session_inception->Create(graph_def);

    // Run the loaded graph
    std::vector<tensorflow::Tensor> finalOutput;
    session_inception->Run({ { InputName, input_tensor } }, { OutputName }, {}, &finalOutput);

    // Get the top 5 classes:
    std::cerr << "final output size = " << finalOutput.size() << std::endl;
    tensorflow::Tensor output = std::move(finalOutput.at(0));
    auto scores = output.flat<float>();
    std::cerr << "scores size=" << scores.size() << std::endl;
    std::ifstream label(labelfile);
    std::string line;
    std::vector<std::pair<float, std::string>> sorted;
    for (unsigned int i = 0; i <= 1000; ++i) {
        std::getline(label, line);
        sorted.emplace_back(scores(i), line);
    }
    std::sort(sorted.begin(), sorted.end());
    std::reverse(sorted.begin(), sorted.end());
    std::cout << "size of the sorted file is " << sorted.size() << std::endl;
    for (unsigned int i = 0; i < 5; ++i)
        std::cout << "The output of the current graph has category " << sorted[i].second << " with probability " << sorted[i].first << std::endl;
}
Am I missing anything? Any ideas?
Thanks in advance!
I had the same problem. When I changed to the model used in https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/benchmark (a different version of Inception), bigger batch sizes worked correctly.
Note that you need to change the input size from 299,299,3 to 224,224,3 and the input and output layer names to input:0 and output:0.
Probably the graph in the protobuf file had a fixed batch size of 1, and I was only changing the shape of the input, not the graph. The graph has to accept a variable batch size by setting the shape to (None, height, width, channels). This is done when you freeze the graph. Since the graph we have is already frozen, there is no way to change the batch size at this point.
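For what it's worth, you can check whether the batch dimension really is baked into the frozen graph by reading the input placeholder's shape attribute. A minimal sketch, assuming graph_def has been filled by ReadBinaryProto as in the question and that the placeholder is named input:

// Print the declared shape of the "input" placeholder; a first dimension
// of -1 means the batch size is variable, 1 means it is fixed to one image.
for (const tensorflow::NodeDef& node : graph_def.node()) {
    if (node.name() == "input" && node.attr().count("shape")) {
        const tensorflow::TensorShapeProto& shape = node.attr().at("shape").shape();
        for (int d = 0; d < shape.dim_size(); ++d)
            std::cout << "dim " << d << " = " << shape.dim(d).size() << std::endl;
    }
}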

Openni opencv kinect Bad Memory allocation

Basically I've got a loop which goes through all the Kinect's depth pixels. If a pixel is greater than 3000mm, it sets the pixel value to black.
For some reason this works only at close range while pointed at a wall. If I pull the Kinect back (giving it a larger area to scan) I get a bad memory allocation error. My code can be found below; I get the error inside the try/catch statement. Most of the code is from the OpenCV Kinect samples here and here.
Edit: I figured out the problem. The depth values are stored in a flat array rather than a matrix, so I need a better way of mapping a pixel's (x, y) coordinates to its location in that array than i = x + y*640.
#include <opencv.hpp>
#include <iostream>
#include <string>
#include <stdio.h>
#include <OpenNI.h>

using namespace std;
using namespace cv;

int main()
{
    openni::Device device;
    openni::VideoStream depth;
    const char* device_uri = openni::ANY_DEVICE;
    openni::Status ret = openni::OpenNI::initialize();

    // Open
    ret = device.open( device_uri );
    ret = depth.create( device, openni::SENSOR_DEPTH );
    if ( ret == openni::STATUS_OK )
    {
        // Start Depth
        depth.start();
    }

    // Get Depth Stream Min-Max Value
    int minDepthValue = depth.getMinPixelValue();
    int maxDepthValue = depth.getMaxPixelValue();
    //cout << "Depth min-Max Value : " << minDepthValue << "-" << maxDepthValue << endl;

    // Frame Information Reference
    openni::VideoFrameRef depthFrame;

    // Get Sensor Resolution Information
    int dImgWidth = depth.getVideoMode().getResolutionX();
    int dImgHeight = depth.getVideoMode().getResolutionY();

    // Depth Image Matrix
    cv::Mat dImg = cv::Mat( dImgHeight, dImgWidth, CV_8UC3 );
    Mat grey = cvCreateImage(cvSize(640, 480), 8, 1);

    for(;;)
    {
        depth.readFrame( &depthFrame );
        openni::DepthPixel* depthImgRaw = (openni::DepthPixel*)depthFrame.getData();
        for ( int i = 0; i < ( depthFrame.getDataSize() / sizeof( openni::DepthPixel ) ); i++ )
        {
            int idx = i * 3; // Grayscale
            unsigned char* data = &dImg.data[idx];
            int gray_scale = ( ( depthImgRaw[i] * 255 ) / ( maxDepthValue - minDepthValue ) );
            data[0] = (unsigned char)~gray_scale;
            data[1] = (unsigned char)~gray_scale;
            data[2] = (unsigned char)~gray_scale;
        }
        openni::DepthPixel* depthpixels = (openni::DepthPixel*)depthFrame.getData();
        cvtColor(dImg, grey, CV_RGB2GRAY);

        int i;
        try
        {
            for ( int y = 0; y < 480; y++ )
            {
                // getting in to each pixel in a row
                for ( int x = 0; x < 640; x++ )
                {
                    // getting out the corresponding pixel value from the array
                    i = x + y*640;
                    if (depthpixels[i] > 3000)
                    {
                        grey.at<unsigned char>(x, y) = 0;
                    }
                }
            }
        }
        catch (exception e)
        {
            cout << e.what() << endl;
            cout << depthpixels[i] << endl;
            cout << i << endl;
        }

        // cv::imshow( "depth", dImg );
        imshow("depth2", grey);
        int k = cvWaitKey( 30 ); // About 30fps
        if ( k == 0x1b )
            break;
    }

    // Destroy Streams
    depth.destroy();
    // Close Device
    device.close();
    // Shutdown OpenNI
    openni::OpenNI::shutdown();
    return 0;
}
Solved the problem simply by swapping my x and y around:
int i = 0;
for ( int y = 0; y < 480; y++ )
{
    // getting in to each pixel in a row
    for ( int x = 0; x < 640; x++ )
    {
        if (depthpixels[i] > 1500)
        {
            grey.at<unsigned char>(y, x) = 0;
        }
        if (depthpixels[i] < 500)
        {
            grey.at<unsigned char>(y, x) = 0;
        }
        i++;
    }
}
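For reference, the reason the swap matters is that cv::Mat::at takes (row, column), i.e. (y, x). A tiny illustration (the coordinates are made up):

// cv::Mat is stored row-major; element (row y, column x) is at
// data[y * step + x] for a single-channel 8-bit image.
cv::Mat grey(480, 640, CV_8UC1, cv::Scalar(255)); // 480 rows x 640 cols
int x = 100, y = 200;                             // made-up pixel coordinates
grey.at<unsigned char>(y, x) = 0;                 // correct: (row, col) == (y, x)
// grey.at<unsigned char>(x, y) would treat x as the row index; once x
// exceeds 479 it indexes past the last row and corrupts memory.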

histogram on opencv

Hey, I tried to make a histogram that shows a frame subtraction. The code runs, but I get a gray window with no result.
The message in the command window is:
Compiler did not align stack variables. Libavcodec has been miscompiled
and may be very slow or crash. This is not a bug in libavcodec,
but in the compiler. You may try recompiling using gcc >= 4.2.
Do not report crashes to FFmpeg developers.
OpenCV Error: Assertion failed (images[j].channels() == 1) in unknown function,
file ........\ocv\opencv\src\cv\cvhistogram.cpp, line 137
Here is the code. Does someone have an idea? Thanks for the help.
int main()
{
    int key = 0;
    CvCapture* capture = cvCaptureFromAVI( "macroblock.mpg" );
    IplImage* frame = cvQueryFrame( capture );
    IplImage* currframe = cvCreateImage(cvGetSize(frame), IPL_DEPTH_8U, 3);
    IplImage* destframe = cvCreateImage(cvGetSize(frame), IPL_DEPTH_8U, 3);
    IplImage* imgHistogram = 0;
    CvHistogram* hist;
    if ( !capture )
    {
        fprintf( stderr, "Cannot open AVI!\n" );
        return 1;
    }
    int fps = ( int )cvGetCaptureProperty( capture, CV_CAP_PROP_FPS );
    cvNamedWindow( "dest", CV_WINDOW_AUTOSIZE );
    cvNamedWindow( "imgHistogram", CV_WINDOW_AUTOSIZE );
    while ( key != 'x' )
    {
        frame = cvQueryFrame( capture );
        currframe = cvCloneImage( frame );
        frame = cvQueryFrame( capture );
        cvSub(frame, currframe, destframe);

        int bins = 256;
        int hsize[] = {bins};
        float max_value = 0, min_value = 0;
        float value;
        int normalized;
        float xranges[] = {0, 256};
        float* ranges[] = {xranges};
        IplImage* planes[] = {destframe};
        hist = cvCreateHist(1, hsize, CV_HIST_ARRAY, ranges, 1);
        cvCalcHist(planes, hist, 0, NULL);
        cvGetMinMaxHistValue(hist, &min_value, &max_value);
        // printf("Minimum Histogram Value: %f, Maximum Histogram Value: %f\n", min_value, max_value);
        imgHistogram = cvCreateImage(cvSize(bins, 50), IPL_DEPTH_8U, 3);
        cvRectangle(imgHistogram, cvPoint(0, 0), cvPoint(256, 50), CV_RGB(255, 255, 255), -1);
        for (int i = 0; i < bins; i++)
        {
            value = cvQueryHistValue_1D(hist, i);
            normalized = cvRound(value*50/max_value);
            cvLine(imgHistogram, cvPoint(i, 50), cvPoint(i, 50-normalized), CV_RGB(0, 0, 0));
        }
        if (key == 27) break;
        cvShowImage( "dest", destframe);
        cvShowImage( "imgHistogram", imgHistogram);
        key = cvWaitKey( 1000 / 10 );
    }
    cvDestroyWindow( "dest" );
    cvReleaseCapture( &capture );
    return 0;
}
Since you are trying to show a 1D histogram, the histogram plane needs to be grayscale. So you need to convert the result of cvSub() to grayscale first. Try:
IplImage *gray = NULL;
gray = cvCreateImage(cvGetSize(frame), IPL_DEPTH_8U, 1);
while (key != 'x') {
    ...
    cvSub(frame, currframe, destframe);
    cvCvtColor(destframe, gray, CV_BGR2GRAY);
    ...
    IplImage* planes[] = {gray};
    ...
}
Let me know if it works for you.
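For comparison, here is a sketch of the same single-channel histogram with the C++ API (this uses the standard cv::calcHist signature; the variable names are mine):

#include <opencv2/opencv.hpp>

// Compute a 256-bin histogram of a grayscale frame difference.
cv::Mat histogram256(const cv::Mat& grayDiff)
{
    int histSize = 256;                 // one bin per intensity level
    float range[] = { 0, 256 };         // upper bound is exclusive
    const float* ranges[] = { range };
    cv::Mat hist;
    // one image, channel 0, no mask, 1-D histogram
    cv::calcHist(&grayDiff, 1, 0, cv::Mat(), hist, 1, &histSize, ranges);
    return hist;                        // CV_32F, 256 x 1
}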

Online Face Recognition using OpenCV

I am trying to implement online face recognition using the webcam. I am using these two websites as references:
shervinemami.co.cc
cognotics.com
I have a few questions:
In face recognition, there are 6 steps:
Grab a frame from the camera
Detect a face within the image
Crop the frame to show just the face
Convert the frame to greyscale
Preprocess the image
Recognize the person in the image.
I am able to do the first five steps, but not the last one. I am not sure how to link step 5 to step 6.
I have already created the train.txt and test.txt files, which contain the information for the training and testing images. I have already added functions such as learn() and doPCA() to the code.
But the point is how to use these functions in main to recognize an image that has already been preprocessed.
I need some help with it.
The code is attached below:
// Real-time.cpp : Defines the entry point for the console application.
#include "stdafx.h"
#include <cv.h>
#include <cxcore.h>
#include <highgui.h>
#include <cvaux.h>

IplImage ** faceImgArr = 0; // array of face images
CvMat * personNumTruthMat = 0; // array of person numbers
int nTrainFaces = 0; // the number of training images
int nEigens = 0; // the number of eigenvalues
IplImage * pAvgTrainImg = 0; // the average image
IplImage ** eigenVectArr = 0; // eigenvectors
CvMat * eigenValMat = 0; // eigenvalues
CvMat * projectedTrainFaceMat = 0; // projected training faces

IplImage* getCameraFrame(CvCapture* &camera);
IplImage* detectFaces( IplImage *img, CvHaarClassifierCascade* facecascade, CvMemStorage* storage );
CvRect detectFaceInImage(IplImage *inputImg, CvHaarClassifierCascade* cascade);
IplImage* preprocess( IplImage* inputImg);
IplImage* resizeImage(const IplImage *origImg, int newWidth,
                      int newHeight, bool keepAspectRatio);
IplImage* cropImage(const IplImage *img, const CvRect region); // from the referenced tutorial code
void learn();
void recognize();
void doPCA();
void storeTrainingData();
int loadTrainingData(CvMat ** pTrainPersonNumMat);
int findNearestNeighbor(float * projectedTestFace);
int loadFaceImgArray(char * filename);
int _tmain(int argc, _TCHAR* argv[])
{
    CvCapture* camera = 0; // The camera device.
    CvMemStorage *storage;
    cvNamedWindow( "Realtime:", CV_WINDOW_AUTOSIZE);
    char *faceCascadeFilename = "C:/OpenCV2.1/data/haarcascades/haarcascade_frontalface_alt.xml";
    CvHaarClassifierCascade* faceCascade;
    faceCascade = (CvHaarClassifierCascade*)cvLoad(faceCascadeFilename, 0, 0, 0);
    storage = cvCreateMemStorage( 0 );
    learn();
    while ( cvWaitKey(10) != 27 ) // Quit on "Escape" key
    {
        IplImage *frame = getCameraFrame(camera);
        //IplImage* resized=cvCreateImage(cvSize(420,240),frame->depth,3);
        //cvResizeWindow( "Image:", 640, 480);
        //cvResize(frame,resized);
        //cvShowImage( "Realtime:", resized );
        IplImage *imgA = resizeImage(frame, 420, 240, true);
        IplImage *frame1 = detectFaces(imgA, faceCascade, storage);
        frame1 = preprocess(frame1);
    }
    // Free the camera.
    cvReleaseCapture( &camera );
    cvReleaseMemStorage( &storage );
    return 0;
}
IplImage* getCameraFrame(CvCapture* &camera)
{
    IplImage *frame;
    int w, h;

    // If the camera hasn't been initialized, then open it.
    if (!camera) {
        printf("Accessing the camera ...\n");
        camera = cvCreateCameraCapture( 0 );
        if (!camera) {
            printf("Couldn't access the camera.\n");
            exit(1);
        }
        // Try to set the camera resolution to 320 x 240.
        cvSetCaptureProperty(camera, CV_CAP_PROP_FRAME_WIDTH, 320);
        cvSetCaptureProperty(camera, CV_CAP_PROP_FRAME_HEIGHT, 240);
        // Get the first frame, to make sure the camera is initialized.
        frame = cvQueryFrame( camera );
        if (frame) {
            w = frame->width;
            h = frame->height;
            printf("Got the camera at %dx%d resolution.\n", w, h);
        }
        // Wait a little, so that the camera can auto-adjust its brightness.
        Sleep(1000); // (in milliseconds)
    }

    // Wait until the next camera frame is ready, then grab it.
    frame = cvQueryFrame( camera );
    if (!frame) {
        printf("Couldn't grab a camera frame.\n");
        exit(1);
    }
    return frame;
}
CvRect detectFaceInImage(IplImage *inputImg, CvHaarClassifierCascade* cascade)
{
    // Smallest face size.
    CvSize minFeatureSize = cvSize(20, 20);
    // Only search for 1 face.
    int flags = CV_HAAR_FIND_BIGGEST_OBJECT | CV_HAAR_DO_ROUGH_SEARCH;
    // How detailed should the search be.
    float search_scale_factor = 1.1f;
    IplImage *detectImg;
    IplImage *greyImg = 0;
    CvMemStorage* storage;
    CvRect rc;
    double t;
    CvSeq* rects;
    CvSize size;
    int i, ms, nFaces;

    storage = cvCreateMemStorage(0);
    cvClearMemStorage( storage );

    // If the image is color, use a greyscale copy of the image.
    detectImg = (IplImage*)inputImg;
    if (inputImg->nChannels > 1) {
        size = cvSize(inputImg->width, inputImg->height);
        greyImg = cvCreateImage(size, IPL_DEPTH_8U, 1 );
        cvCvtColor( inputImg, greyImg, CV_BGR2GRAY );
        detectImg = greyImg; // Use the greyscale image.
    }

    // Detect all the faces in the greyscale image.
    t = (double)cvGetTickCount();
    rects = cvHaarDetectObjects( detectImg, cascade, storage,
                                 search_scale_factor, 3, flags, minFeatureSize);
    t = (double)cvGetTickCount() - t;
    ms = cvRound( t / ((double)cvGetTickFrequency() * 1000.0) );
    nFaces = rects->total;
    printf("Face Detection took %d ms and found %d objects\n", ms, nFaces);

    // Get the first detected face (the biggest).
    if (nFaces > 0)
        rc = *(CvRect*)cvGetSeqElem( rects, 0 );
    else
        rc = cvRect(-1, -1, -1, -1); // Couldn't find the face.

    if (greyImg)
        cvReleaseImage( &greyImg );
    cvReleaseMemStorage( &storage );
    //cvReleaseHaarClassifierCascade( &cascade );
    return rc; // Return the biggest face found, or (-1,-1,-1,-1).
}
IplImage* detectFaces( IplImage *img, CvHaarClassifierCascade* facecascade, CvMemStorage* storage )
{
    int i;
    CvRect *r = 0;
    CvSeq *faces = cvHaarDetectObjects(
        img,
        facecascade,
        storage,
        1.1,
        3,
        0 /*CV_HAAR_DO_CANNY_PRUNNING*/,
        cvSize( 40, 40 ) );
    int padding_width = 30; // pixels
    int padding_height = 30; // pixels
    for ( i = 0; i < ( faces ? faces->total : 0 ); i++ ) {
        r = ( CvRect* )cvGetSeqElem( faces, i );
        cvRectangle( img,
                     cvPoint( r->x, r->y ),
                     cvPoint( r->x + r->width, r->y + r->height ),
                     CV_RGB( 255, 0, 0 ), 1, 8, 0 );
    }
    cvShowImage( "Realtime:", img );

    // crop the last detected face; r is only valid when a face was found
    if ( r ) {
        cvSetImageROI(img, cvRect(r->x, r->y, r->width, r->height));
        IplImage *img2 = cvCreateImage(cvGetSize(img),
                                       img->depth,
                                       img->nChannels);
        cvCopy(img, img2, NULL);
        cvResetImageROI(img);
    }
    return img;
}
IplImage* preprocess( IplImage* inputImg)
{
    IplImage *detectImg, *greyImg = 0;
    IplImage *imageProcessed;
    CvSize size;

    detectImg = (IplImage*)inputImg;
    if (inputImg->nChannels > 1) {
        size = cvSize(inputImg->width, inputImg->height);
        greyImg = cvCreateImage(size, IPL_DEPTH_8U, 1 );
        cvCvtColor( inputImg, greyImg, CV_BGR2GRAY );
        detectImg = greyImg; // Use the greyscale image.
    }

    imageProcessed = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1);
    cvResize(detectImg, imageProcessed, CV_INTER_LINEAR);
    cvEqualizeHist(imageProcessed, imageProcessed);
    return imageProcessed;
}
IplImage* resizeImage(const IplImage *origImg, int newWidth,
                      int newHeight, bool keepAspectRatio)
{
    IplImage *outImg = 0;
    int origWidth;
    int origHeight;
    if (origImg) {
        origWidth = origImg->width;
        origHeight = origImg->height;
    }
    if (newWidth <= 0 || newHeight <= 0 || origImg == 0
            || origWidth <= 0 || origHeight <= 0) {
        //cerr << "ERROR: Bad desired image size of " << newWidth
        //     << "x" << newHeight << " in resizeImage().\n";
        exit(1);
    }
    if (keepAspectRatio) {
        // Resize the image without changing its aspect ratio,
        // by cropping off the edges and enlarging the middle section.
        CvRect r;
        // input aspect ratio
        float origAspect = (origWidth / (float)origHeight);
        // output aspect ratio
        float newAspect = (newWidth / (float)newHeight);
        // crop width to be origHeight * newAspect
        if (origAspect > newAspect) {
            int tw = (origHeight * newWidth) / newHeight;
            r = cvRect((origWidth - tw)/2, 0, tw, origHeight);
        }
        else { // crop height to be origWidth / newAspect
            int th = (origWidth * newHeight) / newWidth;
            r = cvRect(0, (origHeight - th)/2, origWidth, th);
        }
        IplImage *croppedImg = cropImage(origImg, r);
        // Call this function again, with the new aspect ratio image.
        // Will do a scaled image resize with the correct aspect ratio.
        outImg = resizeImage(croppedImg, newWidth, newHeight, false);
        cvReleaseImage( &croppedImg );
    }
    else {
        // Scale the image to the new dimensions,
        // even if the aspect ratio will be changed.
        outImg = cvCreateImage(cvSize(newWidth, newHeight),
                               origImg->depth, origImg->nChannels);
        if (newWidth > origImg->width && newHeight > origImg->height) {
            // Make the image larger
            cvResetImageROI((IplImage*)origImg);
            // CV_INTER_LINEAR: good at enlarging.
            // CV_INTER_CUBIC: good at enlarging.
            cvResize(origImg, outImg, CV_INTER_LINEAR);
        }
        else {
            // Make the image smaller
            cvResetImageROI((IplImage*)origImg);
            // CV_INTER_AREA: good at shrinking (decimation) only.
            cvResize(origImg, outImg, CV_INTER_AREA);
        }
    }
    return outImg;
}
void learn()
{
    int i, offset;

    // load training data
    nTrainFaces = loadFaceImgArray("C:/Users/HP/Desktop/OpenCV/50_images_of_15_people.txt");
    if ( nTrainFaces < 2 )
    {
        fprintf(stderr,
                "Need 2 or more training faces\n"
                "Input file contains only %d\n", nTrainFaces);
        return;
    }

    // do PCA on the training faces
    doPCA();

    // project the training images onto the PCA subspace
    projectedTrainFaceMat = cvCreateMat( nTrainFaces, nEigens, CV_32FC1 );
    offset = projectedTrainFaceMat->step / sizeof(float);
    for (i = 0; i < nTrainFaces; i++)
    {
        //int offset = i * nEigens;
        cvEigenDecomposite(
            faceImgArr[i],
            nEigens,
            eigenVectArr,
            0, 0,
            pAvgTrainImg,
            //projectedTrainFaceMat->data.fl + i*nEigens);
            projectedTrainFaceMat->data.fl + i*offset);
    }

    // store the recognition data as an xml file
    storeTrainingData();
}
void recognize()
{
    int i, nTestFaces = 0; // the number of test images
    CvMat * trainPersonNumMat = 0; // the person numbers during training
    float * projectedTestFace = 0;

    // load test images and ground truth for person number
    nTestFaces = loadFaceImgArray("C:/Users/HP/Desktop/OpenCV/test.txt");
    printf("%d test faces loaded\n", nTestFaces);

    // load the saved training data
    if ( !loadTrainingData( &trainPersonNumMat ) ) return;

    // project the test images onto the PCA subspace
    projectedTestFace = (float *)cvAlloc( nEigens*sizeof(float) );
    for (i = 0; i < nTestFaces; i++)
    {
        int iNearest, nearest, truth;

        // project the test image onto the PCA subspace
        cvEigenDecomposite(
            faceImgArr[i],
            nEigens,
            eigenVectArr,
            0, 0,
            pAvgTrainImg,
            projectedTestFace);

        iNearest = findNearestNeighbor(projectedTestFace);
        truth = personNumTruthMat->data.i[i];
        nearest = trainPersonNumMat->data.i[iNearest];
        printf("nearest = %d, Truth = %d\n", nearest, truth);
    }
}
int loadTrainingData(CvMat ** pTrainPersonNumMat)
{
    CvFileStorage * fileStorage;
    int i;

    // create a file-storage interface
    fileStorage = cvOpenFileStorage( "facedata.xml", 0, CV_STORAGE_READ );
    if ( !fileStorage )
    {
        fprintf(stderr, "Can't open facedata.xml\n");
        return 0;
    }

    nEigens = cvReadIntByName(fileStorage, 0, "nEigens", 0);
    nTrainFaces = cvReadIntByName(fileStorage, 0, "nTrainFaces", 0);
    *pTrainPersonNumMat = (CvMat *)cvReadByName(fileStorage, 0, "trainPersonNumMat", 0);
    eigenValMat = (CvMat *)cvReadByName(fileStorage, 0, "eigenValMat", 0);
    projectedTrainFaceMat = (CvMat *)cvReadByName(fileStorage, 0, "projectedTrainFaceMat", 0);
    pAvgTrainImg = (IplImage *)cvReadByName(fileStorage, 0, "avgTrainImg", 0);
    eigenVectArr = (IplImage **)cvAlloc(nTrainFaces*sizeof(IplImage *));
    for (i = 0; i < nEigens; i++)
    {
        char varname[200];
        sprintf( varname, "eigenVect_%d", i );
        eigenVectArr[i] = (IplImage *)cvReadByName(fileStorage, 0, varname, 0);
    }

    // release the file-storage interface
    cvReleaseFileStorage( &fileStorage );
    return 1;
}
void storeTrainingData()
{
    CvFileStorage * fileStorage;
    int i;

    // create a file-storage interface
    fileStorage = cvOpenFileStorage( "facedata.xml", 0, CV_STORAGE_WRITE );

    // store all the data
    cvWriteInt( fileStorage, "nEigens", nEigens );
    cvWriteInt( fileStorage, "nTrainFaces", nTrainFaces );
    cvWrite(fileStorage, "trainPersonNumMat", personNumTruthMat, cvAttrList(0,0));
    cvWrite(fileStorage, "eigenValMat", eigenValMat, cvAttrList(0,0));
    cvWrite(fileStorage, "projectedTrainFaceMat", projectedTrainFaceMat, cvAttrList(0,0));
    cvWrite(fileStorage, "avgTrainImg", pAvgTrainImg, cvAttrList(0,0));
    for (i = 0; i < nEigens; i++)
    {
        char varname[200];
        sprintf( varname, "eigenVect_%d", i );
        cvWrite(fileStorage, varname, eigenVectArr[i], cvAttrList(0,0));
    }

    // release the file-storage interface
    cvReleaseFileStorage( &fileStorage );
}
int findNearestNeighbor(float * projectedTestFace)
{
    //double leastDistSq = 1e12;
    double leastDistSq = DBL_MAX;
    int i, iTrain, iNearest = 0;

    for (iTrain = 0; iTrain < nTrainFaces; iTrain++)
    {
        double distSq = 0;
        for (i = 0; i < nEigens; i++)
        {
            float d_i =
                projectedTestFace[i] -
                projectedTrainFaceMat->data.fl[iTrain*nEigens + i];
            //distSq += d_i*d_i / eigenValMat->data.fl[i]; // Mahalanobis
            distSq += d_i*d_i; // Euclidean
        }
        if (distSq < leastDistSq)
        {
            leastDistSq = distSq;
            iNearest = iTrain;
        }
    }
    return iNearest;
}
void doPCA()
{
    int i;
    CvTermCriteria calcLimit;
    CvSize faceImgSize;

    // set the number of eigenvalues to use
    nEigens = nTrainFaces - 1;

    // allocate the eigenvector images
    faceImgSize.width = faceImgArr[0]->width;
    faceImgSize.height = faceImgArr[0]->height;
    eigenVectArr = (IplImage**)cvAlloc(sizeof(IplImage*) * nEigens);
    for (i = 0; i < nEigens; i++)
        eigenVectArr[i] = cvCreateImage(faceImgSize, IPL_DEPTH_32F, 1);

    // allocate the eigenvalue array
    eigenValMat = cvCreateMat( 1, nEigens, CV_32FC1 );

    // allocate the averaged image
    pAvgTrainImg = cvCreateImage(faceImgSize, IPL_DEPTH_32F, 1);

    // set the PCA termination criterion
    calcLimit = cvTermCriteria( CV_TERMCRIT_ITER, nEigens, 1);

    // compute average image, eigenvalues, and eigenvectors
    cvCalcEigenObjects(
        nTrainFaces,
        (void*)faceImgArr,
        (void*)eigenVectArr,
        CV_EIGOBJ_NO_CALLBACK,
        0,
        0,
        &calcLimit,
        pAvgTrainImg,
        eigenValMat->data.fl);

    cvNormalize(eigenValMat, eigenValMat, 1, 0, CV_L1, 0);
}
int loadFaceImgArray(char * filename)
{
    FILE * imgListFile = 0;
    char imgFilename[512];
    int iFace, nFaces = 0;

    // open the input file
    if ( !(imgListFile = fopen(filename, "r")) )
    {
        fprintf(stderr, "Can't open file %s\n", filename);
        return 0;
    }

    // count the number of faces
    while ( fgets(imgFilename, 512, imgListFile) ) ++nFaces;
    rewind(imgListFile);

    // allocate the face-image array and person number matrix
    faceImgArr = (IplImage **)cvAlloc( nFaces*sizeof(IplImage *) );
    personNumTruthMat = cvCreateMat( 1, nFaces, CV_32SC1 );

    // store the face images in an array
    for (iFace = 0; iFace < nFaces; iFace++)
    {
        // read person number and name of image file
        fscanf(imgListFile,
               "%d %s", personNumTruthMat->data.i+iFace, imgFilename);

        // load the face image
        faceImgArr[iFace] = cvLoadImage(imgFilename, CV_LOAD_IMAGE_GRAYSCALE);
        if ( !faceImgArr[iFace] )
        {
            fprintf(stderr, "Can't load image from %s\n", imgFilename);
            return 0;
        }
    }

    fclose(imgListFile);
    return nFaces;
}
My answer may come late, but it might be useful for others, so I will answer it. I am working on a similar project and I faced the same problem. I solved it by writing a function that saves the detected, cropped and preprocessed image to the hard disk of my computer (using imwrite), and then feeding the saved images to the recognition part of the code. That made my life easier. It was a bit harder for me to pass the parameters of the rect of the region of interest; if you or someone else has done this, it would be great to share the code with us.
You can use the following code to save the image, after resizing it to a constant size using the resizeImage function in your code.
void saveCroppedFaces(CvSeq* tempon, IplImage* DetectedImage)
{
    int nFaces = tempon->total;
    IplImage* cropped = 0;
    IplImage* croppedResized = 0;
    Mat croped;
    char name[16];
    for (int k = 0; k < nFaces; k++)
    {
        sprintf(name, "%d", k); // use the face index as the file name
        CvRect rect = *(CvRect*)cvGetSeqElem( tempon, k );
        cropped = cropImage(DetectedImage, rect);
        // resize the cropped faces to a fixed size here,
        // then save them to the hard drive
        //cvNamedWindow(name, CV_WINDOW_AUTOSIZE);
        //cvShowImage(name, cropped);
        croppedResized = resizeImage(cropped, 60, 60);
        croped = IplToMatConverter(croppedResized);
        saveROI(croped, name);
        cvReleaseImage(&cropped);
        cvReleaseImage(&croppedResized);
    }
}
void saveROI(Mat mat, String outputFileName)
{
    string store_path("C://Users/sizusuzu/Desktop/Images/FaceDetection2/" + outputFileName + ".jpg");
    bool write_success = imwrite(store_path, mat);
}
After this you can change the IplImage* to Mat using:
Mat IplToMatConverter(IplImage* imageToMat)
{
    Mat mat = cvarrToMat(imageToMat);
    return mat;
}
And use the Mat in the FaceRecognizer API, or just do it the other/harder way.
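A rough sketch of that last step, assuming the contrib module's FaceRecognizer from OpenCV 2.4 (the file names and label values below are placeholders):

#include <opencv2/contrib/contrib.hpp>  // FaceRecognizer (OpenCV 2.4 contrib)
#include <opencv2/highgui/highgui.hpp>
#include <vector>
using namespace cv;

// Train an Eigenfaces model on the saved crops and predict a new face.
int recognizeSaved(const Mat& testFace)
{
    std::vector<Mat> images; // grayscale face crops, all the same size
    std::vector<int> labels; // person id for each crop
    images.push_back(imread("0.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(1);
    images.push_back(imread("1.jpg", CV_LOAD_IMAGE_GRAYSCALE)); labels.push_back(2);

    Ptr<FaceRecognizer> model = createEigenFaceRecognizer();
    model->train(images, labels);
    return model->predict(testFace); // returns the predicted person id
}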
Thanks
I just read
int _tmain(int argc, _TCHAR* argv[])
{
    .......
}
part of your code. This code is used for detecting the face in the image. Let's say it is Face_x. Now extract features from Face_x; call them F_x. In your database, you should store features {F_1, F_2, ..., F_N} extracted from N different faces {Face_1, Face_2, ..., Face_N}.
A simple algorithm to recognize Face_x is to calculate the Euclidean distance between F_x and each of the N stored features. The minimum distance (if below a threshold) gives the corresponding face. If the minimum distance is not below the threshold, then Face_x is a new face: add the feature F_x to the database. This way you can grow your database; you can even begin with no features stored, and the database grows with each new face.
I hope the method I suggested leads you to the solution.
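A minimal sketch of that matching rule (the feature vectors are plain float arrays here; the threshold value is up to you):

#include <vector>
#include <cfloat>

// Squared Euclidean distance between two feature vectors of equal length.
static double distSq(const std::vector<float>& a, const std::vector<float>& b)
{
    double d = 0;
    for (size_t i = 0; i < a.size(); ++i)
        d += (a[i] - b[i]) * (a[i] - b[i]);
    return d;
}

// Return the index of the closest stored face, or -1 for "new face",
// in which case the caller should append fx to the database.
int matchOrEnroll(const std::vector<std::vector<float> >& db,
                  const std::vector<float>& fx, double threshold)
{
    double best = DBL_MAX;
    int bestIdx = -1;
    for (size_t i = 0; i < db.size(); ++i) {
        double d = distSq(db[i], fx);
        if (d < best) { best = d; bestIdx = (int)i; }
    }
    return (best < threshold * threshold) ? bestIdx : -1;
}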

OpenCV Max locations

I am working on an OpenCV project and am using cvMatchTemplate to locate part of an image, then cvMinMaxLoc to find the best match. My problem is that cvMinMaxLoc only returns one max location, whereas there may be multiple matches in one image.
Is there any way to return all the max locations above a particular threshold?
I.e.:
for each location > threshold
    add location to array
I'm new to OpenCV and don't know if something like this already exists, but so far I haven't been able to find anything.
Any help greatly appreciated.
I modified the matchTemplate tutorial to get you started. It keeps the top X match points in a min-heap (a priority_queue) and later plots all of them. Hope that is helpful!
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <vector>
#include <utility>
#include <queue>

using namespace cv;
using namespace std;

// Comparator so the priority_queue becomes a min-heap on the match score:
// the weakest kept match sits on top and is evicted first.
struct MatchCmp
{
    bool operator()(const pair<float, Point>& a, const pair<float, Point>& b) const
    {
        return a.first > b.first;
    }
};

// Collect the locations of the `size` largest values in a CV_32F result.
void maxLocs(const Mat& src, queue<Point>& dst, size_t size)
{
    priority_queue<pair<float, Point>, vector<pair<float, Point> >, MatchCmp> best;
    for (int i = 0; i < src.rows; i++)
    {
        for (int j = 0; j < src.cols; j++)
        {
            float value = src.at<float>(i, j);
            if (best.size() < size)
            {
                best.push(make_pair(value, Point(j, i)));
            }
            else if (value > best.top().first)
            {
                best.pop(); // drop the weakest match kept so far
                best.push(make_pair(value, Point(j, i)));
            }
        }
    }
    // hand the surviving locations back in a plain queue
    while (!best.empty())
    {
        dst.push(best.top().second);
        best.pop();
    }
}
/// Global Variables
Mat img; Mat templ; Mat result;
string image_window = "Source Image";
string result_window = "Result window";
int match_method;
int max_Trackbar = 5;

/// Function Headers
void MatchingMethod( int, void* );

int main(int argc, char* argv[])
{
    /// Load image and template
    img = imread( "dogs.jpg", 1 );
    templ = imread( "dog_templ.jpg", 1 );

    /// Create windows
    namedWindow( image_window, CV_WINDOW_AUTOSIZE );
    namedWindow( result_window, CV_WINDOW_AUTOSIZE );

    /// Create Trackbar
    string trackbar_label = "Method: \n 0: SQDIFF \n 1: SQDIFF NORMED \n 2: TM CCORR \n 3: TM CCORR NORMED \n 4: TM COEFF \n 5: TM COEFF NORMED";
    createTrackbar( trackbar_label, image_window, &match_method, max_Trackbar, MatchingMethod );

    MatchingMethod( 0, 0 );
    waitKey(0);
    return 0;
}
/**
 * @function MatchingMethod
 * @brief Trackbar callback
 */
void MatchingMethod( int, void* )
{
    /// Source image to display
    Mat img_display;
    img.copyTo( img_display );

    /// Create the result matrix
    int result_cols = img.cols - templ.cols + 1;
    int result_rows = img.rows - templ.rows + 1;
    result.create( result_rows, result_cols, CV_32FC1 ); // rows first, then cols

    /// Do the Matching and Normalize
    matchTemplate( img, templ, result, match_method );
    normalize( result, result, 0, 1, NORM_MINMAX, -1, Mat() );

    /// For SQDIFF and SQDIFF_NORMED, the best matches are lower values.
    /// For all the other methods, the higher the better.
    if ( match_method == CV_TM_SQDIFF || match_method == CV_TM_SQDIFF_NORMED )
    {
        result = 1.0 - result;
    }

    // get the top 100 maximums...
    queue<Point> locations;
    maxLocs(result, locations, 100);

    /// Show me what you got
    while (!locations.empty())
    {
        Point matchLoc = locations.front();
        rectangle( img_display, matchLoc, Point( matchLoc.x + templ.cols, matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );
        rectangle( result, matchLoc, Point( matchLoc.x + templ.cols, matchLoc.y + templ.rows ), Scalar::all(0), 2, 8, 0 );
        locations.pop();
    }

    imshow( image_window, img_display );
    imshow( result_window, result );
    return;
}
Try cvThreshold(src, dst, threshold, 255, CV_THRESH_BINARY).
This returns an image in dst with all pixels above the threshold set to white (255) and all others black. You would then iterate through the pixels and record every location whose value is greater than 0. Something like this:
// note: IplImage rows are padded to widthStep bytes, so walk row by row
for (int y = 0; y < dst->height; y++)
{
    unsigned char* row = (unsigned char*)(dst->imageData + y * dst->widthStep);
    for (int x = 0; x < dst->width; x++)
    {
        if (row[x] > 0)
        {
            // (x, y) is above the threshold -- copy it into your array
        }
    }
}
