Related
Apologies if this seems trivial - relatively new to openCV.
Essentially, I'm trying to create a function that can take in a camera's image, the known world coordinates of that image, and the world coordinates of some other point 2, and then transform the camera's image to what it would look like if the camera was at point 2. From my understanding, the best way to tackle this is using a homography transformation using the warpPerspective tool.
The experiment is being done inside the Unreal Game simulation engine. Right now, I essentially read the data from the camera, and add a set transformation to the image. However, I seem to be doing something wrong as the image is looking something like this (original image first then distorted image):
Original Image
Distorted Image
This is the current code I have. Basically, it reads in the texture from Unreal engine, and then gets the individual pixel values and puts them into the openCV Mat. Then I try and apply my warpPerspective transformation. Interestingly, if I just try a simple warpAffine transformation (rotation), it works fine. I have seen this questions: Opencv virtually camera rotating/translating for bird's eye view, but I cannot figure out what I am doing wrong vs. their solution. I would really appreciate any help or guidance any of you may have. Thanks in advance!
ROSCamTextureRenderTargetRes->ReadPixels(ImageData);
cv::Mat image_data_matrix(TexHeight, TexWidth, CV_8UC3);
cv::Mat warp_dst, warp_rotate_dst;
int currCol = 0;
int currRow = 0;
cv::Vec3b* pixel_left = image_data_matrix.ptr<cv::Vec3b>(currRow);
for (auto color : ImageData)
{
pixel_left[currCol][2] = color.R;
pixel_left[currCol][1] = color.G;
pixel_left[currCol][0] = color.B;
currCol++;
if (currCol == TexWidth)
{
currRow++;
currCol = 0;
pixel_left = image_data_matrix.ptr<cv::Vec3b>(currRow);
}
}
warp_dst = cv::Mat(image_data_matrix.rows, image_data_matrix.cols, image_data_matrix.type());
double rotX = (45 - 90)*PI / 180;
double rotY = (90 - 90)*PI / 180;
double rotZ = (90 - 90)*PI / 180;
cv::Mat A1 = (cv::Mat_<float>(4, 3) <<
1, 0, (-1)*TexWidth / 2,
0, 1, (-1)*TexHeight / 2,
0, 0, 0,
0, 0, 1);
// Rotation matrices Rx, Ry, Rz
cv::Mat RX = (cv::Mat_<float>(4, 4) <<
1, 0, 0, 0,
0, cos(rotX), (-1)*sin(rotX), 0,
0, sin(rotX), cos(rotX), 0,
0, 0, 0, 1);
cv::Mat RY = (cv::Mat_<float>(4, 4) <<
cos(rotY), 0, (-1)*sin(rotY), 0,
0, 1, 0, 0,
sin(rotY), 0, cos(rotY), 0,
0, 0, 0, 1);
cv::Mat RZ = (cv::Mat_<float>(4, 4) <<
cos(rotZ), (-1)*sin(rotZ), 0, 0,
sin(rotZ), cos(rotZ), 0, 0,
0, 0, 1, 0,
0, 0, 0, 1);
// R - rotation matrix
cv::Mat R = RX * RY * RZ;
// T - translation matrix
cv::Mat T = (cv::Mat_<float>(4, 4) <<
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, dist,
0, 0, 0, 1);
// K - intrinsic matrix
cv::Mat K = (cv::Mat_<float>(3, 4) <<
12.5, 0, TexHeight / 2, 0,
0, 12.5, TexWidth / 2, 0,
0, 0, 1, 0
);
cv::Mat warp_mat = K * (T * (R * A1));
//warp_mat = cv::getRotationMatrix2D(srcTri[0], 43.0, 1);
//cv::warpAffine(image_data_matrix, warp_dst, warp_mat, warp_dst.size());
cv::warpPerspective(image_data_matrix, warp_dst, warp_mat, image_data_matrix.size(), CV_INTER_CUBIC | CV_WARP_INVERSE_MAP);
cv::imshow("distort", warp_dst);
cv::imshow("imaage", image_data_matrix)
I am working on an object detection code and I chose the edge orientation histogram as a descriptor for the matching.
I am facing a problem in the back projected histogram as i don't seem to have a good matching , the back projected image is mostly white, which means that i cannot use meanshift or so for detection of the object.
Please help me regarding this matter:
here is what i've done so far:
Take an initial ROI (the target needed to be detected in the video stream).
convert ROI to grayscale
apply sobel operator for both x, y derivatives.
calculate orientation using opencv phase function (from derivative x and derivative y)
make a histogram of the generated orientation. with the following specs:
(range : 0 to 2 PI) , (single channel) , (256 bins)
normalize the histogram
the code for doing these steps is the following:
Mat ROI_grad_x, ROI_grad_y , ROI_grad , ROI_gray;
Mat ROI_abs_grad_x, ROI_abs_grad_y;
cvtColor(ROI, ROI_gray, CV_BGR2GRAY);
/// Gradient X
Sobel( ROI_gray, ROI_grad_x, CV_16S, 1, 0, 3 );
/// Gradient Y
Sobel( ROI_gray, ROI_grad_y, CV_16S, 0, 1, 3 );
convertScaleAbs( ROI_grad_x, ROI_abs_grad_x );
convertScaleAbs( ROI_grad_y, ROI_abs_grad_y );
addWeighted( ROI_abs_grad_x, 0.5, ROI_abs_grad_y, 0.5, 0, ROI_grad );
Mat ROI_orientation = Mat::zeros(ROI_abs_grad_x.rows, ROI_abs_grad_y.cols, CV_32F); //to store the gradients
Mat ROI_orientation_norm ;
ROI_grad_x.convertTo(ROI_grad_x,CV_32F);
ROI_grad_y.convertTo(ROI_grad_y,CV_32F);
phase(ROI_grad_x, ROI_grad_y, ROI_orientation , false);
Mat ROI_orientation_hist ;
float ROI_orientation_range[] = {0 , CV_PI};
const float *ROI_orientation_histRange[] = {ROI_orientation_range};
int ROI_orientation_histSize =256;
//calcHist( &ROI_orientation, 1, 0, Mat(), ROI_orientation_hist, 1, &ROI_orientation_histSize, &ROI_orientation_histRange , true, false);
calcHist( &ROI_orientation, 1, 0, Mat(), ROI_orientation_hist, 1, &ROI_orientation_histSize, ROI_orientation_histRange , true, false);
normalize( ROI_orientation_hist, ROI_orientation_hist, 0, 255, NORM_MINMAX, -1, Mat() );
then , and for each camera frame captured , I do the following steps:
convert to grayscale
apply sobel operator for both x derivative and y derivative.
compute orientation using phase opencv function (using the 2 derivatives mentioned above)
back project the histogram onto the orientation frame matrix to get the matches.
the code used for this part is the following :
Mat grad_x, grad_y , grad;
Mat abs_grad_x, abs_grad_y;
/// Gradient X
Sobel( frame_gray, grad_x, CV_16S, 1, 0, 3 );
/// Gradient Y
Sobel( frame_gray, grad_y, CV_16S, 0, 1, 3 );
convertScaleAbs( grad_x, abs_grad_x );
convertScaleAbs( grad_y, abs_grad_y );
addWeighted( abs_grad_x, 0.5, abs_grad_y, 0.5, 0, grad );
///======================
Mat orientation = Mat::zeros(abs_grad_x.rows, abs_grad_y.cols, CV_32F); //to store the gradients
Mat orientation_norm ;
grad_x.convertTo(grad_x,CV_32F);
grad_y.convertTo(grad_y,CV_32F);
phase(grad_x, grad_y, orientation , false);
Mat EOH_backProj ;
calcBackProject( &orientation, 1, 0, ROI_orientation_hist, EOH_backProj, ROI_orientation_histRange, 1, true );
So , what seems to be the problem in my approach ?!
Thanks alot.
I want to apply the Tenengrad algorithm to a central rectangular region inside the image. Assuming that I have the coordinates of the vertices of the rectangular region or maybe one corner and the dimensions, how can I modify the following code to apply the sharpness measure over the selected region ?
double tenengrad(const cv::Mat& src, int ksize)
{
cv::Mat Gx, Gy;
cv::Sobel(src, Gx, CV_64F, 1, 0, ksize);
cv::Sobel(src, Gy, CV_64F, 0, 1, ksize);
cv::Mat FM = Gx.mul(Gx) + Gy.mul(Gy);
double focusMeasure = cv::mean(FM).val[0];
return focusMeasure;
}
cv::Mat imageRegion;
imageRegion = src(cv::Rect(x, y, width, height));
creates a matrix that points to the region of the original image specified by the rectangle (x, y, width, height). Modifying imageRegion will modify the original image src. So you can use imageRegion instead of src
cv::Mat Gx, Gy;
cv::Sobel(imageRegion, Gx, CV_64F, 1, 0, ksize);
cv::Sobel(imageRegion, Gy, CV_64F, 0, 1, ksize);
cv::Mat FM = Gx.mul(Gx) + Gy.mul(Gy);
double focusMeasure = cv::mean(FM).val[0];
I have been studying the operation of the Kalman filter for a couple of days now to improve the performance of my face detection program. From the information I have gathered I have put together a code. The code for the Kalman filter part is as follows.
int Kalman(int X,int faceWidth,int Y,int faceHeight, IplImage *img1){
CvRandState rng;
const float T = 0.1;
// Initialize Kalman filter object, window, number generator, etc
cvRandInit( &rng, 0, 1, -1, CV_RAND_UNI );
//IplImage* img = cvCreateImage( cvSize(500,500), 8, 3 );
CvKalman* kalman = cvCreateKalman( 4, 4, 0 );
// Initializing with random guesses
// state x_k
CvMat* state = cvCreateMat( 4, 1, CV_32FC1 );
cvRandSetRange( &rng, 0, 0.1, 0 );
rng.disttype = CV_RAND_NORMAL;
cvRand( &rng, state );
// Process noise w_k
CvMat* process_noise = cvCreateMat( 4, 1, CV_32FC1 );
// Measurement z_k
CvMat* measurement = cvCreateMat( 4, 1, CV_32FC1 );
cvZero(measurement);
/* create matrix data */
const float A[] = {
1, 0, T, 0,
0, 1, 0, T,
0, 0, 1, 0,
0, 0, 0, 1
};
const float H[] = {
1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 1, 0,
0, 0, 0, 0
};
//Didn't use this matrix in the end as it gave an error:'ambiguous call to overloaded function'
/* const float P[] = {
pow(320,2), pow(320,2)/T, 0, 0,
pow(320,2)/T, pow(320,2)/pow(T,2), 0, 0,
0, 0, pow(240,2), pow(240,2)/T,
0, 0, pow(240,2)/T, pow(240,2)/pow(T,2)
}; */
const float Q[] = {
pow(T,3)/3, pow(T,2)/2, 0, 0,
pow(T,2)/2, T, 0, 0,
0, 0, pow(T,3)/3, pow(T,2)/2,
0, 0, pow(T,2)/2, T
};
const float R[] = {
1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 1, 0,
0, 0, 0, 0
};
//Copy created matrices into kalman structure
memcpy( kalman->transition_matrix->data.fl, A, sizeof(A));
memcpy( kalman->measurement_matrix->data.fl, H, sizeof(H));
memcpy( kalman->process_noise_cov->data.fl, Q, sizeof(Q));
//memcpy( kalman->error_cov_post->data.fl, P, sizeof(P));
memcpy( kalman->measurement_noise_cov->data.fl, R, sizeof(R));
//Initialize other Kalman Filter parameters
//cvSetIdentity( kalman->measurement_matrix, cvRealScalar(1) );
//cvSetIdentity( kalman->process_noise_cov, cvRealScalar(1e-5) );
/*cvSetIdentity( kalman->measurement_noise_cov, cvRealScalar(1e-1) );*/
cvSetIdentity( kalman->error_cov_post, cvRealScalar(1e-5) );
/* choose initial state */
kalman->state_post->data.fl[0]=X;
kalman->state_post->data.fl[1]=faceWidth;
kalman->state_post->data.fl[2]=Y;
kalman->state_post->data.fl[3]=faceHeight;
//cvRand( &rng, kalman->state_post );
/* predict position of point */
const CvMat* prediction=cvKalmanPredict(kalman,0);
//generate measurement (z_k)
cvRandSetRange( &rng, 0, sqrt(kalman->measurement_noise_cov->data.fl[0]), 0 );
cvRand( &rng, measurement );
cvMatMulAdd( kalman->measurement_matrix, state, measurement, measurement );
//Draw rectangles in detected face location
cvRectangle( img1,
cvPoint( kalman->state_post->data.fl[0], kalman->state_post->data.fl[2] ),
cvPoint( kalman->state_post->data.fl[1], kalman->state_post->data.fl[3] ),
CV_RGB( 0, 255, 0 ), 1, 8, 0 );
cvRectangle( img1,
cvPoint( prediction->data.fl[0], prediction->data.fl[2] ),
cvPoint( prediction->data.fl[1], prediction->data.fl[3] ),
CV_RGB( 0, 0, 255 ), 1, 8, 0 );
cvShowImage("Kalman",img1);
//adjust kalman filter state
cvKalmanCorrect(kalman,measurement);
cvMatMulAdd(kalman->transition_matrix, state, process_noise, state);
return 0;
}
In the face detection part(not shown), a box is drawn for the face detected. 'X, Y, faceWidth and faceHeight' are coordinates of the box and the width and the height passed into the Kalman filter. 'img1' is the current frame of a video.
Results:
Although I do get two new rectangles from the 'state_post' and 'prediction' data (as seen in the code), none of them seem to be any more stable than the initial box drawn without the Kalman filter.
Here are my questions:
Are the matrices initialized (transition matrix A, measurement matrix H etc.), correct for this four input case? (eg.4*4 matrices for four inputs?)
Can't we set every matrix to be an identity matrix?
Is the method I followed till plotting the rectangles theoretically correct? I followed the examples in this and the book 'Learning OpenCV' which don't use external inputs.
Any help regarding this would be much appreciated!
H[] should be identity if you measure directly from the image. If you have 4 measurements and you make 0 some values on the diagonal, you are making those expected measurements (x*H) 0, when it is not true. Then the innovation ( z- x*H) on the kalman filter will be high.
R[] should also be diagonal, though the covariance of the error of measurement can be different from one. If you have normalized coordinates ( the width=height=1), R could be something like 0.01. If you are dealing with pixel coordinates, R=diagonal_ones means an error of one pixel, and that's ok. You can try with 2,3,4, etc...
Your transition matrix A[], which is supposed to propagate the state on each frame, looks like a transition matrix for a state composed of x,y, v_x and v_y. You don't mention velocity in your model, you only talk about measurements. Be careful, do not confuse State (describes position of the face) with Measurements (used to update the state). Your state can be position, velocity and acceleration, and your measurements can be n points in the image. Or the x and y position of the face.
Hope this helps.
I've a calibrated camera where I exactly know the intrinsic and extrinsic data. Also the height of the camera is known. Now I want to virtually rotate the camera for getting a Bird's eye view, such that I can build the Homography matrix with the three rotation angles and the translation.
I know that 2 points can be transformed from one image to another via Homography as
x=K*(R-t*n/d)K^-1 * x'
there are a few things I'd like to know now:
if I want to bring back the image coordinate in ccs, I have to multiply it with K^-1, right? As Image coordinate I use (x',y',1) ?
Then I need to built a rotation matrix for rotating the ccs...but which convention should I use? And how do I know how to set up my WCS?
The next thing is the normal and the distance. Is it right just to take three points lying on the ground and compute the normal out of them? and is the distance then the camera height?
Also I'd like to know how I can change the height of the virtually looking bird view camera, such that I can say I want to see the ground plane from 3 meters height. How can I use the unit "meter" in the translation and homography Matrix?
So far for now, it would be great if someone could enlighten and help me. And please don't suggest generating the bird view with "getperspective", I ve already tried that but this way is not suitable for me.
Senna
That is the code i would advise (it's one of mine), to my mind it answers a lot of your questions,
If you want the distance, i would precise that it is in the Z matrix, the (4,3) coefficient.
Hope it will help you...
Mat source=imread("Whatyouwant.jpg");
int alpha_=90., beta_=90., gamma_=90.;
int f_ = 500, dist_ = 500;
Mat destination;
string wndname1 = getFormatWindowName("Source: ");
string wndname2 = getFormatWindowName("WarpPerspective: ");
string tbarname1 = "Alpha";
string tbarname2 = "Beta";
string tbarname3 = "Gamma";
string tbarname4 = "f";
string tbarname5 = "Distance";
namedWindow(wndname1, 1);
namedWindow(wndname2, 1);
createTrackbar(tbarname1, wndname2, &alpha_, 180);
createTrackbar(tbarname2, wndname2, &beta_, 180);
createTrackbar(tbarname3, wndname2, &gamma_, 180);
createTrackbar(tbarname4, wndname2, &f_, 2000);
createTrackbar(tbarname5, wndname2, &dist_, 2000);
imshow(wndname1, source);
while(true) {
double f, dist;
double alpha, beta, gamma;
alpha = ((double)alpha_ - 90.)*PI/180;
beta = ((double)beta_ - 90.)*PI/180;
gamma = ((double)gamma_ - 90.)*PI/180;
f = (double) f_;
dist = (double) dist_;
Size taille = source.size();
double w = (double)taille.width, h = (double)taille.height;
// Projection 2D -> 3D matrix
Mat A1 = (Mat_<double>(4,3) <<
1, 0, -w/2,
0, 1, -h/2,
0, 0, 0,
0, 0, 1);
// Rotation matrices around the X,Y,Z axis
Mat RX = (Mat_<double>(4, 4) <<
1, 0, 0, 0,
0, cos(alpha), -sin(alpha), 0,
0, sin(alpha), cos(alpha), 0,
0, 0, 0, 1);
Mat RY = (Mat_<double>(4, 4) <<
cos(beta), 0, -sin(beta), 0,
0, 1, 0, 0,
sin(beta), 0, cos(beta), 0,
0, 0, 0, 1);
Mat RZ = (Mat_<double>(4, 4) <<
cos(gamma), -sin(gamma), 0, 0,
sin(gamma), cos(gamma), 0, 0,
0, 0, 1, 0,
0, 0, 0, 1);
// Composed rotation matrix with (RX,RY,RZ)
Mat R = RX * RY * RZ;
// Translation matrix on the Z axis change dist will change the height
Mat T = (Mat_<double>(4, 4) <<
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, dist,
0, 0, 0, 1);
// Camera Intrisecs matrix 3D -> 2D
Mat A2 = (Mat_<double>(3,4) <<
f, 0, w/2, 0,
0, f, h/2, 0,
0, 0, 1, 0);
// Final and overall transformation matrix
Mat transfo = A2 * (T * (R * A1));
// Apply matrix transformation
warpPerspective(source, destination, transfo, taille, INTER_CUBIC | WARP_INVERSE_MAP);
imshow(wndname2, destination);
waitKey(30);
}
This code works for me but I don't know why the Roll and Pitch angles are exchanged. When I change "alpha", the image is warped in pitch and when I change "beta" the image in warped in roll. So, I changed my rotation matrix, as can be seen below.
Also, the RY has a signal error. You can check Ry at: http://en.wikipedia.org/wiki/Rotation_matrix.
The rotation metrix I use:
Mat RX = (Mat_<double>(4, 4) <<
1, 0, 0, 0,
0, cos(beta), -sin(beta), 0,
0, sin(beta), cos(beta), 0,
0, 0, 0, 1);
Mat RY = (Mat_<double>(4, 4) <<
cos(alpha), 0, sin(alpha), 0,
0, 1, 0, 0,
-sin(alpha), 0, cos(alpha), 0,
0, 0, 0, 1);
Mat RZ = (Mat_<double>(4, 4) <<
cos(gamma), -sin(gamma), 0, 0,
sin(gamma), cos(gamma), 0, 0,
0, 0, 1, 0,
0, 0, 0, 1);
Regards