Related
I'm trying to get opencv camera calibration working but having trouble getting it to output valid data. I have an uncalibrated camera that I would like to calibrate, but to test my code I am using an Azure Kinect camera (the color camera), since the SDK supplies the correct intrinsics for it and I can verify them. I've collected 30 images of a chessboard from slightly different angles, which I understand should be sufficient, and run the calibration function, but no matter what flags I pass in I get values for fx and fy that are pretty different from the correct fx and fy, and distortion coefficients that are WILDLY different. Am I doing something wrong? Do I need more or better data?
A sample of the images I'm using can be found here: https://www.dropbox.com/sh/9pa94uedoe5mlxz/AABisSvgWwBT-bY65lfzp2N3a?dl=0
Save them in c:\calibration_test to run the code below.
#include <filesystem>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
using namespace std;
namespace fs = experimental::filesystem;
static bool extractCorners(cv::Mat colorImage, vector<cv::Point3f>& corners3d, vector<cv::Point2f>& corners)
{
// Each square is 20x20mm
const float kSquareSize = 0.020f;
const cv::Size boardSize(7, 9);
const cv::Point3f kCenterOffset((float)(boardSize.width - 1) * kSquareSize, (float)(boardSize.height - 1) * kSquareSize, 0.f);
cv::Mat image;
cv::cvtColor(colorImage, image, cv::COLOR_BGRA2GRAY);
int chessBoardFlags = cv::CALIB_CB_ADAPTIVE_THRESH | cv::CALIB_CB_NORMALIZE_IMAGE;
if (!cv::findChessboardCorners(image, boardSize, corners, chessBoardFlags))
{
return false;
}
cv::cornerSubPix(image, corners, cv::Size(11, 11), cv::Size(-1, -1),
cv::TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::COUNT, 30, 0.1));
// Construct the corners
for (int i = 0; i < boardSize.height; ++i)
for (int j = 0; j < boardSize.width; ++j)
corners3d.push_back(cv::Point3f(j * kSquareSize, i * kSquareSize, 0) - kCenterOffset);
return true;
}
int main()
{
vector<cv::Mat> frames;
for (const auto& p : fs::directory_iterator("c:\\calibration_test\\"))
{
frames.push_back(cv::imread(p.path().string()));
}
int numFrames = (int)frames.size();
vector<vector<cv::Point2f>> corners(numFrames);
vector<vector<cv::Point3f>> corners3d(numFrames);
int framesWithCorners = 0;
for (int i = 0; i < numFrames; ++i)
{
if (extractCorners(frames[i], corners3d[framesWithCorners], corners[framesWithCorners]))
{
++framesWithCorners;
}
}
numFrames = framesWithCorners;
corners.resize(numFrames);
corners3d.resize(numFrames);
// Camera intrinsics come from the Azure Kinect API
cv::Matx33d cameraMatrix(
914.111755f, 0.f, 960.887390f,
0.f, 913.880615f, 551.566528f,
0.f, 0.f, 1.f);
vector<float> distCoeffs = { 0.576340079f, -2.71203661f, 0.000563957903f, -0.000239689150f, 1.54344523f, 0.454746544f, -2.53860712f, 1.47272563f };
cv::Size imageSize = frames[0].size();
vector<cv::Point3d> rotations;
vector<cv::Point3d> translations;
int flags = cv::CALIB_USE_INTRINSIC_GUESS | cv::CALIB_FIX_PRINCIPAL_POINT | cv::CALIB_RATIONAL_MODEL;
double result = cv::calibrateCamera(corners3d, corners, imageSize, cameraMatrix, distCoeffs, rotations, translations,
flags);
// After this call, cameraMatrix has different values for fx and fy, and WILDLY different distortion coefficients.
cout << "fx: " << cameraMatrix(0, 0) << endl;
cout << "fy: " << cameraMatrix(1, 1) << endl;
cout << "cx: " << cameraMatrix(0, 2) << endl;
cout << "cy: " << cameraMatrix(1, 2) << endl;
for (size_t i = 0; i < distCoeffs.size(); ++i)
{
cout << "d" << i << ": " << distCoeffs[i] << endl;
}
return 0;
}
Some sample output is:
fx: 913.143
fy: 917.965
cx: 960.887
cy: 551.567
d0: 0.327596
d1: -73.1837
d2: -0.00125972
d3: 0.002805
d4: -7.93086
d5: 0.295437
d6: -73.481
d7: -3.25043
d8: 0
d9: 0
d10: 0
d11: 0
d12: 0
d13: 0
Any idea what I'm doing wrong?
Bonus question: Why do I get 14 distortion coefficients back instead of 8? If I leave off CALIB_RATIONAL_MODEL then I only get 5 (three radial and two tangential).
You need to take images from the whole field of view of the camera to correctly capture the lens distortion characteristics. The images you provide only show the chessboad in one position, slightly angled.
Ideally you should have images of the chessboard evenly distributed over the x and y axis of the image plane, right up to the edges of the image. Make sure sufficient white boarder around the board is always visible though for detection robustness.
You should also try to capture images where the chessboard is nearer to the camera and farther away, not just a uniform distance. The different angles you provide look good on the other hand.
You can find an extensive guide how to ensure good calibration results in this answer: How to verify the correctness of calibration of a webcam?
Comparing your camera matrix to the one coming from Azure Kinect API it doesn't look so bad. The principle point is pretty spot on and the focal length is in a reasonable range. If you improve the quality of the input with my tips and the SO answer I have provided the results should be even closer. Comparing sets of distortion coefficients by their distance doesn't really work that well, the error function is not convex so you can have lots of local minima that produce relatively good results but they are far from the global minimum that would yield the best results. If that explanation makes sense to you.
Regarding your bonus question: I only see 8 values filled in in the output you return, the rest is 0 so doesn't have any influence. I'm not sure if the output is expected to be different from that function.
We captured a 3d Image using Kinect with OpenNI Library and got the rgb and depth images in the form of OpenCV Mat using this code.
main()
{
OpenNI::initialize();
puts( "Kinect initialization..." );
Device device;
if ( device.open( openni::ANY_DEVICE ) != 0 )
{
puts( "Kinect not found !" );
return -1;
}
puts( "Kinect opened" );
VideoStream depth, color;
color.create( device, SENSOR_COLOR );
color.start();
puts( "Camera ok" );
depth.create( device, SENSOR_DEPTH );
depth.start();
puts( "Depth sensor ok" );
VideoMode paramvideo;
paramvideo.setResolution( 640, 480 );
paramvideo.setFps( 30 );
paramvideo.setPixelFormat( PIXEL_FORMAT_DEPTH_100_UM );
depth.setVideoMode( paramvideo );
paramvideo.setPixelFormat( PIXEL_FORMAT_RGB888 );
color.setVideoMode( paramvideo );
puts( "Réglages des flux vidéos ok" );
// If the depth/color synchronisation is not necessary, start is faster :
//device.setDepthColorSyncEnabled( false );
// Otherwise, the streams can be synchronized with a reception in the order of our choice :
device.setDepthColorSyncEnabled( true );
device.setImageRegistrationMode( openni::IMAGE_REGISTRATION_DEPTH_TO_COLOR );
VideoStream** stream = new VideoStream*[2];
stream[0] = &depth;
stream[1] = &color;
puts( "Kinect initialization completed" );
if ( device.getSensorInfo( SENSOR_DEPTH ) != NULL )
{
VideoFrameRef depthFrame, colorFrame;
cv::Mat colorcv( cv::Size( 640, 480 ), CV_8UC3, NULL );
cv::Mat depthcv( cv::Size( 640, 480 ), CV_16UC1, NULL );
cv::namedWindow( "RGB", CV_WINDOW_AUTOSIZE );
cv::namedWindow( "Depth", CV_WINDOW_AUTOSIZE );
int changedIndex;
while( device.isValid() )
{
OpenNI::waitForAnyStream( stream, 2, &changedIndex );
switch ( changedIndex )
{
case 0:
depth.readFrame( &depthFrame );
if ( depthFrame.isValid() )
{
depthcv.data = (uchar*) depthFrame.getData();
cv::imshow( "Depth", depthcv );
}
break;
case 1:
color.readFrame( &colorFrame );
if ( colorFrame.isValid() )
{
colorcv.data = (uchar*) colorFrame.getData();
cv::cvtColor( colorcv, colorcv, CV_BGR2RGB );
cv::imshow( "RGB", colorcv );
}
break;
default:
puts( "Error retrieving a stream" );
}
cv::waitKey( 1 );
}
cv::destroyWindow( "RGB" );
cv::destroyWindow( "Depth" );
}
depth.stop();
depth.destroy();
color.stop();
color.destroy();
device.close();
OpenNI::shutdown();
}
We added some code to above and got the RGB and depth Mat from that and the we processed RGB using OpenCV.
Now we need to display that image in 3D.
We are using :-
1) Windows 8 x64
2) Visual Studio 2012 x64
3) OpenCV 2.4.10
4) OpenNI 2.2.0.33
5) Kinect1
6) Kinect SDK 1.8.0
Questions :-
1) Can we directly display this Image using OpenCV OR we need any external libraries ??
2) If we need to use external Libraries which one is better for this simple task OpenGL, PCL Or any other ??
3) Does PCL support Visual Studio 12 and OpenNI2 and Since PCL comes packed with other version of OpenNI does these two versions conflict??
To improve the answer of antarctician,
to display the image in 3D you need to create your point cloud first...
The RGB and Depth images give you the necessary data to create an organized colored pointcloud. To do so, you need to calculate the x,y,z values for each point. The z value comes from the depth pixel, but the x and y must be calculated.
to do it you can do something like this:
void Viewer::get_pcl(cv::Mat& color_mat, cv::Mat& depth_mat, pcl::PointCloud<pcl::PointXYZRGBA>& cloud ){
float x,y,z;
for (int j = 0; j< depth_mat.rows; j ++){
for(int i = 0; i < depth_mat.cols; i++){
// the RGB data is created
PCD_BGRA pcd_BGRA;
pcd_BGRA.B = color_mat.at<cv::Vec3b>(j,i)[0];
pcd_BGRA.R = color_mat.at<cv::Vec3b>(j,i)[2];
pcd_BGRA.G = color_mat.at<cv::Vec3b>(j,i)[1];
pcd_BGRA.A = 0;
pcl::PointXYZRGBA vertex;
int depth_value = (int) depth_mat.at<unsigned short>(j,i);
// find the world coordinates
openni::CoordinateConverter::convertDepthToWorld(depth, i, j, (openni::DepthPixel) depth_mat.at<unsigned short>(j,i), &x, &y,&z );
// the point is created with depth and color data
if ( limitx_min <= i && limitx_max >=i && limity_min <= j && limity_max >= j && depth_value != 0 && depth_value <= limitz_max && depth_value >= limitz_min){
vertex.x = (float) x;
vertex.y = (float) y;
vertex.z = (float) depth_value;
} else {
// if the data is outside the boundaries
vertex.x = bad_point;
vertex.y = bad_point;
vertex.z = bad_point;
}
vertex.rgb = pcd_BGRA.RGB_float;
// the point is pushed back in the cloud
cloud.points.push_back( vertex );
}
}
}
and PCD_BGRA is
union PCD_BGRA
{
struct
{
uchar B; // LSB
uchar G; // ---
uchar R; // MSB
uchar A; //
};
float RGB_float;
uint RGB_uint;
};
Of course, this is for the case you want to use PCL, but it is more or less the calculations of the x,y,z values stands. This relies on openni::CoordinateConverter::convertDepthToWorld to find the position of the point in 3D. You may also do this manually
const float invfocalLength = 1.f / 525.f;
const float centerX = 319.5f;
const float centerY = 239.5f;
const float factor = 1.f / 1000.f;
float dist = factor * (float)(*depthdata);
p.x = (x-centerX) * dist * invfocalLength;
p.y = (y-centerY) * dist * invfocalLength;
p.z = dist;
Where centerX, centerY, and focallength are the intrinsic calibration of the camera (this one is for Kinect). and the factor it is if you need the distance in meters or millimeters... this value depends on your program
For the questions:
Yes, you can display it using the latest OpenCV with the viz class or with another external library that suits your needs.
OpenGl is nice, but PCL (or OpenCV) is easier to use if you do not know how to use any of them (I mean for displaying pointclouds)
I haven't use it with windows, but in theory it can be used with visual studio 2012. As far as I know the version that PCL comes packed with is OpenNI 1, and it won't affect OpenNI2...
I haven't done this with OpenNI and OpenCV but I hope I can help you. So first of all to answer your first two questions:
Probably yes, as far as I understood you want to visualize a 3D point cloud. OpenCV is only an image processing library and you would need a 3D rendering library to do what you want.
I worked with OpenSceneGraph and would recommend it. However you can also use OpenGL or Direct X.
If you only want to visualize a point cloud such as the "3D View" of the Kinect Studio, you wouldn't need PCL as it would be too much for this simple job.
The basic idea of doing this task is to create 3D quads as the same number of pixels you have on your images. For example if you have a 640x480 resolution, you would need 640*480 quads. Each quad would have the color of the corresponding pixel depending on the pixel values from the color image. You would then move these quads back and forth on the Z axis, depending on the values from the depth image. This can be done with modern OpenGL or if you feel more comfortable with C++, OpenSceneGraph(which is also based on OpenGL).
You would have to be careful about two things:
Drawing so many quads can be slow even on a modern computer. You would need to read about "instanced rendering" to render a large number of instances of an object(in our case a quad) in a single GPU draw call. This can be done using the vertex shader.
Since the RGB and the Depth camera of the Kinect have different physical locations, you would need to calibrate both of them. There are functions to do this in the official Kinect SDK, however I don't know about OpenNI.
If you decide to do this with OpenGL, I would suggest reading about the GPU pipeline if you aren't familiar with it. This would help you to save a lot of time when working with the vertex shaders.
I'm trying to implement in OpenCV an algorithm to bring out the details of a palm vein pattern. I've based myself on a paper called "A Contactless Biometric System Using Palm Print and Palm Vein Features" that I've found on the Internet. The part I'm interested in is the chapter 3.2 Pre-processing. The steps involved are shown there.
I'd like to do the implementation using OpenCV but until now I'm stuck hard. Especially they use a Laplacian filter on the response of a low-pass filter to isolate the principal veins but my result gets very noisy, no matter the parameters I try!
Any help would be greatly appreciated!
Ok finally I've figured out by myself how to do it. Here is my code :
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#define THRESHOLD 150
#define BRIGHT 0.7
#define DARK 0.2
using namespace std;
using namespace cv;
int main()
{
// Read source image in grayscale mode
Mat img = imread("roi.png", CV_LOAD_IMAGE_GRAYSCALE);
// Apply ??? algorithm from https://stackoverflow.com/a/14874992/2501769
Mat enhanced, float_gray, blur, num, den;
img.convertTo(float_gray, CV_32F, 1.0/255.0);
cv::GaussianBlur(float_gray, blur, Size(0,0), 10);
num = float_gray - blur;
cv::GaussianBlur(num.mul(num), blur, Size(0,0), 20);
cv::pow(blur, 0.5, den);
enhanced = num / den;
cv::normalize(enhanced, enhanced, 0.0, 255.0, NORM_MINMAX, -1);
enhanced.convertTo(enhanced, CV_8UC1);
// Low-pass filter
Mat gaussian;
cv::GaussianBlur(enhanced, gaussian, Size(0,0), 3);
// High-pass filter on computed low-pass image
Mat laplace;
Laplacian(gaussian, laplace, CV_32F, 19);
double lapmin, lapmax;
minMaxLoc(laplace, &lapmin, &lapmax);
double scale = 127/ max(-lapmin, lapmax);
laplace.convertTo(laplace, CV_8U, scale, 128);
// Thresholding using empirical value of 150 to create a vein mask
Mat mask;
cv::threshold(laplace, mask, THRESHOLD, 255, CV_THRESH_BINARY);
// Clean-up the mask using open morphological operation
morphologyEx(mask,mask,cv::MORPH_OPEN,
getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(5,5)));
// Connect the neighboring areas using close morphological operation
Mat connected;
morphologyEx(mask,mask,cv::MORPH_CLOSE,
getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(11,11)));
// Blurry the mask for a smoother enhancement
cv::GaussianBlur(mask, mask, Size(15,15), 0);
// Blurry a little bit the image as well to remove noise
cv::GaussianBlur(enhanced, enhanced, Size(3,3), 0);
// The mask is used to amplify the veins
Mat result(enhanced);
ushort new_pixel;
double coeff;
for(int i=0;i<mask.rows;i++){
for(int j=0;j<mask.cols;j++){
coeff = (1.0-(mask.at<uchar>(i,j)/255.0))*BRIGHT + (1-DARK);
new_pixel = coeff * enhanced.at<uchar>(i,j);
result.at<uchar>(i,j) = (new_pixel>255) ? 255 : new_pixel;
}
}
// Show results
imshow("frame", img);
waitKey();
imshow("frame", result);
waitKey();
return 0;
}
So the main steps of the paper are followed here. For some parts I've inspired myself on code I've found. It's the case for the first processing I apply that I've found here. Also for the High-pass filter (laplacian) I've inspired myself on the code given in OpenCV 2 Computer Vision Application Programming Cookbook.
Finally I've done some little improvements by allowing to modify the brightness of the background and the darkness of the veins (see defines BRIGHT and DARK). I've also decided to blur a bit the mask to have a more "natural" enhancement.
Here the results (Source / Paper result / My result) :
I am trying to get the pose of the camera with the help of solvePNP() from OpenCV.
After running my program I get the following errors:
OpenCV Error: Assertion failed (npoints >= 0 && npoints == std::max(ipoints.checkVector(2, CV_32F), ipoints.checkVector(2, CV_64F))) in solvePnP, file /opt/local/var/macports/build/_opt_local_var_macports_sources_rsync.macports.org_release_tarballs_ports_graphics_opencv/opencv/work/OpenCV-2.4.2/modules/calib3d/src/solvepnp.cpp, line 55
libc++abi.dylib: terminate called throwing an exception
I tried to search how to solve these errors, but I couldn't resolve it unfortunately!
Here is my code, all comment/help is much appreciated:
enum Pattern { NOT_EXISTING, CHESSBOARD, CIRCLES_GRID, ASYMMETRIC_CIRCLES_GRID };
void calcBoardCornerPositions(Size boardSize, float squareSize, vector<Point3f>& corners,
Pattern patternType)
{
corners.clear();
switch(patternType)
{
case CHESSBOARD:
case CIRCLES_GRID:
for( int i = 0; i < boardSize.height; ++i )
for( int j = 0; j < boardSize.width; ++j )
corners.push_back(Point3f(float( j*squareSize ), float( i*squareSize ), 0));
break;
case ASYMMETRIC_CIRCLES_GRID:
for( int i = 0; i < boardSize.height; i++ )
for( int j = 0; j < boardSize.width; j++ )
corners.push_back(Point3f(float((2*j + i % 2)*squareSize), float(i*squareSize), 0));
break;
}
}
int main(int argc, char* argv[])
{
float squareSize = 50.f;
Pattern calibrationPattern = CHESSBOARD;
//vector<Point2f> boardCorners;
vector<vector<Point2f> > imagePoints(1);
vector<vector<Point3f> > boardPoints(1);
Size boardSize;
boardSize.width = 9;
boardSize.height = 6;
vector<Mat> intrinsics, distortion;
string filename = "out_camera_xml.xml";
FileStorage fs(filename, FileStorage::READ);
fs["camera_matrix"] >> intrinsics;
fs["distortion_coefficients"] >> distortion;
fs.release();
vector<Mat> rvec, tvec;
Mat img = imread(argv[1], CV_LOAD_IMAGE_GRAYSCALE); // at kell adnom egy kepet
bool found = findChessboardCorners(img, boardSize, imagePoints[0], CV_CALIB_CB_ADAPTIVE_THRESH);
calcBoardCornerPositions(boardSize, squareSize, boardPoints[0], calibrationPattern);
boardPoints.resize(imagePoints.size(),boardPoints[0]);
//***Debug start***
cout << imagePoints.size() << endl << boardPoints.size() << endl << intrinsics.size() << endl << distortion.size() << endl;
//***Debug end***
solvePnP(Mat(boardPoints), Mat(imagePoints), intrinsics, distortion, rvec, tvec);
for(int i=0; i<rvec.size(); i++) {
cout << rvec[i] << endl;
}
return 0;
}
EDIT (some debug info):
I debugged it row by row. I stepped into all of the functions. I am getting the Assertion failed in SolvePNP(...). You can see below what I see when I step into the solvePNP function. First it jumps over the first if statement /if(vec.empty())/, and goes into the second if statement /if( !copyData )/, there when it executes the last line /*datalimit = dataend = datastart + rows*step[0]*/ jumps back to the first if statement and returns => than I get the Assertion failed error.
template<typename _Tp> inline Mat::Mat(const vector<_Tp>& vec, bool copyData)
: flags(MAGIC_VAL | DataType<_Tp>::type | CV_MAT_CONT_FLAG),
dims(2), rows((int)vec.size()), cols(1), data(0), refcount(0),
datastart(0), dataend(0), allocator(0), size(&rows)
{
if(vec.empty())
return;
if( !copyData )
{
step[0] = step[1] = sizeof(_Tp);
data = datastart = (uchar*)&vec[0];
datalimit = dataend = datastart + rows*step[0];
}
else
Mat((int)vec.size(), 1, DataType<_Tp>::type, (uchar*)&vec[0]).copyTo(*this);
}
Step into the function in a debugger and see exactly which assertion is failing. ( Probably it requires values in double (CV_64F) rather than float. )
OpenCVs new "inputarray" wrapper issuppsoed to allow you to call functions with any shape of mat, vector of points, etc - and it will sort it out. But a lot of functions assume a particular inut format or have obsolete assertions enforcing a particular format.
The stereo/calibration systems are the worst for requiring a specific layout, and frequently succesive operations require a different layout.
The types don't seem right, at least in the code that worked for me I used different types(as mentioned in the documentation).
objectPoints – Array of object points in the object coordinate space, 3xN/Nx3 1-channel or 1xN/Nx1 3-channel, where N is the number of points. vector can be also passed here.
imagePoints – Array of corresponding image points, 2xN/Nx2 1-channel or 1xN/Nx1 2-channel, where N is the number of points.
vector can be also passed here.
cameraMatrix – Input camera matrix A = \vecthreethree{fx}{0}{cx}{0}{fy}{cy}{0}{0}{1} .
distCoeffs – Input
vector of distortion coefficients (k_1, k_2, p_1, p_2[, k_3[, k_4,
k_5, k_6]]) of 4, 5, or 8 elements. If the vector is NULL/empty, the
zero distortion coefficients are assumed.
rvec – Output rotation vector (see Rodrigues() ) that, together with tvec , brings points from the model coordinate system to the
camera coordinate system.
tvec – Output translation vector.
useExtrinsicGuess – If true (1), the function uses the provided rvec and tvec values as initial
approximations of the rotation and translation vectors, respectively,
and further optimizes them.
Documentation from here.
vector<Mat> rvec, tvec should be Mat rvec, tvec instead.
vector<vector<Point2f> > imagePoints(1) should be vector<Point2f> imagePoints(1) instead.
vector<vector<Point3f> > boardPoints(1) should be
vector<Point3f> boardPoints(1) instead.
Note: I encountered the exact same problem, and this worked for me(It is a little bit confusing since calibrateCamera use vectors). Haven't tried it for imagePoints or boardPoints though.(but as it is documented in the link above, vector,vector should work, I thought I'd better mention it), but for rvec,trec I tried it myself.
I run in exactly the same problem with solvePnP and opencv3. I tried to isolate the problem in a single test case. I seams passing a std::vector to cv::InputArray does not what is expected. The following small test works with opencv 2.4.9 but not with 3.2.
And this is exactly the problem when passing a std::vector of points to solvePnP and causes the assert at line 63 in solvepnp.cpp to fail !
Generating a cv::mat out of the vector list before passing to solvePnP works.
//create list with 3 points
std::vector<cv::Point3f> vectorList;
vectorList.push_back(cv::Point3f(1.0, 1.0, 1.0));
vectorList.push_back(cv::Point3f(1.0, 1.0, 1.0));
vectorList.push_back(cv::Point3f(1.0, 1.0, 1.0));
//to input array
cv::InputArray inputArray(vectorList);
cv::Mat mat = inputArray.getMat();
cv::Mat matDirect = cv::Mat(vectorList);
LOG_INFO("Size vector: %d mat: %d matDirect: %d", vectorList.size(), mat.checkVector(3, CV_32F), matDirect.checkVector(3, CV_32F));
QVERIFY(vectorList.size() == mat.checkVector(3, CV_32F));
Result opencv 2.4.9 macos:
TestObject: OpenCV
Size vector: 3 mat: 3 matDirect: 3
Result opencv 3.2 win64:
TestObject: OpenCV
Size vector: 3 mat: 9740 matDirect: 3
I faced the same issue. In my case, (in python) converted the input array type as float.
It worked fine afterwards.
I'm currently working on Image stitching using OpenCV 2.3.1 on Visual Studio 2010, but I'm having some trouble.
Problem Description
I'm trying to write a code for stitching multiple images derived from a few cameras(about 3~4), i,e, the code should keep executing image stitching until I ask it to stop.
The following is what I've done so far:
(For simplification, I'll replace some part of the code with just a few words)
1.Reading frames(images) from 2 cameras (Currently I'm just working on 2 cameras.)
2.Feature detection, descriptor calculation (SURF)
3.Feature matching using FlannBasedMatcher
4.Removing outliers and calculate the Homography with inliers using RANSAC.
5.Warp one of both images.
For step 5., I followed the answer in the following thread and just changed some parameters:
Stitching 2 images in opencv
However, the result is terrible though.
I just uploaded the result onto youtube and of course only those who have the link will be able to see it.
http://youtu.be/Oy5z_7LeaMk
My code is shown below:
(Only crucial parts are shown)
VideoCapture cam1, cam2;
cam1.open(0);
cam2.open(1);
while(1)
{
Mat frm1, frm2;
cam1 >> frm1;
cam2 >> frm2;
//(SURF detection, descriptor calculation
//and matching using FlannBasedMatcher)
double max_dist = 0; double min_dist = 100;
//-- Quick calculation of max and min distances between keypoints
for( int i = 0; i < descriptors_1.rows; i++ )
{
double dist = matches[i].distance;
if( dist < min_dist ) min_dist = dist;
if( dist > max_dist ) max_dist = dist;
}
(Draw only "good" matches
(i.e. whose distance is less than 3*min_dist ))
vector<Point2f> frame1;
vector<Point2f> frame2;
for( int i = 0; i < good_matches.size(); i++ )
{
//-- Get the keypoints from the good matches
frame1.push_back( keypoints_1[ good_matches[i].queryIdx ].pt );
frame2.push_back( keypoints_2[ good_matches[i].trainIdx ].pt );
}
Mat H = findHomography( Mat(frame1), Mat(frame2), CV_RANSAC );
cout << "Homography: " << H << endl;
/* warp the image */
Mat warpImage2;
warpPerspective(frm2, warpImage2,
H, Size(frm2.cols, frm2.rows), INTER_CUBIC);
Mat final(Size(frm2.cols*3 + frm1.cols, frm2.rows),CV_8UC3);
Mat roi1(final, Rect(frm1.cols, 0, frm1.cols, frm1.rows));
Mat roi2(final, Rect(2*frm1.cols, 0, frm2.cols, frm2.rows));
warpImage2.copyTo(roi2);
frm1.copyTo(roi1);
imshow("final", final);
What else should I do to make the stitching better?
Besides, is it reasonable to make the Homography matrix fixed instead of keeping computing it ?
What I mean is to specify the angle and the displacement between the 2 cameras by myself so as to derive a Homography matrix that satisfies what I want.
Thanks. :)
It sounds like you are going about this sensibly, but if you have access to both of the cameras, and they will remain stationary with respect to each other, then calibrating offline, and simply applying the transformation online will make your application more efficient.
One point to note is, you say you are using the findHomography function from OpenCV. From the documentation, this function:
Finds a perspective transformation between two planes.
However, your points are not restricted to a specific plane as they are imaging a 3D scene. If you wanted to calibrate offline, you could image a chessboard with both cameras, and the detected corners could be used in this function.
Alternatively, you may like to investigate the Fundamental matrix, which can be calculated with a similar function. This matrix describes the relative position of the cameras, but some work (and a good textbook) will be required to extract them.
If you can find it, I would strongly recommend having a look at Part II: "Two-View Geometry" in the book "Multiple View Geometry in computer vision", by Richard Hartley and Andrew Zisserman, which goes through the process in detail.
I have been working lately on image registration. My algorithm takes two images, calculates the SURF features, find correspondences, find homography matrix and then stitch both images together, I did it with the next code:
void stich(Mat base, Mat target,Mat homography, Mat& panorama){
Mat corners1(1, 4,CV_32F);
Mat corners2(1,4,CV_32F);
Mat corners(1,4,CV_32F);
vector<Mat> planes;
/* compute corners
of warped image
*/
corners1.at<float>(0,0)=0;
corners2.at<float>(0,0)=0;
corners1.at<float>(0,1)=0;
corners2.at<float>(0,1)=target.rows;
corners1.at<float>(0,2)=target.cols;
corners2.at<float>(0,2)=0;
corners1.at<float>(0,3)=target.cols;
corners2.at<float>(0,3)=target.rows;
planes.push_back(corners1);
planes.push_back(corners2);
merge(planes,corners);
perspectiveTransform(corners, corners, homography);
/* compute size of resulting
image and allocate memory
*/
double x_start = min( min( (double)corners.at<Vec2f>(0,0)[0], (double)corners.at<Vec2f> (0,1)[0]),0.0);
double x_end = max( max( (double)corners.at<Vec2f>(0,2)[0], (double)corners.at<Vec2f>(0,3)[0]), (double)base.cols);
double y_start = min( min( (double)corners.at<Vec2f>(0,0)[1], (double)corners.at<Vec2f>(0,2)[1]), 0.0);
double y_end = max( max( (double)corners.at<Vec2f>(0,1)[1], (double)corners.at<Vec2f>(0,3)[1]), (double)base.rows);
/*Creating image
with same channels, depth
as target
and proper size
*/
panorama.create(Size(x_end - x_start + 1, y_end - y_start + 1), target.depth());
planes.clear();
/*Planes should
have same n.channels
as target
*/
for (int i=0;i<target.channels();i++){
planes.push_back(panorama);
}
merge(planes,panorama);
// create translation matrix in order to copy both images to correct places
Mat T;
T=Mat::zeros(3,3,CV_64F);
T.at<double>(0,0)=1;
T.at<double>(1,1)=1;
T.at<double>(2,2)=1;
T.at<double>(0,2)=-x_start;
T.at<double>(1,2)=-y_start;
// copy base image to correct position within output image
warpPerspective(base, panorama, T,panorama.size(),INTER_LINEAR| CV_WARP_FILL_OUTLIERS);
// change homography to take necessary translation into account
gemm(T, homography,1,T,0,T);
// warp second image and copy it to output image
warpPerspective(target,panorama, T, panorama.size(),INTER_LINEAR);
//tidy
corners.release();
T.release();
}
Any question I will try