Augmented reality with Aruco and SceneKit - ios

I'm trying to make augmented reality demo project with simple 3d object in the center of marker. I need to make it with OpenCV and SceneKit.
My steps are:
I obtain the corners of marker using Aruco
with cv::solvePnP get the tvec and rvec.
convert the tvec and rvec from OpenCv's Coordinate System to the SceneKit Coordinate System.
apply the converted rotation and translation to the camera node.
The Problem is:
The object is not centered on the marker. Rotation of object looks good. But is not positioned where it should be.
SolvePnp code:
cv::Mat intrinMat(3,3,cv::DataType<double>::type);
//From ARKit (ARFrame camera.intrinsics) - iphone 6s plus
intrinMat.at<double>(0,0) = 1662.49;
intrinMat.at<double>(0,1) = 0.0;
intrinMat.at<double>(0,2) = 0.0;
intrinMat.at<double>(1,0) = 0.0;
intrinMat.at<double>(1,1) = 1662.49;
intrinMat.at<double>(1,2) = 0.0;
intrinMat.at<double>(2,0) = 960.0 / 2;
intrinMat.at<double>(2,1) = 540.0 / 2;
intrinMat.at<double>(2,2) = 0.0;
double marker_dim = 3;
cv::Ptr<cv::aruco::Dictionary> dictionary = cv::aruco::getPredefinedDictionary(cv::aruco::DICT_6X6_250);
CVPixelBufferLockBaseAddress(pixelBuffer, 0);
void *baseaddress = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0);
CGFloat width = CVPixelBufferGetWidth(pixelBuffer);
CGFloat height = CVPixelBufferGetHeight(pixelBuffer);
cv::Mat mat(height, width, CV_8UC1, baseaddress, 0); //CV_8UC1
cv::rotate(mat, mat, cv::ROTATE_90_CLOCKWISE);
std::vector<int> ids;
std::vector<std::vector<cv::Point2f>> corners;
cv::aruco::detectMarkers(mat,dictionary,corners,ids);
if(ids.size() > 0) {
cv::Mat colorMat;
cv::cvtColor(mat, colorMat, CV_GRAY2RGB);
cv::aruco::drawDetectedMarkers(colorMat, corners, ids, cv::Scalar(0,255,24));
cv::Mat distCoeffs = cv::Mat::zeros(8, 1, cv::DataType<double>::type); //zero out distortion for now
//MARK: solvepnp
std::vector<cv::Point3f> object_points;
object_points = {cv::Point3f(-marker_dim , marker_dim , 0),
cv::Point3f(marker_dim , marker_dim , 0),
cv::Point3f(marker_dim , -marker_dim , 0),
cv::Point3f(-marker_dim , -marker_dim , 0)};
std::vector<cv::Point_<float>> image_points = std::vector<cv::Point2f>{corners[0][0], corners[0][1], corners[0][2], corners[0][3]};
std::cout << "object points: " << object_points << std::endl;
std::cout << "image points: " << image_points << std::endl;
cv::Mat rvec, tvec;
cv::solvePnP(object_points, image_points, intrinMat, distCoeffs, rvec, tvec);
cv::aruco::drawAxis(colorMat, intrinMat, distCoeffs, rvec, tvec, 3);
cv::Mat rotation, transform_matrix;
cv::Mat RotX(3, 3, cv::DataType<double>::type);
cv::setIdentity(RotX);
RotX.at<double>(4) = -1; //cos(180) = -1
RotX.at<double>(8) = -1;
cv::Mat R;
cv::Rodrigues(rvec, R);
std::cout << "rvecs: " << rvec << std::endl;
std::cout << "cv::Rodrigues(rvecs, R);: " << R << std::endl;
R = R.t(); // rotation of inverse
std::cout << "R = R.t() : " << R << std::endl;
cv::Mat rvecConverted;
Rodrigues(R, rvecConverted); //
std::cout << "rvec in world coords:\n" << rvecConverted << std::endl;
rvecConverted = RotX * rvecConverted;
std::cout << "rvec scenekit :\n" << rvecConverted << std::endl;
Rodrigues(rvecConverted, rotation);
std::cout << "-R: " << -R << std::endl;
std::cout << "tvec: " << tvec << std::endl;
cv::Mat tvecConverted = -R * tvec;
std::cout << "tvec in world coords:\n" << tvecConverted << std::endl;
tvecConverted = RotX * tvecConverted;
std::cout << "tvec scenekit :\n" << tvecConverted << std::endl;
SCNVector4 rotationVector = SCNVector4Make(rvecConverted.at<double>(0), rvecConverted.at<double>(1), rvecConverted.at<double>(2), norm(rvecConverted));
SCNVector3 translationVector = SCNVector3Make(tvecConverted.at<double>(0), tvecConverted.at<double>(1), tvecConverted.at<double>(2));
std::cout << "rotation :\n" << rotation << std::endl;
transform_matrix.create(4, 4, CV_64FC1);
transform_matrix( cv::Range(0,3), cv::Range(0,3) ) = rotation * 1;
transform_matrix.at<double>(0, 3) = tvecConverted.at<double>(0,0);
transform_matrix.at<double>(1, 3) = tvecConverted.at<double>(1,0);
transform_matrix.at<double>(2, 3) = tvecConverted.at<double>(2,0);
transform_matrix.at<double>(3, 3) = 1;
TransformModel *model = [TransformModel new];
model.rotationVector = rotationVector;
model.translationVector = translationVector;
return model;
}
swift code:
func initSceneKit() {
let scene = SCNScene()
cameraNode = SCNNode()
let camera = SCNCamera()
camera.zFar = 1000
camera.zNear = 0.1
cameraNode.camera = camera
scene.rootNode.addChildNode(cameraNode)
let scnView = sceneView!
scnView.scene = scene
scnView.autoenablesDefaultLighting = true
scnView.backgroundColor = UIColor.clear
let box = SCNBox(width: 10, height: 10 , length: 10, chamferRadius: 0)
boxNode = SCNNode(geometry: box)
boxNode.position = SCNVector3(0,0,0)
scene.rootNode.addChildNode(boxNode)
sceneView.pointOfView = cameraNode
}
func initCamera() {
let device = AVCaptureDevice.default(AVCaptureDevice.DeviceType.builtInWideAngleCamera, for: .video, position: AVCaptureDevice.Position.back)
let deviceInput = try! AVCaptureDeviceInput(device: device!)
self.session = AVCaptureSession()
self.session.sessionPreset = AVCaptureSession.Preset.iFrame960x540
self.session.addInput(deviceInput)
let sessionOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
let outputQueue = DispatchQueue(label: "VideoDataOutputQueue", attributes: [])
sessionOutput.setSampleBufferDelegate(self, queue: outputQueue)
self.session.addOutput(sessionOutput)
self.previewLayer = AVCaptureVideoPreviewLayer(session: self.session)
self.previewLayer.backgroundColor = UIColor.black.cgColor
self.previewLayer.videoGravity = AVLayerVideoGravity.resizeAspect
self.previewView.layer.addSublayer(self.previewLayer)
self.session.startRunning()
view.setNeedsLayout()
}
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
let pixelBuffer: CVPixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer)!
//QR detection
guard let transQR = OpenCVWrapper.arucoTransformMatrix(from: pixelBuffer) else {
return
}
DispatchQueue.main.async(execute: {
self.setCameraMatrix(transQR)
})
}
func setCameraMatrix(_ transformModel: TransformModel) {
cameraNode.rotation = transformModel.rotationVector
cameraNode.position = transformModel.translationVector
// cameraNode.transform = transformModel.transform
}
image of my result
Repo on github with my project: https://github.com/danilovdorin/ArucoAugmentedReality

Related

camera pose estimation with solvePnP() and SOLVEPNP_IPPE_SQUARE method

I'm working with ARKit and trying to get camera position from QR code with known size (0.16m).
To detect QR code I'am using Vision framework so i can get each corner point on image.
Data preparation:
let intrinsics = arFrame.camera.intrinsics
let imageResolution = arFrame.camera.imageResolution
let imagePointsArray = [NSValue(cgPoint: visionResult.topLeft), NSValue(cgPoint: visionResult.topRight), NSValue(cgPoint: visionResult.bottomLeft), NSValue(cgPoint: visionResult.bottomRight)]
let intrinsicsArray = (0..<3).flatMap { x in (0..<3).map { y in NSNumber(value: intrinsics[x][y]) } }
let squareLength = NSNumber(value: 0.16)
let res = OpenCVWrapper.findPose(imagePointsArray, intrinsics: intrinsicsArray, size: imageResolution, squareLength: squareLength)
To get camera position I'm using OpenCV solution solvePnP() with flag = SOLVEPNP_IPPE_SQUARE
OpenCV in Objective-C++ based on this answer:
+(Pose)findPose: (NSArray<NSValue *> *) imagePoints
intrinsics: (NSArray<NSNumber *> *) intrinsics
imageResolution: (CGSize) imageResolution
squareLength: (NSNumber *) squareLength {
cv::Mat distCoeffs(4,1,cv::DataType<double>::type, 0.0);
cv::Mat rvec(3,1,cv::DataType<double>::type);
cv::Mat tvec(3,1,cv::DataType<double>::type);
cv::Mat cameraMatrix = [self intrinsicMatrixWithArray:intrinsics];
vector<Point2f> cvImagePoints = [self convertImagePoints:imagePoints toSize: imageResolution];
vector<Point3f> cvObjectPoints = [self getObjectPointsWithSquareLength:squareLength];
std::cout << "object points: \n" << cvObjectPoints << std::endl;
std::cout << "image points: \n" << cvImagePoints << std::endl;
std::cout << "cameraMatrix points: \n" << cameraMatrix << std::endl;
cv::solvePnP(cvObjectPoints, cvImagePoints, cameraMatrix, distCoeffs, rvec, tvec, false, SOLVEPNP_IPPE_SQUARE);
std::cout << "rvec: \n" << rvec << std::endl;
std::cout << "tvec: \n" << tvec << std::endl;
cv::Mat RotX(3, 3, cv::DataType<double>::type);
cv::setIdentity(RotX);
RotX.at<double>(4) = -1; //cos(180) = -1
RotX.at<double>(8) = -1;
cv::Mat R;
cv::Rodrigues(rvec, R);
R = R.t(); // rotation of inverse
Mat rvecConverted;
Rodrigues(R, rvecConverted); //
std::cout << "rvec in world coords:\n" << rvecConverted << std::endl;
rvecConverted = RotX * rvecConverted;
std::cout << "rvec scenekit :\n" << rvecConverted << std::endl;
Mat tvecConverted = -R * tvec;
std::cout << "tvec in world coords:\n" << tvecConverted << std::endl;
tvecConverted = RotX * tvecConverted;
std::cout << "tvec scenekit :\n" << tvecConverted << std::endl;
SCNVector4 rotationVector = SCNVector4Make(rvecConverted.at<double>(0), rvecConverted.at<double>(1), rvecConverted.at<double>(2), norm(rvecConverted));
SCNVector3 translationVector = SCNVector3Make(tvecConverted.at<double>(0), tvecConverted.at<double>(1), tvecConverted.at<double>(2));
return Pose{rotationVector, translationVector};
}
+ (vector<Point3f>) getObjectPointsWithSquareLength: (NSNumber*) squareLength {
vector<Point3f> points;
double squareLengthDouble = [squareLength doubleValue];
points.push_back(Point3f(-squareLengthDouble/2, squareLengthDouble/2, 0));
points.push_back(Point3f(squareLengthDouble/2, squareLengthDouble/2, 0));
points.push_back(Point3f(squareLengthDouble/2, -squareLengthDouble/2, 0));
points.push_back(Point3f(-squareLengthDouble/2, -squareLengthDouble/2, 0));
return points;
}
+ (vector<Point2f>) convertImagePoints: (NSArray<NSValue *> *) array
toSize: (CGSize) size {
vector<Point2f> points;
for (NSValue * value in array) {
CGPoint point = [value CGPointValue];
points.push_back(Point2f((point.x * size.width), (point.y * size.height)));
}
return points;
}
+ (cv::Mat) intrinsicMatrixWithArray: (NSArray<NSNumber *> *) intrinsics {
Mat result(3,3,cv::DataType<double>::type);
cv::setIdentity(result);
result.at<double>(0) = [intrinsics[0] doubleValue]; //fx
result.at<double>(4) = [intrinsics[4] doubleValue]; //fy
result.at<double>(2) = [intrinsics[6] doubleValue]; //cx
result.at<double>(5) = [intrinsics[7] doubleValue]; //cy
result.at<double>(8) = [intrinsics[8] doubleValue]; //1
return result;
}
The problem is when I point camera directly to QR code with 2 meters distance, the result of translationVector.z (tvec scenekit) should be equal to 2 meters, but instead there is a random positive or negative numbers.
Output:
Calculated distance to QR 2.0856588
object points:
[-0.079999998, 0.079999998, 0;
0.079999998, 0.079999998, 0;
0.079999998, -0.079999998, 0;
-0.079999998, -0.079999998, 0]
image points:
[795.98724, 717.27045;
684.5592, 715.80487;
793.31567, 826.06146;
684.40692, 824.39771]
cameraMatrix points:
[1454.490478515625, 0, 935.6685791015625;
0, 1454.490478515625, 717.999267578125;
0, 0, 1]
rvec:
[-0.9251278749049585;
1.185890362907954;
-0.9989977018022447]
tvec:
[0.04753833193572054;
-0.009999648596310796;
-0.3527916723601041]
rvec in world coords:
[0.9251278749049584;
-1.185890362907954;
0.9989977018022447]
rvec scenekit :
[0.9251278749049584;
1.185890362907954;
-0.9989977018022447]
tvec in world coords:
[-0.1159248829391864;
-0.3366933247327607;
0.004569098144615695]
tvec scenekit :
[-0.1159248829391864;
0.3366933247327607;
-0.004569098144615695]
Thanks for any help
The estimated translation between the camera and the tag is not correct. The tz is negative which is physically not possible. See here for the details about the camera coordinates system.
You have to be sure that each 3D object point matches with the corresponding 2D image point.
If I plot the 2D coordinates, I have the following image:
With RGBM the order of the points.
If you swap the last two image points, you should get:
rvec: [0.1217246105180353, 0.1224686744740433, -3.116495036698598]
tvec: [-0.2866576939480562, 0.07760414675470864, 2.127895748451679]

OpenCV: Wrong result in calibrateHandEye function

I am working in a robot application, in which I have a camera fixed to a robot gripper. In order to calculate the matrix transformation between the camera and the gripper Hcg I am using the calibrateHandEye new function provided in the OpenCV version 4.1.0
I had taken 10 pictures of the chessboard from the camera mounted in the gripper and at the same time I recorded the robot position.
The code I am working on:
// My_handeye.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#include <iostream>
#include <sstream>
#include <string>
#include <ctime>
#include <cstdio>
#include "pch.h"
#include <opencv2/opencv.hpp>
#include <opencv2/core.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace std;
Mat eulerAnglesToRotationMatrix(Vec3f &theta);
Vec3f rotationMatrixToEulerAngles(Mat &R);
float rad2deg(float radian);
float deg2rad(float degree);
int main()
{
// Camera calibration information
std::vector<double> distortionCoefficients(5); // camera distortion
distortionCoefficients[0] = 2.4472856611074989e-01;
distortionCoefficients[1] = -8.1042032574246325e-01;
distortionCoefficients[2] = 0;
distortionCoefficients[3] = 0;
distortionCoefficients[4] = 7.8769462320821060e-01;
double f_x = 1.3624172121852105e+03; // Focal length in x axis
double f_y = 1.3624172121852105e+03; // Focal length in y axis (usually the same?)
double c_x = 960; // Camera primary point x
double c_y = 540; // Camera primary point y
cv::Mat cameraMatrix(3, 3, CV_32FC1);
cameraMatrix.at<float>(0, 0) = f_x;
cameraMatrix.at<float>(0, 1) = 0.0;
cameraMatrix.at<float>(0, 2) = c_x;
cameraMatrix.at<float>(1, 0) = 0.0;
cameraMatrix.at<float>(1, 1) = f_y;
cameraMatrix.at<float>(1, 2) = c_y;
cameraMatrix.at<float>(2, 0) = 0.0;
cameraMatrix.at<float>(2, 1) = 0.0;
cameraMatrix.at<float>(2, 2) = 1.0;
Mat rvec(3, 1, CV_32F), tvec(3, 1, CV_32F);
//
std::vector<Mat> R_gripper2base;
std::vector<Mat> t_gripper2base;
std::vector<Mat> R_target2cam;
std::vector<Mat> t_target2cam;
Mat R_cam2gripper = (Mat_<float>(3, 3));
Mat t_cam2gripper = (Mat_<float>(3, 1));
vector<String> fn;
glob("images/*.bmp", fn, false);
vector<Mat> images;
size_t num_images = fn.size(); //number of bmp files in images folder
Size patternsize(6, 8); //number of centers
vector<Point2f> centers; //this will be filled by the detected centers
float cell_size = 30;
vector<Point3f> obj_points;
R_gripper2base.reserve(num_images);
t_gripper2base.reserve(num_images);
R_target2cam.reserve(num_images);
t_target2cam.reserve(num_images);
for (int i = 0; i < patternsize.height; ++i)
for (int j = 0; j < patternsize.width; ++j)
obj_points.push_back(Point3f(float(j*cell_size),
float(i*cell_size), 0.f));
for (size_t i = 0; i < num_images; i++)
images.push_back(imread(fn[i]));
Mat frame;
for (size_t i = 0; i < num_images; i++)
{
frame = imread(fn[i]); //source image
bool patternfound = findChessboardCorners(frame, patternsize, centers);
if (patternfound)
{
drawChessboardCorners(frame, patternsize, Mat(centers), patternfound);
//imshow("window", frame);
//int key = cv::waitKey(0) & 0xff;
solvePnP(Mat(obj_points), Mat(centers), cameraMatrix, distortionCoefficients, rvec, tvec);
Mat R;
Rodrigues(rvec, R); // R is 3x3
R_target2cam.push_back(R);
t_target2cam.push_back(tvec);
Mat T = Mat::eye(4, 4, R.type()); // T is 4x4
T(Range(0, 3), Range(0, 3)) = R * 1; // copies R into T
T(Range(0, 3), Range(3, 4)) = tvec * 1; // copies tvec into T
cout << "T = " << endl << " " << T << endl << endl;
}
cout << patternfound << endl;
}
Vec3f theta_01{ deg2rad(-153.61), deg2rad(8.3), deg2rad(-91.91) };
Vec3f theta_02{ deg2rad(-166.71), deg2rad(3.04), deg2rad(-93.31) };
Vec3f theta_03{ deg2rad(-170.04), deg2rad(24.92), deg2rad(-88.29) };
Vec3f theta_04{ deg2rad(-165.71), deg2rad(24.68), deg2rad(-84.85) };
Vec3f theta_05{ deg2rad(-160.18), deg2rad(-15.94),deg2rad(-56.24) };
Vec3f theta_06{ deg2rad(175.68), deg2rad(10.95), deg2rad(180) };
Vec3f theta_07{ deg2rad(175.73), deg2rad(45.78), deg2rad(-179.92) };
Vec3f theta_08{ deg2rad(-165.34), deg2rad(47.37), deg2rad(-166.25) };
Vec3f theta_09{ deg2rad(-165.62), deg2rad(17.95), deg2rad(-166.17) };
Vec3f theta_10{ deg2rad(-151.99), deg2rad(-14.59),deg2rad(-94.19) };
Mat robot_rot_01 = eulerAnglesToRotationMatrix(theta_01);
Mat robot_rot_02 = eulerAnglesToRotationMatrix(theta_02);
Mat robot_rot_03 = eulerAnglesToRotationMatrix(theta_03);
Mat robot_rot_04 = eulerAnglesToRotationMatrix(theta_04);
Mat robot_rot_05 = eulerAnglesToRotationMatrix(theta_05);
Mat robot_rot_06 = eulerAnglesToRotationMatrix(theta_06);
Mat robot_rot_07 = eulerAnglesToRotationMatrix(theta_07);
Mat robot_rot_08 = eulerAnglesToRotationMatrix(theta_08);
Mat robot_rot_09 = eulerAnglesToRotationMatrix(theta_09);
Mat robot_rot_10 = eulerAnglesToRotationMatrix(theta_10);
const Mat robot_tr_01 = (Mat_<float>(3, 1) << 781.2, 338.59, 903.48);
const Mat robot_tr_02 = (Mat_<float>(3, 1) << 867.65, 382.52, 884.42);
const Mat robot_tr_03 = (Mat_<float>(3, 1) << 856.91, 172.99, 964.61);
const Mat robot_tr_04 = (Mat_<float>(3, 1) << 748.81, 146.75, 1043.29);
const Mat robot_tr_05 = (Mat_<float>(3, 1) << 627.66, 554.08, 920.85);
const Mat robot_tr_06 = (Mat_<float>(3, 1) << 715.06, 195.96, 889.38);
const Mat robot_tr_07 = (Mat_<float>(3, 1) << 790.9, 196.29, 1117.38);
const Mat robot_tr_08 = (Mat_<float>(3, 1) << 743.5, 283.93, 1131.92);
const Mat robot_tr_09 = (Mat_<float>(3, 1) << 748.9, 288.19, 910.58);
const Mat robot_tr_10 = (Mat_<float>(3, 1) << 813.18, 400.44, 917.16);
R_gripper2base.push_back(robot_rot_01);
R_gripper2base.push_back(robot_rot_02);
R_gripper2base.push_back(robot_rot_03);
R_gripper2base.push_back(robot_rot_04);
R_gripper2base.push_back(robot_rot_05);
R_gripper2base.push_back(robot_rot_06);
R_gripper2base.push_back(robot_rot_07);
R_gripper2base.push_back(robot_rot_08);
R_gripper2base.push_back(robot_rot_09);
R_gripper2base.push_back(robot_rot_10);
t_gripper2base.push_back(robot_tr_01);
t_gripper2base.push_back(robot_tr_02);
t_gripper2base.push_back(robot_tr_03);
t_gripper2base.push_back(robot_tr_04);
t_gripper2base.push_back(robot_tr_05);
t_gripper2base.push_back(robot_tr_06);
t_gripper2base.push_back(robot_tr_07);
t_gripper2base.push_back(robot_tr_08);
t_gripper2base.push_back(robot_tr_09);
t_gripper2base.push_back(robot_tr_10);
calibrateHandEye(R_gripper2base, t_gripper2base, R_target2cam, t_target2cam, R_cam2gripper, t_cam2gripper, CALIB_HAND_EYE_TSAI);
Vec3f R_cam2gripper_r = rotationMatrixToEulerAngles(R_cam2gripper);
cout << "R_cam2gripper = " << endl << " " << R_cam2gripper << endl << endl;
cout << "R_cam2gripper_r = " << endl << " " << R_cam2gripper_r << endl << endl;
cout << "t_cam2gripper = " << endl << " " << t_cam2gripper << endl << endl;
}
Mat eulerAnglesToRotationMatrix(Vec3f &theta)
{
// Calculate rotation about x axis
Mat R_x = (Mat_<double>(3, 3) <<
1, 0, 0,
0, cos(theta[0]), -sin(theta[0]),
0, sin(theta[0]), cos(theta[0])
);
// Calculate rotation about y axis
Mat R_y = (Mat_<double>(3, 3) <<
cos(theta[1]), 0, sin(theta[1]),
0, 1, 0,
-sin(theta[1]), 0, cos(theta[1])
);
// Calculate rotation about z axis
Mat R_z = (Mat_<double>(3, 3) <<
cos(theta[2]), -sin(theta[2]), 0,
sin(theta[2]), cos(theta[2]), 0,
0, 0, 1);
// Combined rotation matrix
Mat R = R_z * R_y * R_x;
return R;
}
float rad2deg(float radian) {
double pi = 3.14159;
return(radian * (180 / pi));
}
float deg2rad(float degree) {
double pi = 3.14159;
return(degree * (pi / 180));
}
// Checks if a matrix is a valid rotation matrix.
bool isRotationMatrix(Mat &R)
{
Mat Rt;
transpose(R, Rt);
Mat shouldBeIdentity = Rt * R;
Mat I = Mat::eye(3, 3, shouldBeIdentity.type());
return norm(I, shouldBeIdentity) < 1e-6;
}
// Calculates rotation matrix to euler angles
// The result is the same as MATLAB except the order
// of the euler angles ( x and z are swapped ).
Vec3f rotationMatrixToEulerAngles(Mat &R)
{
assert(isRotationMatrix(R));
float sy = sqrt(R.at<double>(0, 0) * R.at<double>(0, 0) + R.at<double>(1, 0) * R.at<double>(1, 0));
bool singular = sy < 1e-6; // If
float x, y, z;
if (!singular)
{
x = atan2(R.at<double>(2, 1), R.at<double>(2, 2));
y = atan2(-R.at<double>(2, 0), sy);
z = atan2(R.at<double>(1, 0), R.at<double>(0, 0));
}
else
{
x = atan2(-R.at<double>(1, 2), R.at<double>(1, 1));
y = atan2(-R.at<double>(2, 0), sy);
z = 0;
}
return Vec3f(x, y, z);
}
The result the function is giving me is the next one:
R_cam2gripper =
[0.3099803593003124, -0.8923086952824562, -0.3281727733547833;
0.7129271761196039, 0.4465219155360299, -0.5406967916458927;
0.6290047840821058, -0.0663579028402444, 0.7745641421680119]
R_cam2gripper_r =
[-0.0854626, -0.680272, 1.16065]
t_cam2gripper =
[-35.02063730299775;
-74.80633768251272;
-307.6725851251873]
I am getting 'good' results provided by other software. With them, the robot got to the exact points I am pointing in the camera (I have a 3D camera, from which I am getting the x, y, z from the camera world) so they are certainly correct, but I am having troubles to repeat the same result with the OpenCV function.
Sorry for the long introduction to my problem. Any understanding of why the solutions are not what is supposed to be? My guess is, that I have a problem understanding the angles or converting them but I couldn't find any way to solve this. Any hint will be well welcome!
I actually managed to solve this problem. The general idea was correct, but:
I was not understanding correctly the vector rotation notation the robot was giving. It was necessary to multiply the actual values by a factor.
I created a new program that extracts directly from the robot and the pictures the matrixes that the algorithm requires and writes these values to a YML file.
The CALIB_HAND_EYE_TSAI method wasn't giving me correct values. But with the four others, the values seem to converge to the actual values
Anyway, thank you for your help. I am stuck to get more precision in the algorithm, but that's for another question.

Car detection using HOG features and cvsvm

I am doing a project for which I need to detect the rear of a car using HOG features. Once I calculated the HOG features I trained the cvsvm using positive and negative samples. cvsvm is correctly classifying the new data. Here is my code that I used to train cvsvm.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/ml/ml.hpp>
#include "opencv2/opencv.hpp"
#include "LinearSVM.h"
using namespace cv;
using namespace std;
int main(void)
{
LinearSVM *s = new LinearSVM;
vector<float> values, values1, values2, values3, values4;
FileStorage fs2("/home/ubuntu/Desktop/opencv-svm/vecSupport.yml", FileStorage::READ);
FileStorage fs3("/home/ubuntu/Desktop/opencv-svm/vecSupport1.yml", FileStorage::READ);
FileStorage fs4("/home/ubuntu/Desktop/opencv-svm/vecSupport2.yml", FileStorage::READ);
FileStorage fs5("/home/ubuntu/Desktop/opencv-svm/vecSupport3.yml", FileStorage::READ);
FileStorage fs6("/home/ubuntu/Desktop/opencv-svm/vecSupport4.yml", FileStorage::READ);
fs2["vector"]>>values;
fs3["vector"]>>values1;
fs4["vector"]>>values2;
fs5["vector"]>>values3;
fs6["vector"]>>values4;
//fill with data
values.insert(values.end(), values1.begin(), values1.end());
values.insert(values.end(), values2.begin(), values2.end());
fs2.release();
fs3.release();
fs4.release();
float arr[188496];
float car[2772];
float noncar[2772];
// move positive and negative to arr
std::copy(values.begin(), values.end(), arr);
std::copy(values3.begin(), values3.end(), car);
std::copy(values4.begin(), values4.end(), noncar);
float labels[68];
for (unsigned int s = 0; s < 68; s++)
{
if (s<34)
labels[s] = +1;
else
labels[s] = -1;
}
Mat labelsMat(68, 1, CV_32FC1, labels);
Mat trainingDataMat(68,2772, CV_32FC1, arr);
// Set up SVM's parameters
CvSVMParams params;
params.svm_type = CvSVM::C_SVC;
params.kernel_type = CvSVM::LINEAR;
params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, 100, 1e-6);
// Train the SVM
LinearSVM SVM;
SVM.train(trainingDataMat, labelsMat, Mat(), Mat(), params);
Mat matinput(1,2772,CV_32FC1,noncar);
//cout<<matinput;
float response = SVM.predict(matinput);
cout<<"Response : "<<response<<endl;
SVM.save("Classifier.xml");
vector<float>primal;
// LinearSVM s;
//s.getSupportVector(primal);
SVM.getSupportVector(primal);
FileStorage fs("/home/ubuntu/Desktop/opencv-svm/test.yml", FileStorage::WRITE);
fs << "dector" << primal;
fs.release();
}
// LinearSVM cpp file
#include "LinearSVM.h"
void LinearSVM::getSupportVector(std::vector<float>& support_vector) const {
int sv_count = get_support_vector_count();
const CvSVMDecisionFunc* df = decision_func;
const double* alphas = df[0].alpha;
double rho = df[0].rho;
int var_count = get_var_count();
support_vector.resize(var_count, 0);
for (unsigned int r = 0; r < (unsigned)sv_count; r++) {
float myalpha = alphas[r];
const float* v = get_support_vector(r);
for (int j = 0; j < var_count; j++,v++) {
support_vector[j] += (-myalpha) * (*v);
}
}
support_vector.push_back(rho);
}
// LinearSVM head file
#ifndef LINEAR_SVM_H_
#define LINEAR_SVM_H_
#include <opencv2/core/core.hpp>
#include <opencv2/ml/ml.hpp>
class LinearSVM: public CvSVM {
public:
void getSupportVector(std::vector<float>& support_vector) const;
};
#endif /* LINEAR_SVM_H_ */
After this step I got the vector file that I can fed into setsvmdetector method. Here is my code. I have used window size of 96 x 64 and scale of 1.11
#include <iostream>
#include <fstream>
#include <string>
#include <time.h>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include <stdexcept>
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/highgui/highgui.hpp"
using namespace std;
using namespace cv;
bool help_showed = false;
class Args
{
public:
Args();
static Args read(int argc, char** argv);
string src;
bool src_is_video;
bool src_is_camera;
int camera_id;
bool write_video;
string dst_video;
double dst_video_fps;
bool make_gray;
bool resize_src;
int width, height;
double scale;
int nlevels;
int gr_threshold;
double hit_threshold;
bool hit_threshold_auto;
int win_width;
int win_stride_width, win_stride_height;
bool gamma_corr;
};
class App
{
public:
App(const Args& s);
void run();
void handleKey(char key);
void hogWorkBegin();
void hogWorkEnd();
string hogWorkFps() const;
void workBegin();
void workEnd();
string workFps() const;
string message() const;
private:
App operator=(App&);
Args args;
bool running;
bool use_gpu;
bool make_gray;
double scale;
int gr_threshold;
int nlevels;
double hit_threshold;
bool gamma_corr;
int64 hog_work_begin;
double hog_work_fps;
int64 work_begin;
double work_fps;
};
static void printHelp()
{
cout << "Histogram of Oriented Gradients descriptor and detector sample.\n"
<< "\nUsage: hog_gpu\n"
<< " (<image>|--video <vide>|--camera <camera_id>) # frames source\n"
<< " [--make_gray <true/false>] # convert image to gray one or not\n"
<< " [--resize_src <true/false>] # do resize of the source image or not\n"
<< " [--width <int>] # resized image width\n"
<< " [--height <int>] # resized image height\n"
<< " [--hit_threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
<< " [--scale <double>] # HOG window scale factor\n"
<< " [--nlevels <int>] # max number of HOG window scales\n"
<< " [--win_width <int>] # width of the window (48 or 64)\n"
<< " [--win_stride_width <int>] # distance by OX axis between neighbour wins\n"
<< " [--win_stride_height <int>] # distance by OY axis between neighbour wins\n"
<< " [--gr_threshold <int>] # merging similar rects constant\n"
<< " [--gamma_correct <int>] # do gamma correction or not\n"
<< " [--write_video <bool>] # write video or not\n"
<< " [--dst_video <path>] # output video path\n"
<< " [--dst_video_fps <double>] # output video fps\n";
help_showed = true;
}
int main(int argc, char** argv)
{
try
{
if (argc < 2)
printHelp();
Args args = Args::read(argc, argv);
if (help_showed)
return -1;
App app(args);
app.run();
}
catch (const Exception& e) { return cout << "error: " << e.what() << endl, 1; }
catch (const exception& e) { return cout << "error: " << e.what() << endl, 1; }
catch(...) { return cout << "unknown exception" << endl, 1; }
return 0;
}
Args::Args()
{
src_is_video = false;
src_is_camera = false;
camera_id = 0;
write_video = false;
dst_video_fps = 24.;
make_gray = false;
resize_src = false;
width = 640;
height = 480;
scale = 1.11;
nlevels = 13;
gr_threshold = 1;
hit_threshold = 1.4;
hit_threshold_auto = true;
win_width = 64;
win_stride_width = 8;
win_stride_height = 8;
gamma_corr = true;
}
Args Args::read(int argc, char** argv)
{
Args args;
for (int i = 1; i < argc; i++)
{
if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]);
else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]);
else if (string(argv[i]) == "--hit_threshold")
{
args.hit_threshold = atof(argv[++i]);
args.hit_threshold_auto = false;
}
else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]);
else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]);
else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]);
else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]);
else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]);
else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]);
else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true");
else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i];
else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]);
else if (string(argv[i]) == "--help") printHelp();
else if (string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; }
else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; }
else if (args.src.empty()) args.src = argv[i];
else throw runtime_error((string("unknown key: ") + argv[i]));
}
return args;
}
App::App(const Args& s)
{
cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice());
args = s;
cout << "\nControls:\n"
<< "\tESC - exit\n"
<< "\tm - change mode GPU <-> CPU\n"
<< "\tg - convert image to gray or not\n"
<< "\t1/q - increase/decrease HOG scale\n"
<< "\t2/w - increase/decrease levels count\n"
<< "\t3/e - increase/decrease HOG group threshold\n"
<< "\t4/r - increase/decrease hit threshold\n"
<< endl;
use_gpu = true;
make_gray = args.make_gray;
scale = args.scale;
gr_threshold = args.gr_threshold;
nlevels = args.nlevels;
if (args.hit_threshold_auto)
args.hit_threshold = args.win_width == 48 ? 1.4 : 0.;
hit_threshold = args.hit_threshold;
gamma_corr = args.gamma_corr;
/*
if (args.win_width != 64 && args.win_width != 48)
args.win_width = 64;*/
cout << "Scale: " << scale << endl;
if (args.resize_src)
cout << "Resized source: (" << args.width << ", " << args.height << ")\n";
cout << "Group threshold: " << gr_threshold << endl;
cout << "Levels number: " << nlevels << endl;
cout << "Win width: " << args.win_width << endl;
cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n";
cout << "Hit threshold: " << hit_threshold << endl;
cout << "Gamma correction: " << gamma_corr << endl;
cout << endl;
}
void App::run()
{
FileStorage fs("/home/ubuntu/Desktop/implemenatation/vecSupport.yml", FileStorage::READ);
vector<float> detector;
int frameCount;
fs["vector"] >> detector;
for (unsigned int i=0; i<detector.size(); i++)
{
std::cout << std::fixed << std::setprecision(10) << detector[i] << std::endl;
}
fs.release();
running = true;
cv::VideoWriter video_writer;
Size win_size(96,64); //(64, 128) or (48, 96)
Size win_stride(args.win_stride_width, args.win_stride_height);
// Create HOG descriptors and detectors here
/*
vector<float> detector;
if (win_size == Size(64, 128))
detector = cv::gpu::HOGDescriptor::getPeopleDetector64x128();
else
detector = cv::gpu::HOGDescriptor::getPeopleDetector48x96();*/
cv::gpu::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9,
cv::gpu::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr,
cv::gpu::HOGDescriptor::DEFAULT_NLEVELS);
cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1,
HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS);
gpu_hog.setSVMDetector(detector);
cpu_hog.setSVMDetector(detector);
while (running)
{
VideoCapture vc;
Mat frame;
if (args.src_is_video)
{
vc.open(args.src.c_str());
if (!vc.isOpened())
throw runtime_error(string("can't open video file: " + args.src));
vc >> frame;
}
else if (args.src_is_camera)
{
vc.open(args.camera_id);
if (!vc.isOpened())
{
stringstream msg;
msg << "can't open camera: " << args.camera_id;
throw runtime_error(msg.str());
}
vc >> frame;
}
else
{
frame = imread(args.src);
if (frame.empty())
throw runtime_error(string("can't open image file: " + args.src));
}
Mat img_aux, img, img_to_show;
gpu::GpuMat gpu_img;
// Iterate over all frames
while (running && !frame.empty())
{
workBegin();
// Change format of the image
if (make_gray) cvtColor(frame, img_aux, CV_BGR2GRAY);
else if (use_gpu) cvtColor(frame, img_aux, CV_BGR2BGRA);
else frame.copyTo(img_aux);
// Resize image
if (args.resize_src) resize(img_aux, img, Size(args.width, args.height));
else img = img_aux;
img_to_show = img;
gpu_hog.nlevels = nlevels;
cpu_hog.nlevels = nlevels;
vector<Rect> found;
// Perform HOG classification
hogWorkBegin();
if (use_gpu)
{
gpu_img.upload(img);
gpu_hog.detectMultiScale(gpu_img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
}
else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
Size(0, 0), scale, gr_threshold);
hogWorkEnd();
// Draw positive classified windows
for (size_t i = 0; i < found.size(); i++)
{
Rect r = found[i];
rectangle(img_to_show, r.tl(), r.br(), CV_RGB(0, 255, 0), 3);
}
if (use_gpu)
putText(img_to_show, "Mode: GPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
else
putText(img_to_show, "Mode: CPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
imshow("opencv_gpu_hog", img_to_show);
if (args.src_is_video || args.src_is_camera) vc >> frame;
workEnd();
if (args.write_video)
{
if (!video_writer.isOpened())
{
video_writer.open(args.dst_video, CV_FOURCC('x','v','i','d'), args.dst_video_fps,
img_to_show.size(), true);
if (!video_writer.isOpened())
throw std::runtime_error("can't create video writer");
}
if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR);
else cvtColor(img_to_show, img, CV_BGRA2BGR);
video_writer << img;
}
handleKey((char)waitKey(3));
}
}
}
void App::handleKey(char key)
{
switch (key)
{
case 27:
running = false;
break;
case 'm':
case 'M':
use_gpu = !use_gpu;
cout << "Switched to " << (use_gpu ? "CUDA" : "CPU") << " mode\n";
break;
case 'g':
case 'G':
make_gray = !make_gray;
cout << "Convert image to gray: " << (make_gray ? "YES" : "NO") << endl;
break;
case '1':
scale *= 1.11;
cout << "Scale: " << scale << endl;
break;
case 'q':
case 'Q':
scale /= 1.11;
cout << "Scale: " << scale << endl;
break;
case '2':
nlevels++;
cout << "Levels number: " << nlevels << endl;
break;
case 'w':
case 'W':
nlevels = max(nlevels - 1, 1);
cout << "Levels number: " << nlevels << endl;
break;
case '3':
gr_threshold++;
cout << "Group threshold: " << gr_threshold << endl;
break;
case 'e':
case 'E':
gr_threshold = max(0, gr_threshold - 1);
cout << "Group threshold: " << gr_threshold << endl;
break;
case '4':
hit_threshold+=0.25;
cout << "Hit threshold: " << hit_threshold << endl;
break;
case 'r':
case 'R':
hit_threshold = max(0.0, hit_threshold - 0.25);
cout << "Hit threshold: " << hit_threshold << endl;
break;
case 'c':
case 'C':
gamma_corr = !gamma_corr;
cout << "Gamma correction: " << gamma_corr << endl;
break;
}
}
inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); }
inline void App::hogWorkEnd()
{
int64 delta = getTickCount() - hog_work_begin;
double freq = getTickFrequency();
hog_work_fps = freq / delta;
}
inline string App::hogWorkFps() const
{
stringstream ss;
ss << hog_work_fps;
return ss.str();
}
inline void App::workBegin() { work_begin = getTickCount(); }
inline void App::workEnd()
{
int64 delta = getTickCount() - work_begin;
double freq = getTickFrequency();
work_fps = freq / delta;
}
inline string App::workFps() const
{
stringstream ss;
ss << work_fps;
return ss.str();
}
Problem:
I am not able to detect anything. Can someone look at my work and can let me know what I am doing wrong. Any suggestions would be valuable. Thank you. From last four weeks I am doing these steps over and over again.
P.S: You can find yaml files here and test images along with the annotations here
First of all, partition your data for cross-validation as suggested already. Second thing is that it is a good idea to use RBF kernel rather than Linear kernel. I highly doubt that a linear kernel can learn complex objects. A brief explanation is given here. Finally, experiment with the parameters. To do that, you need to check the limits of the parameter space, it's been a while since I haven't used SVMs therefore I cannot provide any details but a grid search with 20% cross-validation is a good start.

3D rotation matrix between two 3D points

I have 2 known 3d points OC1 and OC2 which are the origin of 2 axis plot in the space and I need to compute the 3D rotation matrix between them.
I know that using R1&T1 I can get to Oc1 and using R2&T2 I can get to Oc2 but I need to compute the 3D rotation matrix between Oc1 and Oc2. I just thought about this rule:
oMc1=(R1 | T1) and oMc2=(R2 | T2) and what I want is:
c1Mc2 = (oMc1)^-1 oMc2
So I tried to implement it and here is my code:
vector <Point3f> listOfPointsOnTable;
cout << "******** DATA *******" << endl;
listOfPointsOnTable.push_back(Point3f(0,0,0));
listOfPointsOnTable.push_back(Point3f(100,0,0));
listOfPointsOnTable.push_back(Point3f(100,100,0));
listOfPointsOnTable.push_back(Point3f(0,100,0));
cout << endl << "Scene points :" << endl;
for (int i = 0; i < listOfPointsOnTable.size(); i++)
{
cout << listOfPointsOnTable[i] << endl;
}
//Define the optical center of each camera
Point3f centreOfC1 = Point3f(23,0,50);
Point3f centreOfC2 = Point3f(0,42,20);
cout << endl << "Center Of C1: " << centreOfC1 << " , Center of C2 : " << centreOfC2 << endl;
//Define the translation and rotation between main axis and the camera 1 axis
Mat translationOfC1 = (Mat_<double>(3, 1) << (0-centreOfC1.x), (0-centreOfC1.y), (0-centreOfC1.z));
float rotxC1 = 0, rotyC1 = 0, rotzC1 = -45;
int focaleC1 = 2;
Mat rotationOfC1 = rotation3D(rotxC1, rotyC1,rotzC1);
cout << endl << "Translation from default axis to C1: " << translationOfC1 << endl;
cout << "Rotation from default axis to C1: " << rotationOfC1 << endl;
Mat transformationToC1 = buildTransformationMatrix(rotationOfC1, translationOfC1);
cout << "Transformation from default axis to C1: " << transformationToC1 << endl << endl;
//Define the translation and rotation between main axis and the camera 2 axis
Mat translationOfC2 = (Mat_<double>(3, 1) << (0-centreOfC2.x), (0-centreOfC2.y), (0-centreOfC2.z));
float rotxC2 = 0, rotyC2 = 0, rotzC2 = -90;
int focaleC2 = 2;
Mat rotationOfC2 = rotation3D(rotxC2, rotyC2,rotzC2);
cout << endl << "Translation from default axis to C2: " << translationOfC2 << endl;
cout << "Rotation from default axis to C2: " << rotationOfC2 << endl;
Mat transformationToC2 = buildTransformationMatrix(rotationOfC2, translationOfC2);
cout << "Transformation from default axis to C2: " << transformationToC2 << endl << endl;
Mat centreOfC2InMat = (Mat_<double>(3, 1) << centreOfC2.x, centreOfC2.y, centreOfC2.z);
Mat centreOfC2InCamera1 = rotationOfC1 * centreOfC2InMat + translationOfC1;
Mat translationBetweenC1AndC2 = -centreOfC2InCamera1;
cout << endl << "****Translation from C2 to C1" << endl;
cout << translationBetweenC1AndC2 << endl;
Mat centreOfC1InMat = (Mat_<double>(3, 1) << centreOfC1.x, centreOfC1.y, centreOfC1.z);
Mat centreOfC1InCamera2 = rotationOfC2 * centreOfC1InMat + translationOfC2;
Mat translationBetweenC2AndC1 = -centreOfC1InCamera2;
cout << "****Translation from C1 to C2" << endl;
cout << translationBetweenC2AndC1 << endl;
cout << "Tran1-1 * Trans2 = " << transformationToC1.inv() * transformationToC2 << endl;
cout << "Tran2-1 * Trans1 = " << transformationToC2.inv() * transformationToC1 << endl;
Mat rotation3D(int alpha, int beta, int gamma)
{
// Rotation matrices around the X, Y, and Z axis
double alphaInRadian = alpha * M_PI / 180.0;
double betaInRadian = beta * M_PI / 180.0;
double gammaInRadian = gamma * M_PI / 180.0;
Mat RX = (Mat_<double>(3, 3) <<
1, 0, 0,
0, cosf(alphaInRadian), sinf(alphaInRadian),
0, -sinf(alphaInRadian), cosf(alphaInRadian));
Mat RY = (Mat_<double>(3, 3) <<
cosf(betaInRadian), 0, sinf(betaInRadian),
0, 1, 0,
-sinf(betaInRadian), 0, cosf(betaInRadian));
Mat RZ = (Mat_<double>(3, 3) <<
cosf(gammaInRadian), sinf(gammaInRadian), 0,
-sinf(gammaInRadian),cosf(gammaInRadian), 0,
0, 0, 1);
// Composed rotation matrix with (RX, RY, RZ)
Mat R = RX * RY * RZ;
return R;
}
Mat buildTransformationMatrix(Mat rotation, Mat translation)
{
Mat transformation = (Mat_<double>(4, 4) <<
rotation.at<double>(0,0), rotation.at<double>(0,1), rotation.at<double>(0,2), translation.at<double>(0,0),
rotation.at<double>(1,0), rotation.at<double>(1,1), rotation.at<double>(1,2), translation.at<double>(1,0),
rotation.at<double>(2,0), rotation.at<double>(2,1), rotation.at<double>(2,2), translation.at<double>(2,0),
0, 0, 0, 1);
return transformation;
}
here is the output:
//Origin of 3 axis
O(0,0,0), OC1 (23, 0, 50), OC2 (0, 42, 20)
Translation from default axis to OC1: [-23;
0;
-50]
Rotation from default axis to OC1: [0.7071067690849304, -0.7071067690849304, 0;
0.7071067690849304, 0.7071067690849304, 0;
0, 0, 1]
Trans1 = Transformation from default axis to OC1: [0.7071067690849304, -0.7071067690849304, 0, -23;
0.7071067690849304, 0.7071067690849304, 0, 0;
0, 0, 1, -50;
0, 0, 0, 1]
Translation from default axis to OC2: [0;
-42;
-20]
Rotation from default axis to OC2: [-4.371138828673793e-08, -1, 0;
1, -4.371138828673793e-08, 0;
0, 0, 1]
Trans2 = Transformation from default axis to OC2: [-4.371138828673793e-08, -1, 0, 0;
1, -4.371138828673793e-08, 0, -42;
0, 0, 1, -20;
0, 0, 0, 1]
(Trans1)-1 * (Trans2) = [0.7071067623795453, -0.7071068241967844, 0, -13.43502907247513;
0.7071068241967844, 0.7071067623795453, 0, -45.96194156373071;
0, 0, 1, 30;
0, 0, 0, 1]
(Trans2)-1 * (Trans1) = [0.7071067381763105, 0.7071067999935476, 0, 42.00000100536185;
-0.7071067999935475, 0.7071067381763104, -0, 22.99999816412165;
0, 0, 1, -30;
0, 0, 0, 1]
//Calculation of translation between OC1 and OC2:
****Translation from C2 to C1
[52.69848430156708;
-29.69848430156708;
30]
****Translation from C1 to C2
[1.005361930594972e-06;
19;
-30]
As you can see above, the 4th column of (Trans1)-1 * (Trans2) is not equal neither to the translation from C2->C1 nor to the translation from C2->C1
it lets me think that c1Mc2 = (oMc1)^-1 oMc2 does not get what I want but I don't really understand why? Is there any other solution to get what I want?
The rotation matrix of Oc1 is by definition the component of the local XYZ axes:
| Xc1[0] Yc1[0] Zc1[0] |
R_1 = | Xc1[1] Yc1[1] Zc1[1] |
| Xc1[2] Yc1[2] Zc1[2] |
and similarly for R_2 of Oc2.
If the relative rotation between them is R then you can define
R_2 = R_1*R
and thus:
R = transpose(R_1)*R_2
that is all you need.

Inacurate tracking when drawing calcOpticalFlow's outputed feature vector

I have been trying to develop a simple feature tracking program. The user outlines an area on the screen with their mouse, and a mask is created for this area and passed to goodFeaturesToTrack. The features found by the function are then drawn on the screen (represented by blue circles).
Next I pass the feature vector returned by the function to calcOpticalFlowPyrLk and draw the resulting vector of points on the screen (represented by green circles). Although the program tracks the direction of flow correctly, for some reason the features output by the calcOpticalFlow funciton do not line up with the object's location on the screen.
I feel as though it is a small mistake in the logic I have used on my part, but I just can't seem to decompose it, and I would really appreciate some help from the you guys.
I have posted my code below, and I would like to greatly apologize for the global variables and messy structure. I am just testing at the moment, and plan to clean up and convert to an OOP format as soon as I get it running.
As well, here is a link to a YouTube video I have uploaded that demonstrates the behavior I am combating.
bool drawingBox = false;
bool destroyBox = false;
bool targetAcquired = false;
bool featuresFound = false;
CvRect box;
int boxCounter = 0;
cv::Point objectLocation;
cv::Mat prevFrame, nextFrame, prevFrame_1C, nextFrame_1C;
std::vector<cv::Point2f> originalFeatures, newFeatures, baseFeatures;
std::vector<uchar> opticalFlowFeatures;
std::vector<float> opticalFlowFeaturesError;
cv::TermCriteria opticalFlowTermination = cv::TermCriteria(CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.3);
cv::Mat mask;
cv::Mat clearMask;
long currentFrame = 0;
void draw(cv::Mat image, CvRect rectangle)
{
if (drawingBox)
{
cv::rectangle(image, cv::Point(box.x, box.y), cv::Point(box.x + box.width, box.y + box.height), cv::Scalar(225, 238 , 81), 2);
CvRect rectangle2 = cvRect(box.x, box.y, box.width, box.height);
}
if (featuresFound)
{
for (int i = 0; i < originalFeatures.size(); i++)
{
cv::circle(image, baseFeatures[i], 4, cv::Scalar(255, 0, 0), 1, 8, 0);
cv::circle(image, newFeatures[i], 4, cv::Scalar(0, 255, 0),1, 8, 0);
cv::line(image, baseFeatures[i], newFeatures[i], cv::Scalar(255, 0, 0), 2, CV_AA);
}
}
}
void findFeatures(cv::Mat mask)
{
if (!featuresFound && targetAcquired)
{
cv::goodFeaturesToTrack(prevFrame_1C, baseFeatures, 200, 0.1, 0.1, mask);
originalFeatures= baseFeatures;
featuresFound = true;
std::cout << "Number of Corners Detected: " << originalFeatures.size() << std::endl;
for(int i = 0; i < originalFeatures.size(); i++)
{
std::cout << "Corner Location " << i << ": " << originalFeatures[i].x << "," << originalFeatures[i].y << std::endl;
}
}
}
void trackFeatures()
{
cv::calcOpticalFlowPyrLK(prevFrame_1C, nextFrame_1C, originalFeatures, newFeatures, opticalFlowFeatures, opticalFlowFeaturesError, cv::Size(30,30), 5, opticalFlowTermination);
originalFeatures = newFeatures;
}
void mouseCallback(int event, int x, int y, int flags, void *param)
{
cv::Mat frame;
frame = *((cv::Mat*)param);
switch(event)
{
case CV_EVENT_MOUSEMOVE:
{
if(drawingBox)
{
box.width = x-box.x;
box.height = y-box.y;
}
}
break;
case CV_EVENT_LBUTTONDOWN:
{
drawingBox = true;
box = cvRect (x, y, 0, 0);
targetAcquired = false;
cv::destroyWindow("Selection");
}
break;
case CV_EVENT_LBUTTONUP:
{
drawingBox = false;
featuresFound = false;
boxCounter++;
std::cout << "Box " << boxCounter << std::endl;
std::cout << "Box Coordinates: " << box.x << "," << box.y << std::endl;
std::cout << "Box Height: " << box.height << std::endl;
std::cout << "Box Width: " << box.width << std:: endl << std::endl;
if(box.width < 0)
{
box.x += box.width;
box.width *= -1;
}
if(box.height < 0)
{
box.y +=box.height;
box.height *= -1;
}
objectLocation.x = box.x;
objectLocation.y = box.y;
targetAcquired = true;
}
break;
case CV_EVENT_RBUTTONUP:
{
destroyBox = true;
}
break;
}
}
int main ()
{
const char *name = "Boundary Box";
cv::namedWindow(name);
cv::VideoCapture camera;
cv::Mat cameraFrame;
int cameraNumber = 0;
camera.open(cameraNumber);
camera >> cameraFrame;
cv::Mat mask = cv::Mat::zeros(cameraFrame.size(), CV_8UC1);
cv::Mat clearMask = cv::Mat::zeros(cameraFrame.size(), CV_8UC1);
if (!camera.isOpened())
{
std::cerr << "ERROR: Could not access the camera or video!" << std::endl;
}
cv::setMouseCallback(name, mouseCallback, &cameraFrame);
while(true)
{
if (destroyBox)
{
cv::destroyAllWindows();
break;
}
camera >> cameraFrame;
if (cameraFrame.empty())
{
std::cerr << "ERROR: Could not grab a camera frame." << std::endl;
exit(1);
}
camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame);
camera >> prevFrame;
cv::cvtColor(prevFrame, prevFrame_1C, cv::COLOR_BGR2GRAY);
camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame ++);
camera >> nextFrame;
cv::cvtColor(nextFrame, nextFrame_1C, cv::COLOR_BGR2GRAY);
if (targetAcquired)
{
cv::Mat roi (mask, cv::Rect(box.x, box.y, box.width, box.height));
roi = cv::Scalar(255, 255, 255);
findFeatures(mask);
clearMask.copyTo(mask);
trackFeatures();
}
draw(cameraFrame, box);
cv::imshow(name, cameraFrame);
cv::waitKey(20);
}
cv::destroyWindow(name);
return 0;
}
In my opinion you can't use camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame) on a webcam, but I 'm not positive about that.
Instead I suggest you to save the previous frame in your prevFrame variable.
As an example I can suggest you this working code, I only change inside the while loop and I add comment before all my adds :
while(true)
{
if (destroyBox)
{
cv::destroyAllWindows();
break;
}
camera >> cameraFrame;
if (cameraFrame.empty())
{
std::cerr << "ERROR: Could not grab a camera frame." << std::endl;
exit(1);
}
// new lines
if(prevFrame.empty()){
prevFrame = cameraFrame;
continue;
}
// end new lines
//camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame);
//camera >> prevFrame;
cv::cvtColor(prevFrame, prevFrame_1C, cv::COLOR_BGR2GRAY);
//camera.set(CV_CAP_PROP_POS_FRAMES, currentFrame ++);
//camera >> nextFrame;
// new line
nextFrame = cameraFrame;
cv::cvtColor(nextFrame, nextFrame_1C, cv::COLOR_BGR2GRAY);
if (targetAcquired)
{
cv::Mat roi (mask, cv::Rect(box.x, box.y, box.width, box.height));
roi = cv::Scalar(255, 255, 255);
findFeatures(mask);
clearMask.copyTo(mask);
trackFeatures();
}
draw(cameraFrame, box);
cv::imshow(name, cameraFrame);
cv::waitKey(20);
// old = new
// new line
prevFrame = cameraFrame.clone();
}

Resources