How to increase BatchSize with TensorFlow's C++ API?

I took the code from https://gist.github.com/kyrs/9adf86366e9e4f04addb (which takes an OpenCV cv::Mat image as input and converts it to a tensor) and I use it to label images with the model inception_v3_2016_08_28_frozen.pb from the TensorFlow tutorial (https://www.tensorflow.org/tutorials/image_recognition#usage_with_the_c_api). Everything worked fine with a batch size of 1. However, when I increase the batch size to 2 (or greater), the size of
finalOutput (which is of type std::vector<tensorflow::Tensor>) is zero.
Here's the code to reproduce the error:
// Only for VisualStudio
#define COMPILER_MSVC
#define NOMINMAX
#include <string>
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/framework/tensor.h"
int batchSize = 2;
int height = 299;
int width = 299;
int depth = 3;
int mean = 0;
int stdev = 255;
// Set image paths
cv::String pathFilenameImg1 = "D:/IMGS/grace_hopper.jpg";
cv::String pathFilenameImg2 = "D:/IMGS/lenna.jpg";
// Set model paths
std::string graphFile = "D:/Tensorflow/models/inception_v3_2016_08_28_frozen.pb";
std::string labelfile = "D:/Tensorflow/models/imagenet_slim_labels.txt";
std::string InputName = "input";
std::string OutputName = "InceptionV3/Predictions/Reshape_1";
void read_prepare_image(cv::String pathImg, cv::Mat &imgPrepared) {
// Read Color image:
cv::Mat imgBGR = cv::imread(pathImg);
// Resize the image to the model's expected input size.
// Note cv::Size takes (width, height); both are 299 here, so the order is harmless:
cv::Size s(width, height);
cv::Mat imgResized;
cv::resize(imgBGR, imgResized, s, 0, 0, cv::INTER_CUBIC);
// Convert the image to float and normalize data
// (convertTo only changes the depth, so the result is CV_32FC3):
imgResized.convertTo(imgPrepared, CV_32FC3);
imgPrepared = imgPrepared - mean;
imgPrepared = imgPrepared / stdev;
}
int main()
{
// Read and prepare images using OpenCV:
cv::Mat img1, img2;
read_prepare_image(pathFilenameImg1, img1);
read_prepare_image(pathFilenameImg2, img2);
// creating a Tensor for storing the data
tensorflow::Tensor input_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({ batchSize, height, width, depth }));
auto input_tensor_mapped = input_tensor.tensor<float, 4>();
// Copy images data into the tensor:
for (int b = 0; b < batchSize; ++b) {
const float * source_data;
if (b == 0)
source_data = (float*)img1.data;
else
source_data = (float*)img2.data;
for (int y = 0; y < height; ++y) {
const float* source_row = source_data + (y * width * depth);
for (int x = 0; x < width; ++x) {
const float* source_pixel = source_row + (x * depth);
const float* source_B = source_pixel + 0;
const float* source_G = source_pixel + 1;
const float* source_R = source_pixel + 2;
input_tensor_mapped(b, y, x, 0) = *source_R;
input_tensor_mapped(b, y, x, 1) = *source_G;
input_tensor_mapped(b, y, x, 2) = *source_B;
}
}
}
// Load the graph (check the returned Status instead of ignoring it):
tensorflow::GraphDef graph_def;
TF_CHECK_OK(ReadBinaryProto(tensorflow::Env::Default(), graphFile, &graph_def));
// create a session with the graph
std::unique_ptr<tensorflow::Session> session_inception(tensorflow::NewSession(tensorflow::SessionOptions()));
TF_CHECK_OK(session_inception->Create(graph_def));
// run the loaded graph; with batchSize > 1 this Run() fails and leaves finalOutput
// empty, so surface the error instead of continuing silently
std::vector<tensorflow::Tensor> finalOutput;
tensorflow::Status runStatus = session_inception->Run({ { InputName, input_tensor } }, { OutputName }, {}, &finalOutput);
if (!runStatus.ok()) std::cerr << "Run failed: " << runStatus.ToString() << std::endl;
// Get Top 5 classes:
std::cerr << "final output size = " << finalOutput.size() << std::endl;
tensorflow::Tensor output = std::move(finalOutput.at(0));
auto scores = output.flat<float>();
std::cerr << "scores size=" << scores.size() << std::endl;
std::ifstream label(labelfile);
std::string line;
std::vector<std::pair<float, std::string>> sorted;
for (unsigned int i = 0; i <= 1000; ++i) {
std::getline(label, line);
sorted.emplace_back(scores(i), line);
}
std::sort(sorted.begin(), sorted.end());
std::reverse(sorted.begin(), sorted.end());
std::cout << "size of the sorted file is " << sorted.size() << std::endl;
for (unsigned int i = 0; i< 5; ++i)
std::cout << "The output of the current graph has category " << sorted[i].second << " with probability " << sorted[i].first << std::endl;
}
Am I missing anything? Any ideas?
Thanks in advance!

I had the same problem. When I changed to the model used in https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/benchmark (a different version of Inception), bigger batch sizes worked correctly.
Note that you need to change the input size from 299,299,3 to 224,224,3 and the input and output layer names to input:0 and output:0.
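With that model, only the tensor shape and the layer names change in the code above (a sketch; the rest of the pipeline stays the same):
// batchSize x 224 x 224 x 3 input, and the benchmark model's layer names:
tensorflow::Tensor input(tensorflow::DT_FLOAT, tensorflow::TensorShape({ batchSize, 224, 224, 3 }));
session_inception->Run({ { "input:0", input } }, { "output:0" }, {}, &finalOutput);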

Probably the graph in the protobuf file had a fixed batch size of 1, and I was only changing the shape of the input, not the graph. The graph has to accept a variable batch size, which is done by setting the input shape to (None, height, width, channels) when the graph is frozen. Since the graph we have is already frozen, there is no way to change the batch size at this point.
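One way to confirm this is to print the declared shape of the input node after loading the GraphDef: a first dimension of 1 means the batch size was frozen in. A minimal sketch, using only the protobuf accessors generated for GraphDef and the graph_def/InputName variables from the question:
// Sketch: inspect the declared input shape of the frozen graph.
for (const tensorflow::NodeDef& node : graph_def.node()) {
    if (node.name() == InputName && node.attr().count("shape") > 0) {
        const tensorflow::TensorShapeProto& shape = node.attr().at("shape").shape();
        std::cerr << "input shape:";
        for (const auto& dim : shape.dim())
            std::cerr << " " << dim.size(); // -1 means variable (None); 1 means the batch is fixed
        std::cerr << std::endl;
    }
}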

Related

OpenCV: Wrong result in calibrateHandEye function

I am working on a robot application in which I have a camera fixed to a robot gripper. To calculate the transformation matrix between the camera and the gripper, Hcg, I am using the new calibrateHandEye function provided in OpenCV 4.1.0.
I took 10 pictures of the chessboard with the camera mounted on the gripper and at the same time recorded the robot pose.
The code I am working on:
// My_handeye.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#include "pch.h" // with MSVC precompiled headers this must be the first include
#include <iostream>
#include <sstream>
#include <string>
#include <ctime>
#include <cstdio>
#include <opencv2/opencv.hpp>
#include <opencv2/core.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/calib3d.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace std;
Mat eulerAnglesToRotationMatrix(Vec3f &theta);
Vec3f rotationMatrixToEulerAngles(Mat &R);
float rad2deg(float radian);
float deg2rad(float degree);
int main()
{
// Camera calibration information
std::vector<double> distortionCoefficients(5); // camera distortion
distortionCoefficients[0] = 2.4472856611074989e-01;
distortionCoefficients[1] = -8.1042032574246325e-01;
distortionCoefficients[2] = 0;
distortionCoefficients[3] = 0;
distortionCoefficients[4] = 7.8769462320821060e-01;
double f_x = 1.3624172121852105e+03; // Focal length in x axis
double f_y = 1.3624172121852105e+03; // Focal length in y axis (usually the same?)
double c_x = 960; // Camera primary point x
double c_y = 540; // Camera primary point y
cv::Mat cameraMatrix(3, 3, CV_32FC1);
cameraMatrix.at<float>(0, 0) = f_x;
cameraMatrix.at<float>(0, 1) = 0.0;
cameraMatrix.at<float>(0, 2) = c_x;
cameraMatrix.at<float>(1, 0) = 0.0;
cameraMatrix.at<float>(1, 1) = f_y;
cameraMatrix.at<float>(1, 2) = c_y;
cameraMatrix.at<float>(2, 0) = 0.0;
cameraMatrix.at<float>(2, 1) = 0.0;
cameraMatrix.at<float>(2, 2) = 1.0;
Mat rvec(3, 1, CV_32F), tvec(3, 1, CV_32F);
//
std::vector<Mat> R_gripper2base;
std::vector<Mat> t_gripper2base;
std::vector<Mat> R_target2cam;
std::vector<Mat> t_target2cam;
Mat R_cam2gripper = (Mat_<float>(3, 3));
Mat t_cam2gripper = (Mat_<float>(3, 1));
vector<String> fn;
glob("images/*.bmp", fn, false);
vector<Mat> images;
size_t num_images = fn.size(); //number of bmp files in images folder
Size patternsize(6, 8); //number of centers
vector<Point2f> centers; //this will be filled by the detected centers
float cell_size = 30;
vector<Point3f> obj_points;
R_gripper2base.reserve(num_images);
t_gripper2base.reserve(num_images);
R_target2cam.reserve(num_images);
t_target2cam.reserve(num_images);
for (int i = 0; i < patternsize.height; ++i)
for (int j = 0; j < patternsize.width; ++j)
obj_points.push_back(Point3f(float(j*cell_size),
float(i*cell_size), 0.f));
for (size_t i = 0; i < num_images; i++)
images.push_back(imread(fn[i]));
Mat frame;
for (size_t i = 0; i < num_images; i++)
{
frame = imread(fn[i]); //source image
bool patternfound = findChessboardCorners(frame, patternsize, centers);
if (patternfound)
{
drawChessboardCorners(frame, patternsize, Mat(centers), patternfound);
//imshow("window", frame);
//int key = cv::waitKey(0) & 0xff;
solvePnP(Mat(obj_points), Mat(centers), cameraMatrix, distortionCoefficients, rvec, tvec);
Mat R;
Rodrigues(rvec, R); // R is 3x3
R_target2cam.push_back(R);
t_target2cam.push_back(tvec);
Mat T = Mat::eye(4, 4, R.type()); // T is 4x4
T(Range(0, 3), Range(0, 3)) = R * 1; // copies R into T
T(Range(0, 3), Range(3, 4)) = tvec * 1; // copies tvec into T
cout << "T = " << endl << " " << T << endl << endl;
}
cout << patternfound << endl;
}
Vec3f theta_01{ deg2rad(-153.61), deg2rad(8.3), deg2rad(-91.91) };
Vec3f theta_02{ deg2rad(-166.71), deg2rad(3.04), deg2rad(-93.31) };
Vec3f theta_03{ deg2rad(-170.04), deg2rad(24.92), deg2rad(-88.29) };
Vec3f theta_04{ deg2rad(-165.71), deg2rad(24.68), deg2rad(-84.85) };
Vec3f theta_05{ deg2rad(-160.18), deg2rad(-15.94),deg2rad(-56.24) };
Vec3f theta_06{ deg2rad(175.68), deg2rad(10.95), deg2rad(180) };
Vec3f theta_07{ deg2rad(175.73), deg2rad(45.78), deg2rad(-179.92) };
Vec3f theta_08{ deg2rad(-165.34), deg2rad(47.37), deg2rad(-166.25) };
Vec3f theta_09{ deg2rad(-165.62), deg2rad(17.95), deg2rad(-166.17) };
Vec3f theta_10{ deg2rad(-151.99), deg2rad(-14.59),deg2rad(-94.19) };
Mat robot_rot_01 = eulerAnglesToRotationMatrix(theta_01);
Mat robot_rot_02 = eulerAnglesToRotationMatrix(theta_02);
Mat robot_rot_03 = eulerAnglesToRotationMatrix(theta_03);
Mat robot_rot_04 = eulerAnglesToRotationMatrix(theta_04);
Mat robot_rot_05 = eulerAnglesToRotationMatrix(theta_05);
Mat robot_rot_06 = eulerAnglesToRotationMatrix(theta_06);
Mat robot_rot_07 = eulerAnglesToRotationMatrix(theta_07);
Mat robot_rot_08 = eulerAnglesToRotationMatrix(theta_08);
Mat robot_rot_09 = eulerAnglesToRotationMatrix(theta_09);
Mat robot_rot_10 = eulerAnglesToRotationMatrix(theta_10);
const Mat robot_tr_01 = (Mat_<float>(3, 1) << 781.2, 338.59, 903.48);
const Mat robot_tr_02 = (Mat_<float>(3, 1) << 867.65, 382.52, 884.42);
const Mat robot_tr_03 = (Mat_<float>(3, 1) << 856.91, 172.99, 964.61);
const Mat robot_tr_04 = (Mat_<float>(3, 1) << 748.81, 146.75, 1043.29);
const Mat robot_tr_05 = (Mat_<float>(3, 1) << 627.66, 554.08, 920.85);
const Mat robot_tr_06 = (Mat_<float>(3, 1) << 715.06, 195.96, 889.38);
const Mat robot_tr_07 = (Mat_<float>(3, 1) << 790.9, 196.29, 1117.38);
const Mat robot_tr_08 = (Mat_<float>(3, 1) << 743.5, 283.93, 1131.92);
const Mat robot_tr_09 = (Mat_<float>(3, 1) << 748.9, 288.19, 910.58);
const Mat robot_tr_10 = (Mat_<float>(3, 1) << 813.18, 400.44, 917.16);
R_gripper2base.push_back(robot_rot_01);
R_gripper2base.push_back(robot_rot_02);
R_gripper2base.push_back(robot_rot_03);
R_gripper2base.push_back(robot_rot_04);
R_gripper2base.push_back(robot_rot_05);
R_gripper2base.push_back(robot_rot_06);
R_gripper2base.push_back(robot_rot_07);
R_gripper2base.push_back(robot_rot_08);
R_gripper2base.push_back(robot_rot_09);
R_gripper2base.push_back(robot_rot_10);
t_gripper2base.push_back(robot_tr_01);
t_gripper2base.push_back(robot_tr_02);
t_gripper2base.push_back(robot_tr_03);
t_gripper2base.push_back(robot_tr_04);
t_gripper2base.push_back(robot_tr_05);
t_gripper2base.push_back(robot_tr_06);
t_gripper2base.push_back(robot_tr_07);
t_gripper2base.push_back(robot_tr_08);
t_gripper2base.push_back(robot_tr_09);
t_gripper2base.push_back(robot_tr_10);
calibrateHandEye(R_gripper2base, t_gripper2base, R_target2cam, t_target2cam, R_cam2gripper, t_cam2gripper, CALIB_HAND_EYE_TSAI);
Vec3f R_cam2gripper_r = rotationMatrixToEulerAngles(R_cam2gripper);
cout << "R_cam2gripper = " << endl << " " << R_cam2gripper << endl << endl;
cout << "R_cam2gripper_r = " << endl << " " << R_cam2gripper_r << endl << endl;
cout << "t_cam2gripper = " << endl << " " << t_cam2gripper << endl << endl;
}
Mat eulerAnglesToRotationMatrix(Vec3f &theta)
{
// Calculate rotation about x axis
Mat R_x = (Mat_<double>(3, 3) <<
1, 0, 0,
0, cos(theta[0]), -sin(theta[0]),
0, sin(theta[0]), cos(theta[0])
);
// Calculate rotation about y axis
Mat R_y = (Mat_<double>(3, 3) <<
cos(theta[1]), 0, sin(theta[1]),
0, 1, 0,
-sin(theta[1]), 0, cos(theta[1])
);
// Calculate rotation about z axis
Mat R_z = (Mat_<double>(3, 3) <<
cos(theta[2]), -sin(theta[2]), 0,
sin(theta[2]), cos(theta[2]), 0,
0, 0, 1);
// Combined rotation matrix
Mat R = R_z * R_y * R_x;
return R;
}
float rad2deg(float radian) {
return (float)(radian * (180.0 / CV_PI)); // CV_PI avoids the truncated 3.14159 constant
}
float deg2rad(float degree) {
return (float)(degree * (CV_PI / 180.0));
}
// Checks if a matrix is a valid rotation matrix.
bool isRotationMatrix(Mat &R)
{
Mat Rt;
transpose(R, Rt);
Mat shouldBeIdentity = Rt * R;
Mat I = Mat::eye(3, 3, shouldBeIdentity.type());
return norm(I, shouldBeIdentity) < 1e-6;
}
// Calculates rotation matrix to euler angles
// The result is the same as MATLAB except the order
// of the euler angles ( x and z are swapped ).
Vec3f rotationMatrixToEulerAngles(Mat &R)
{
assert(isRotationMatrix(R));
float sy = sqrt(R.at<double>(0, 0) * R.at<double>(0, 0) + R.at<double>(1, 0) * R.at<double>(1, 0));
bool singular = sy < 1e-6;
float x, y, z;
if (!singular)
{
x = atan2(R.at<double>(2, 1), R.at<double>(2, 2));
y = atan2(-R.at<double>(2, 0), sy);
z = atan2(R.at<double>(1, 0), R.at<double>(0, 0));
}
else
{
x = atan2(-R.at<double>(1, 2), R.at<double>(1, 1));
y = atan2(-R.at<double>(2, 0), sy);
z = 0;
}
return Vec3f(x, y, z);
}
The result the function gives me is the following:
R_cam2gripper =
[0.3099803593003124, -0.8923086952824562, -0.3281727733547833;
0.7129271761196039, 0.4465219155360299, -0.5406967916458927;
0.6290047840821058, -0.0663579028402444, 0.7745641421680119]
R_cam2gripper_r =
[-0.0854626, -0.680272, 1.16065]
t_cam2gripper =
[-35.02063730299775;
-74.80633768251272;
-307.6725851251873]
I am getting 'good' results from other software. With those, the robot reaches exactly the points I pick in the camera image (I have a 3D camera, from which I get x, y, z in the camera frame), so they are certainly correct, but I am having trouble reproducing the same result with the OpenCV function.
Sorry for the long introduction to my problem. Any idea why the solution is not what it is supposed to be? My guess is that I have a problem understanding the angles or converting them, but I couldn't find any way to solve this. Any hint is most welcome!
I actually managed to solve this problem. The general idea was correct, but:
I was not interpreting correctly the rotation-vector notation the robot was giving me. It was necessary to multiply the actual values by a factor.
I created a new program that extracts, directly from the robot and the pictures, the matrices that the algorithm requires, and writes these values to a YML file.
The CALIB_HAND_EYE_TSAI method wasn't giving me correct values, but with the four other methods the values seem to converge to the actual ones (see the sketch below).
Anyway, thank you for your help. I am still stuck trying to get more precision out of the algorithm, but that's for another question.
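For completeness, here is a minimal sketch of running all five solvers on the same input for comparison (assuming OpenCV >= 4.1 and the R_gripper2base / t_gripper2base / R_target2cam / t_target2cam vectors filled as in the question):
// Sketch: compare the hand-eye methods available in OpenCV 4.1+.
const std::vector<std::pair<cv::HandEyeCalibrationMethod, std::string>> methods = {
    { cv::CALIB_HAND_EYE_TSAI, "TSAI" },
    { cv::CALIB_HAND_EYE_PARK, "PARK" },
    { cv::CALIB_HAND_EYE_HORAUD, "HORAUD" },
    { cv::CALIB_HAND_EYE_ANDREFF, "ANDREFF" },
    { cv::CALIB_HAND_EYE_DANIILIDIS, "DANIILIDIS" }
};
for (const auto& m : methods)
{
    cv::Mat R, t;
    cv::calibrateHandEye(R_gripper2base, t_gripper2base, R_target2cam, t_target2cam, R, t, m.first);
    std::cout << m.second << ":\nR = " << R << "\nt = " << t.t() << std::endl;
}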

getting primal form from CvSVM trained file

I am trying to train my own detector based on HOG features, and I trained a detector with the CvSVM utility of OpenCV. Now, to use this detector with HOGDescriptor.SetSVM(myDetector), I need to get the trained detector in row-vector (primal) form. For this I am using this code; my implementation is given below:
vector<float> primal;
void LinearSVM::getSupportVector(std::vector<float>& support_vector) {
CvSVM svm;
svm.load("Classifier.xml");
cin.get();
int sv_count = svm.get_support_vector_count();
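// NOTE: `decision_func` below resolves to this->decision_func (inherited from
// CvSVM and never initialized), not to the decision function of the local
// `svm` object that was just loaded: this is why the wrapper fails.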
const CvSVMDecisionFunc* df = decision_func;
const double* alphas = df[0].alpha;
double rho = df[0].rho;
int var_count = svm.get_var_count();
support_vector.resize(var_count, 0);
for (unsigned int r = 0; r < (unsigned)sv_count; r++) {
float myalpha = alphas[r];
const float* v = svm.get_support_vector(r);
for (int j = 0; j < var_count; j++,v++) {
support_vector[j] += (-myalpha) * (*v);
}
}
support_vector.push_back(rho);
}
int main()
{
LinearSVM s;
s.getSupportVector(primal);
return 0;
}
When I use the built-in CvSVM, it shows the SV count as 3, because I have only 3 SVs in my saved file, but since decision_func is protected, I cannot access it. That's why I tried to use that wrapper, but still to no avail. Perhaps you guys can help me out here... Thanks a lot!
Answer with a test harness. I put it in a new answer as it would add a lot of clutter to the original answer, possibly making it a bit confusing.
// Self-contained test harness: includes added so it compiles as shown
#include <opencv2/opencv.hpp>
#include <algorithm>
#include <cstdlib>
#include <cfloat>
#include <iostream>
using namespace cv;
using namespace std;
//dummy features
std::vector<float>
dummyDerReaderForOneDer(const vector<float> &pattern)
{
int i = std::rand() % pattern.size();
int j = std::rand() % pattern.size();
vector<float> patternPulNoise(pattern);
std::random_shuffle(patternPulNoise.begin()+std::min(i,j),patternPulNoise.begin()+std::max(i,j));
return patternPulNoise;
};
//extend CvSVM to get access to weights
class mySVM : public CvSVM
{
public:
vector<float>
getWeightVector(const int descriptorSize);
};
//get the weights
vector<float>
mySVM::getWeightVector(const int descriptorSize)
{
vector<float> svmWeightsVec(descriptorSize+1);
int numSupportVectors = get_support_vector_count();
//this is protected, but can access due to inheritance rules
const CvSVMDecisionFunc *dec = CvSVM::decision_func;
const float *supportVector;
float* svmWeight = &svmWeightsVec[0];
for (int i = 0; i < numSupportVectors; ++i)
{
float alpha = *(dec[0].alpha + i);
supportVector = get_support_vector(i);
for(int j=0;j<descriptorSize;j++)
{
*(svmWeight + j) += alpha * *(supportVector+j);
}
}
*(svmWeight + descriptorSize) = - dec[0].rho;
return svmWeightsVec;
}
// main harness entry point for detector test
int main (int argc, const char * argv[])
{
//dummy variables for example
int posFiles = 10;
int negFiles = 10;
int dims = 1000;
int randomFactor = 4;
//setup some dummy data
vector<float> dummyPosPattern;
dummyPosPattern.assign(int(dims/randomFactor),1.f);
dummyPosPattern.resize(dims );
random_shuffle(dummyPosPattern.begin(),dummyPosPattern.end());
vector<float> dummyNegPattern;
dummyNegPattern.assign(int(dims/randomFactor),1.f);
dummyNegPattern.resize(dims );
random_shuffle(dummyNegPattern.begin(),dummyNegPattern.end());
// the label values and the labels mat
float posLabel = 1.f;
float negLabel = 2.f;
cv::Mat cSvmLabels;
//the data mat
cv::Mat cSvmTrainingData;
//dummy linear svm params
SVMParams cSvmParams;
cSvmParams.svm_type = cv::SVM::C_SVC;
cSvmParams.C = 0.0100;
cSvmParams.kernel_type = cv::SVM::LINEAR;
cSvmParams.term_crit = cv::TermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000000, FLT_EPSILON);
cout << "creating training data. please wait" << endl;
int i;
for(i=0;i<posFiles;i++)
{
//your feature for one box from file
vector<float> d = dummyDerReaderForOneDer(dummyPosPattern);
//push back a new mat made from the vectors data, with copy data flag on
//this shows the format of the mat for a single example, (1 (row) X dims(col) ), as training mat has each **row** as an example;
//the push_back works like vector add adds each example to the bottom of the matrix
cSvmTrainingData.push_back(cv::Mat(1,dims,CV_32FC1,d.data(),true));
//push back a pos label to the labels mat
cSvmLabels.push_back(posLabel);
}
//do same with neg files;
for(i=0;i<negFiles;i++)
{
vector<float> d = dummyDerReaderForOneDer(dummyNegPattern);
cSvmTrainingData.push_back(cv::Mat(1,dims,CV_32FC1,d.data(),true));
cSvmLabels.push_back(negLabel);
}
//have a look (convertTo keeps the channel count, so the result is CV_8UC1)
cv::Mat viz;
cSvmTrainingData.convertTo(viz,CV_8UC1);
viz = viz*255;
cv::imshow("svmData", viz);
cv::waitKey(10);
cout << "press any key to continue" << endl;
getchar();
viz.release();
//create the svm;
cout << "training, please wait" << endl;
mySVM svm;
svm.train(cSvmTrainingData,cSvmLabels,cv::Mat(),cv::Mat(),cSvmParams);
cout << "get weights" << endl;
vector<float> svmWeights = svm.getWeightVector(dims);
for(i=0; i<dims+1; i++)
{
cout << svmWeights[i] << ", ";
if(i==dims)
{
cout << endl << "bias: " << svmWeights[i] << endl;
}
}
cout << "press any key to continue" << endl;
getchar();
cout << "testing, please wait" << endl;
//test the svm with a number of new unseen fake examples, one at a time
int totExamples = 10;
int k;
for(i=0;i<totExamples; i++)
{
cout << endl << endl;
vector<float> dPos = dummyDerReaderForOneDer(dummyPosPattern);
cv::Mat dMatPos(1,dims,CV_32FC1,dPos.data(),true);
float predScoreFromDual = svm.predict(dMatPos,true);
float predScoreBFromPrimal = svmWeights[dims];
for( k = 0; k <= dims - 4; k += 4 )
predScoreBFromPrimal += dPos[k]*svmWeights[k] + dPos[k+1]*svmWeights[k+1] +
dPos[k+2]*svmWeights[k+2] + dPos[k+3]*svmWeights[k+3];
for( ; k < dims; k++ )
predScoreBFromPrimal += dPos[k]*svmWeights[k];
cout << "Dual Score:\t" << predScoreFromDual << "\tPrimal Score:\t" << predScoreBFromPrimal << endl;
}
cout << "press any key to continue" << endl;
getchar();
return(0);
}
Hello again :) Please extend the CvSVM class rather than encapsulating it, as you need access to a protected member.
//header
class mySVM : public CvSVM
{
public:
vector<float>
getWeightVector(const int descriptorSize);
};
//cpp
vector<float>
mySVM::getWeightVector(const int descriptorSize)
{
vector<float> svmWeightsVec(descriptorSize+1);
int numSupportVectors = get_support_vector_count();
//this is protected, but can access due to inheritance rules
const CvSVMDecisionFunc *dec = CvSVM::decision_func;
const float *supportVector;
float* svmWeight = &svmWeightsVec[0];
for (int i = 0; i < numSupportVectors; ++i)
{
float alpha = *(dec[0].alpha + i);
supportVector = get_support_vector(i);
for(int j=0;j<descriptorSize;j++)
{
*(svmWeight + j) += alpha * *(supportVector+j);
}
}
*(svmWeight + descriptorSize) = - dec[0].rho;
return svmWeightsVec;
}
Something like that.
credits:
Obtaining weights in CvSVM, the SVM implementation of OpenCV
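To close the loop with the original goal of feeding HOGDescriptor, here is a minimal usage sketch (assuming the same OpenCV 2.4-era API, the mySVM class above, and a linear model saved as Classifier.xml; the HOG window parameters must of course match the ones used for training):
#include <opencv2/opencv.hpp>
// mySVM as defined above
int main()
{
    mySVM svm;
    svm.load("Classifier.xml"); // load the trained linear SVM
    // get_var_count() is the descriptor size the SVM was trained with
    std::vector<float> primal = svm.getWeightVector(svm.get_var_count());
    cv::HOGDescriptor hog;
    hog.setSVMDetector(primal); // expects the weights followed by the bias term
    return 0;
}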

EM clustering based background foreground segmentation in OPENCV

I tried to perform EM-based background/foreground segmentation using the code below, which I also found on Stack Overflow. But there seems to be an error somewhere, as I never see the second printf statement execute; basically the code never reaches the classification/clustering part. The code is given below. Could someone help me with this?
#include <opencv2/opencv.hpp>
#include <opencv2/legacy/legacy.hpp>
char str1[60];
int main()
{
cv::Mat source = cv::imread("C:\\Image Input\\part1.bmp" );
if(!source.data)
printf(" No data \n");
//output images
cv::Mat meanImg(source.rows, source.cols, CV_32FC3);
cv::Mat fgImg(source.rows, source.cols, CV_8UC3);
cv::Mat bgImg(source.rows, source.cols, CV_8UC3);
//convert the input image to float
cv::Mat floatSource;
source.convertTo(floatSource, CV_32F);
//now convert the float image to column vector
cv::Mat samples(source.rows * source.cols, 3, CV_32FC1);
int idx = 0;
for (int y = 0; y < source.rows; y++) {
cv::Vec3f* row = floatSource.ptr<cv::Vec3f > (y);
for (int x = 0; x < source.cols; x++) {
samples.at<cv::Vec3f > (idx++, 0) = row[x];
}
}
printf(" After Loop \n");
//we need just 2 clusters
cv::EMParams params(2);
cv::ExpectationMaximization em(samples, cv::Mat(), params);
//the two dominating colors
cv::Mat means = em.getMeans();
//the weights of the two dominant colors
cv::Mat weights = em.getWeights();
//we define the foreground as the dominant color with the largest weight
const int fgId = weights.at<float>(0) > weights.at<float>(1) ? 0 : 1;
printf(" After Training \n");
//now classify each of the source pixels
idx = 0;
for (int y = 0; y < source.rows; y++)
{
printf(" Now Classify\n");
for (int x = 0; x < source.cols; x++)
{
//classify
const int result = cvRound(em.predict(samples.row(idx++), NULL));
//get the according mean (dominant color)
const double* ps = means.ptr<double>(result, 0);
//set the according mean value to the mean image
float* pd = meanImg.ptr<float>(y, x);
//float images need to be in [0..1] range
pd[0] = ps[0] / 255.0;
pd[1] = ps[1] / 255.0;
pd[2] = ps[2] / 255.0;
//set either foreground or background
if (result == fgId) {
fgImg.at<cv::Point3_<uchar> >(y, x, 0) = source.at<cv::Point3_<uchar> >(y, x, 0);
} else {
bgImg.at<cv::Point3_<uchar> >(y, x, 0) = source.at<cv::Point3_<uchar> >(y, x, 0);
}
}
}
printf(" Show Images \n");
cv::imshow("Means", meanImg);
cv::imshow("Foreground", fgImg);
cv::imshow("Background", bgImg);
cv::waitKey(0);
return 0;
}
The code works fine. I think you are using images that are too large, so learning takes too long. Try processing smaller images (see the sketch after the correction below).
Just one correction: initialize the images with zeros:
//output images
cv::Mat meanImg = cv::Mat::zeros(source.rows, source.cols, CV_32FC3);
cv::Mat fgImg = cv::Mat::zeros(source.rows, source.cols, CV_8UC3);
cv::Mat bgImg = cv::Mat::zeros(source.rows, source.cols, CV_8UC3);
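If runtime is the bottleneck, here is a minimal sketch of the suggestion: train on a shrunken copy right after loading (the 0.25 scale factor is an arbitrary choice):
// Sketch: shrink the input so EM training sees far fewer pixels.
cv::Mat small;
cv::resize(source, small, cv::Size(), 0.25, 0.25, cv::INTER_AREA);
// ...then build floatSource and samples from `small` instead of `source`.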

Algorithm for shrinking/limiting palette of an image

As input data I have a 24-bit RGB image and a palette with 2..20 fixed colours. These colours are in no way spread regularly over the full colour range.
Now I have to modify the colours of the input image so that only the colours of the given palette are used, choosing the palette colour that is closest to the original colour (not closest mathematically, but closest to a human's visual impression). So what I need is an algorithm that takes an input colour and finds the colour in the target palette that visually fits it best. Please note: I'm not looking for a naive comparison/difference algorithm but for something that really incorporates the impression a colour makes on humans!
Since this is something that should already have been done, and because I do not want to re-invent the wheel: is there some example source code out there that does this job? In the best case it is really a piece of code and not a link to a disastrously huge library ;-)
(I'd guess OpenCV does not provide such a function?)
Thanks
You should look at the Lab colour space. It was designed so that distance in the colour space approximates perceptual distance, so once you have converted your image you can compute distances as you would have done before, but should get a better result from a perceptual point of view. In OpenCV you can use the cvtColor(source, destination, CV_BGR2Lab) function.
Another idea would be to use dithering. The idea is to mix missing colours using neighbouring pixels. A popular algorithm for this is Floyd-Steinberg dithering.
Here is an example of mine, where I combined an optimized palette computed with k-means, the Lab colour space, and Floyd-Steinberg dithering:
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace cv;
using namespace std;
cv::Mat floydSteinberg(cv::Mat img, cv::Mat palette);
cv::Vec3b findClosestPaletteColor(cv::Vec3b color, cv::Mat palette);
int main(int argc, char** argv)
{
// Number of clusters (colors on result image)
int nrColors = 18;
cv::Mat imgBGR = imread(argv[1],1);
cv::Mat img;
cvtColor(imgBGR, img, CV_BGR2Lab);
cv::Mat colVec = img.reshape(1, img.rows*img.cols); // change to a Nx3 column vector
cv::Mat colVecD;
colVec.convertTo(colVecD, CV_32FC3, 1.0); // convert to floating point
cv::Mat labels, centers;
cv::kmeans(colVecD, nrColors, labels,
cv::TermCriteria(CV_TERMCRIT_ITER, 100, 0.1),
3, cv::KMEANS_PP_CENTERS, centers); // compute k mean centers
// replace pixels by their corresponding cluster centers
cv::Mat imgPosterized = img.clone();
for(int i = 0; i < img.rows; i++ )
for(int j = 0; j < img.cols; j++ )
for(int k = 0; k < 3; k++)
imgPosterized.at<Vec3b>(i,j)[k] = centers.at<float>(labels.at<int>(j+img.cols*i),k);
// convert palette back to uchar
cv::Mat palette;
centers.convertTo(palette,CV_8UC3,1.0);
// call floyd steinberg dithering algorithm
cv::Mat fs = floydSteinberg(img, palette);
cv::Mat imgPosterizedBGR, fsBGR;
cvtColor(imgPosterized, imgPosterizedBGR, CV_Lab2BGR);
cvtColor(fs, fsBGR, CV_Lab2BGR);
imshow("input",imgBGR); // original image
imshow("result",imgPosterizedBGR); // posterized image
imshow("fs",fsBGR); // floyd steinberg dithering
waitKey();
return 0;
}
cv::Mat floydSteinberg(cv::Mat imgOrig, cv::Mat palette)
{
cv::Mat img = imgOrig.clone();
cv::Mat resImg = img.clone();
for(int i = 0; i < img.rows; i++ )
for(int j = 0; j < img.cols; j++ )
{
cv::Vec3b newpixel = findClosestPaletteColor(img.at<Vec3b>(i,j), palette);
resImg.at<Vec3b>(i,j) = newpixel;
for(int k=0;k<3;k++)
{
int quant_error = (int)img.at<Vec3b>(i,j)[k] - newpixel[k];
// standard Floyd-Steinberg: diffuse the error only to not-yet-visited
// neighbours (right 7/16, below-left 3/16, below 5/16, below-right 1/16)
if(j+1 < img.cols)
img.at<Vec3b>(i,j+1)[k] = min(255,max(0,(int)img.at<Vec3b>(i,j+1)[k] + (7 * quant_error) / 16));
if(i+1 < img.rows && j-1 >= 0)
img.at<Vec3b>(i+1,j-1)[k] = min(255,max(0,(int)img.at<Vec3b>(i+1,j-1)[k] + (3 * quant_error) / 16));
if(i+1 < img.rows)
img.at<Vec3b>(i+1,j)[k] = min(255,max(0,(int)img.at<Vec3b>(i+1,j)[k] + (5 * quant_error) / 16));
if(i+1 < img.rows && j+1 < img.cols)
img.at<Vec3b>(i+1,j+1)[k] = min(255,max(0,(int)img.at<Vec3b>(i+1,j+1)[k] + (1 * quant_error) / 16));
}
}
return resImg;
}
float vec3bDist(cv::Vec3b a, cv::Vec3b b)
{
return sqrt( pow((float)a[0]-b[0],2) + pow((float)a[1]-b[1],2) + pow((float)a[2]-b[2],2) );
}
cv::Vec3b findClosestPaletteColor(cv::Vec3b color, cv::Mat palette)
{
int minI = 0;
float minDistance = vec3bDist(color, palette.at<Vec3b>(0));
for (int i=0;i<palette.rows;i++)
{
float distance = vec3bDist(color, palette.at<Vec3b>(i));
if (distance < minDistance)
{
minDistance = distance;
minI = i;
}
}
return palette.at<Vec3b>(minI);
}
Try this algorithm (it reduces the number of colours, but it computes the palette by itself):
#include <opencv2/opencv.hpp>
#include "opencv2/legacy/legacy.hpp"
#include <vector>
#include <list>
#include <iostream>
using namespace cv;
using namespace std;
int main(void)
{
// Number of clusters (colors on result image)
int NrGMMComponents = 32;
// Source file name
string fname="D:\\ImagesForTest\\tools.jpg";
cv::Mat SampleImg = imread(fname,1);
//cv::GaussianBlur(SampleImg,SampleImg,Size(5,5),3);
int SampleImgHeight = SampleImg.rows;
int SampleImgWidth = SampleImg.cols;
// Pick datapoints
vector<Vec3d> ListSamplePoints;
for (int y=0; y<SampleImgHeight; y++)
{
for (int x=0; x<SampleImgWidth; x++)
{
// Get pixel color at that position
Vec3b bgrPixel = SampleImg.at<Vec3b>(y, x);
uchar b = bgrPixel.val[0];
uchar g = bgrPixel.val[1];
uchar r = bgrPixel.val[2];
if(rand()%25==0) // Pick not every pixel, but every 25-th on average
{
ListSamplePoints.push_back(Vec3d(b,g,r));
}
} // for (x)
} // for (y)
// Form training matrix
Mat labels;
int NrSamples = ListSamplePoints.size();
Mat samples( NrSamples, 3, CV_32FC1 );
for (int s=0; s<NrSamples; s++)
{
Vec3d v = ListSamplePoints.at(s);
samples.at<float>(s,0) = (float) v[0];
samples.at<float>(s,1) = (float) v[1];
samples.at<float>(s,2) = (float) v[2];
}
cout << "Learning to represent the sample distributions with" << NrGMMComponents << "gaussians." << endl;
// Algorithm parameters
CvEMParams params;
params.covs = NULL;
params.means = NULL;
params.weights = NULL;
params.probs = NULL;
params.nclusters = NrGMMComponents;
params.cov_mat_type = CvEM::COV_MAT_GENERIC; // DIAGONAL, GENERIC, SPHERICAL
params.start_step = CvEM::START_AUTO_STEP;
params.term_crit.max_iter = 1500;
params.term_crit.epsilon = 0.001;
params.term_crit.type = CV_TERMCRIT_ITER|CV_TERMCRIT_EPS;
//params.term_crit.type = CV_TERMCRIT_ITER;
// Train
cout << "Started GMM training" << endl;
CvEM em_model;
em_model.train( samples, Mat(), params, &labels );
cout << "Finished GMM training" << endl;
// Result image
Mat img = Mat::zeros( Size( SampleImgWidth, SampleImgHeight ), CV_8UC3 );
// Ask classifier for each pixel
Mat sample( 1, 3, CV_32FC1 );
Mat means;
means=em_model.getMeans();
for(int i = 0; i < img.rows; i++ )
{
for(int j = 0; j < img.cols; j++ )
{
Vec3b v=SampleImg.at<Vec3b>(i,j);
sample.at<float>(0,0) = (float) v[0];
sample.at<float>(0,1) = (float) v[1];
sample.at<float>(0,2) = (float) v[2];
int response = cvRound(em_model.predict( sample ));
img.at<Vec3b>(i,j)[0]=means.at<double>(response,0);
img.at<Vec3b>(i,j)[1]=means.at<double>(response,1);
img.at<Vec3b>(i,j)[2]=means.at<double>(response,2);
}
}
img.convertTo(img,CV_8UC3);
imshow("result",img);
waitKey();
// Save the result
cv::imwrite("result.png", img);
return 0;
}
PS: For perceptual colour distance measurement it's better to use the L*a*b colour space. There is a converter in OpenCV for this purpose. For clustering you can use k-means with predefined cluster centers (your palette entries). After clustering you'll get points with indices of palette entries.
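Tying this back to the original question of a fixed palette, here is a minimal sketch (not a drop-in solution: the Nx1 CV_8UC3 palette layout and BGR input are assumptions) of matching each pixel against the given palette in Lab space:
#include <opencv2/opencv.hpp>

// Sketch: map every pixel of a BGR image to the nearest entry of a fixed
// palette, measuring distance in the (roughly perceptual) Lab space.
// paletteBGR is assumed to be an Nx1 CV_8UC3 Mat of palette colours.
cv::Mat quantizeToPalette(const cv::Mat& imgBGR, const cv::Mat& paletteBGR)
{
    cv::Mat imgLab, paletteLab;
    cv::cvtColor(imgBGR, imgLab, CV_BGR2Lab);
    cv::cvtColor(paletteBGR, paletteLab, CV_BGR2Lab);
    cv::Mat out = imgBGR.clone();
    for (int i = 0; i < imgLab.rows; i++)
        for (int j = 0; j < imgLab.cols; j++)
        {
            const cv::Vec3b p = imgLab.at<cv::Vec3b>(i, j);
            int best = 0;
            double bestDist = 1e30;
            for (int k = 0; k < paletteLab.rows; k++)
            {
                const cv::Vec3b q = paletteLab.at<cv::Vec3b>(k, 0);
                double d = 0; // squared Euclidean distance in Lab
                for (int c = 0; c < 3; c++)
                    d += double(p[c] - q[c]) * double(p[c] - q[c]);
                if (d < bestDist) { bestDist = d; best = k; }
            }
            out.at<cv::Vec3b>(i, j) = paletteBGR.at<cv::Vec3b>(best, 0);
        }
    return out;
}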

Openni opencv kinect Bad Memory allocation

Basically I've got a loop which goes through all the Kinect's depth pixels. If they are greater than 3000 mm it sets the pixel value to black.
For some reason this works only at close range while pointed at a wall. If I pull the Kinect back (giving it a larger area to scan) I get a bad memory allocation error. My code can be found below; I get the error inside the try/catch statement. Most of the code is from the OpenCV Kinect samples here and here.
I figured out the problem: the depth values are stored in a flat array rather than a matrix, so I needed a better way of finding which location in the array an x,y pixel maps to than i = x + y*640.
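In fact, that mapping itself is fine for 0-based coordinates:
int i = y * 640 + x; // same as x + y*640: row-major index into the flat array
The real culprit, as the fix at the end shows, was the (x, y) argument order in the at<>() call.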
#include <opencv2/opencv.hpp>
#include <iostream>
#include <string>
#include <stdio.h>
#include <OpenNI.h>
using namespace std;
using namespace cv;
int main()
{
openni::Device device;
openni::VideoStream depth;
const char* device_uri = openni::ANY_DEVICE;
openni::Status ret = openni::OpenNI::initialize();
// Open
ret =device.open( device_uri );
ret = depth.create( device, openni::SENSOR_DEPTH );
if ( ret == openni::STATUS_OK )
{
// Start Depth
depth.start();
}
// Get Depth Stream Min-Max Value
int minDepthValue = depth.getMinPixelValue();
int maxDepthValue = depth.getMaxPixelValue();
//cout << "Depth min-Max Value : " << minDepthValue << "-" << maxDepthValue << endl;
// Frame Information Reference
openni::VideoFrameRef depthFrame;
// Get Sensor Resolution Information
int dImgWidth = depth.getVideoMode().getResolutionX();
int dImgHeight = depth.getVideoMode().getResolutionY();
// Depth Image Matrix
cv::Mat dImg = cv::Mat( dImgHeight, dImgWidth, CV_8UC3 );
Mat grey(480, 640, CV_8UC1); // plain cv::Mat instead of the C-style cvCreateImage
for(;;)
{
depth.readFrame( &depthFrame );
openni::DepthPixel* depthImgRaw = (openni::DepthPixel*)depthFrame.getData();
for ( int i = 0 ; i < ( depthFrame.getDataSize() / sizeof( openni::DepthPixel ) ) ; i++ )
{
int idx = i * 3; // Grayscale
unsigned char* data = &dImg.data[idx];
int gray_scale = ( ( ( depthImgRaw[i] - minDepthValue ) * 255 ) / ( maxDepthValue - minDepthValue ) ); // offset by the minimum before scaling
data[0] = (unsigned char)~gray_scale;
data[1] = (unsigned char)~gray_scale;
data[2] = (unsigned char)~gray_scale;
}
openni::DepthPixel* depthpixels = (openni::DepthPixel*)depthFrame.getData();
cvtColor(dImg, grey, CV_RGB2GRAY);
int i = 0;
try{
for( int y =0; y < 480 ; y++){
//getting in to each pixel in a row
for(int x = 0; x < 640; x++){
//getting out the corresponding pixel value from the array
i = x+y*640;
if (depthpixels[i] >3000)
{
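// BUG (fixed below): cv::Mat::at takes (row, col), so at<unsigned char>(x, y)
// indexes row x of a 480-row matrix; once x exceeds 479 this writes out of
// bounds, which corrupts the heap and raises the bad-allocation error.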
grey.at<unsigned char>(x,y) = 0;
}
}
}
}catch(const exception& e)
{
cout << e.what() << endl;
cout << depthpixels[i] << endl;
cout << i << endl;
}
// cv:imshow( "depth", dImg );
imshow("dpeth2", grey);
int k = cvWaitKey( 30 ); // About 30fps
if ( k == 0x1b )
break;
}
// Destroy Streams
depth.destroy();
// Close Device
device.close();
// Shutdown OpenNI
openni::OpenNI::shutdown();
return 0;
}
Solved the problem simply by swapping my x and y around: cv::Mat::at takes (row, col), so indexing the 480x640 matrix as (x, y) went out of bounds once x passed 479, which is what caused the bad memory allocation error.
int i = 0;
for (int y = 0; y < 480; y++)
{
//getting in to each pixel in a row
for (int x = 0; x < 640; x++)
{
if (depthpixels[i]>1500)
{
grey.at<unsigned char >(y,x) = 0;
}
if (depthpixels[i] <500)
{
grey.at<unsigned char >(y,x) = 0;
}
i++;
}
}
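For what it's worth, the manual index bookkeeping can be avoided entirely by wrapping the OpenNI buffer in a cv::Mat header and masking with OpenCV. A sketch, assuming the stream really is 640x480 and noting that openni::DepthPixel is a 16-bit value (CV_16UC1):
// Sketch: view the depth buffer as a 16-bit Mat (no copy) and mask in one call.
cv::Mat depth16(480, 640, CV_16UC1, (void*)depthFrame.getData());
grey.setTo(0, depth16 > 3000); // black out pixels farther than 3000 mm
grey.setTo(0, depth16 < 500);  // and pixels closer than 500 mm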
