Related
I took the code in https://gist.github.com/kyrs/9adf86366e9e4f04addb (which takes an opencv cv::Mat image as input and converts it to tensor) and I use it to label images with the model inception_v3_2016_08_28_frozen.pb stated in the Tensorflow tutorial (https://www.tensorflow.org/tutorials/image_recognition#usage_with_the_c_api). Everything worked fine when using a batchsize of 1. However, when I increase the batchsize to 2 (or greater), the size of
finalOutput (which is of type std::vector) is zero.
Here's the code to reproduce the error:
// Only for VisualStudio
#define COMPILER_MSVC
#define NOMINMAX
#include <string>
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/framework/tensor.h"
// ---- Input tensor geometry: {batchSize, height, width, depth} ----
int batchSize{ 2 };
int height{ 299 };
int width{ 299 };
int depth{ 3 };
// ---- Pixel normalisation applied as (value - mean) / stdev ----
int mean{ 0 };
int stdev{ 255 };
// ---- Images fed to the network ----
cv::String pathFilenameImg1{ "D:/IMGS/grace_hopper.jpg" };
cv::String pathFilenameImg2{ "D:/IMGS/lenna.jpg" };
// ---- Frozen graph and its label list ----
std::string graphFile{ "D:/Tensorflow/models/inception_v3_2016_08_28_frozen.pb" };
std::string labelfile{ "D:/Tensorflow/models/imagenet_slim_labels.txt" };
// ---- Names of the graph nodes used for feeding and fetching ----
std::string InputName{ "input" };
std::string OutputName{ "InceptionV3/Predictions/Reshape_1" };
/// Reads an image from disk and prepares it as network input.
///
/// The image is loaded as BGR, resized to `width` x `height` (file-level
/// globals) with bicubic interpolation, converted to 32-bit float and
/// normalised as (pixel - mean) / stdev on every channel.
///
/// @param pathImg      Path of the image file to load.
/// @param imgPrepared  Receives the prepared float image.
/// @throws cv::Exception if the image cannot be read.
void read_prepare_image(cv::String pathImg, cv::Mat &imgPrepared) {
    const cv::Mat imgBGR = cv::imread(pathImg);
    if (imgBGR.empty()) {
        // Fail with a message that names the file instead of the opaque
        // size assertion cv::resize raises on an empty input.
        CV_Error(cv::Error::StsBadArg,
                 std::string("Could not read image: ") + pathImg.c_str());
    }

    // cv::Size is (width, height) -- the original passed them swapped, which
    // only went unnoticed because both are 299.
    const cv::Size targetSize(width, height);
    cv::Mat imgResized;
    cv::resize(imgBGR, imgResized, targetSize, 0, 0, cv::INTER_CUBIC);

    // convertTo computes alpha * src + beta on ALL channels, i.e.
    // (pixel - mean) / stdev. The previous `imgPrepared - mean` built a
    // Scalar(mean, 0, 0, 0) and therefore shifted channel 0 only.
    const double scale = 1.0 / stdev;
    const double shift = -static_cast<double>(mean) / stdev;
    imgResized.convertTo(imgPrepared, CV_32F, scale, shift);
}
int main()
{
// Read and prepare images using OpenCV:
cv::Mat img1, img2;
read_prepare_image(pathFilenameImg1, img1);
read_prepare_image(pathFilenameImg2, img2);
// creating a Tensor for storing the data
tensorflow::Tensor input_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({ batchSize, height, width, depth }));
auto input_tensor_mapped = input_tensor.tensor<float, 4>();
// Copy images data into the tensor:
for (int b = 0; b < batchSize; ++b) {
const float * source_data;
if (b == 0)
source_data = (float*)img1.data;
else
source_data = (float*)img2.data;
for (int y = 0; y < height; ++y) {
const float* source_row = source_data + (y * width * depth);
for (int x = 0; x < width; ++x) {
const float* source_pixel = source_row + (x * depth);
const float* source_B = source_pixel + 0;
const float* source_G = source_pixel + 1;
const float* source_R = source_pixel + 2;
input_tensor_mapped(b, y, x, 0) = *source_R;
input_tensor_mapped(b, y, x, 1) = *source_G;
input_tensor_mapped(b, y, x, 2) = *source_B;
}
}
}
// Load the graph:
tensorflow::GraphDef graph_def;
ReadBinaryProto(tensorflow::Env::Default(), graphFile, &graph_def);
// create a session with the graph
std::unique_ptr<tensorflow::Session> session_inception(tensorflow::NewSession(tensorflow::SessionOptions()));
session_inception->Create(graph_def);
// run the loaded graph
std::vector<tensorflow::Tensor> finalOutput;
session_inception->Run({ { InputName,input_tensor } }, { OutputName }, {}, &finalOutput);
// Get Top 5 classes:
std::cerr << "final output size = " << finalOutput.size() << std::endl;
tensorflow::Tensor output = std::move(finalOutput.at(0));
auto scores = output.flat<float>();
std::cerr << "scores size=" << scores.size() << std::endl;
std::ifstream label(labelfile);
std::string line;
std::vector<std::pair<float, std::string>> sorted;
for (unsigned int i = 0; i <= 1000; ++i) {
std::getline(label, line);
sorted.emplace_back(scores(i), line);
}
std::sort(sorted.begin(), sorted.end());
std::reverse(sorted.begin(), sorted.end());
std::cout << "size of the sorted file is " << sorted.size() << std::endl;
for (unsigned int i = 0; i< 5; ++i)
std::cout << "The output of the current graph has category " << sorted[i].second << " with probability " << sorted[i].first << std::endl;
}
Do I miss anything? Any ideas?
Thanks in advance!
I had the same problem. When I changed to the model used in https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/benchmark (a different version of Inception), bigger batch sizes worked correctly.
Notice you need to change the input size from 299,299,3 to 224,224,3 and the input and output layer names to: input:0 and output:0
Probably the graph in the protobuf file had a fixed batch size of 1, and I was only changing the shape of the input, not the graph. The graph has to accept a variable batch size, which is done by setting the input shape to (None, height, width, channels) when you freeze the graph. Since the graph we have is already frozen, there is no way to change the batch size at this point.
I am trying to train my own detector based on HOG features, and I have trained a detector with the CvSVM utility of OpenCV. Now, to use this detector in HOGDescriptor.SetSVM(myDetector), I need to get the trained detector in row-vector (primal) form to feed it in. For this I am using this code. My implementation is given below:
// Buffer that main() hands to LinearSVM::getSupportVector() to be filled.
vector<float>primal;
void LinearSVM::getSupportVector(std::vector<float>& support_vector) {
CvSVM svm;
svm.load("Classifier.xml");
cin.get();
int sv_count = svm.get_support_vector_count();
const CvSVMDecisionFunc* df = decision_func;
const double* alphas = df[0].alpha;
double rho = df[0].rho;
int var_count = svm.get_var_count();
support_vector.resize(var_count, 0);
for (unsigned int r = 0; r < (unsigned)sv_count; r++) {
float myalpha = alphas[r];
const float* v = svm.get_support_vector(r);
for (int j = 0; j < var_count; j++,v++) {
support_vector[j] += (-myalpha) * (*v);
}
}
support_vector.push_back(rho);
}
int main()
{
LinearSVM s;
s.getSupportVector(primal);
return 0;
}
When I use the built-in CvSVM, it reports the support-vector count as 3, because I have only 3 support vectors in my saved file; but since decision_func is a protected member, I cannot access it. That's why I tried to use that wrapper, but it is still of no use. Perhaps you guys can help me out here... Thanks a lot!
Answer with a test harness. I put it in a new answer, as it would add a lot of clutter to the original answer, possibly making it a bit confusing.
// Dummy feature reader: returns a copy of `pattern` in which one randomly
// chosen sub-range [min(i,j), max(i,j)) has been shuffled.
//
// Randomness comes from std::rand(), so std::srand() controls reproducibility.
// An empty pattern is returned unchanged (the original divided by zero).
// The shuffle is written out by hand because std::random_shuffle was removed
// in C++17.
std::vector<float>
dummyDerReaderForOneDer(const std::vector<float> &pattern)
{
    std::vector<float> patternPlusNoise(pattern);
    if (pattern.empty())
        return patternPlusNoise;

    const std::size_t i = static_cast<std::size_t>(std::rand()) % pattern.size();
    const std::size_t j = static_cast<std::size_t>(std::rand()) % pattern.size();
    const std::size_t first = std::min(i, j);
    const std::size_t last = std::max(i, j);

    // Fisher-Yates shuffle of [first, last).
    for (std::size_t remaining = last - first; remaining > 1; --remaining)
    {
        const std::size_t pick = static_cast<std::size_t>(std::rand()) % remaining;
        std::swap(patternPlusNoise[first + remaining - 1], patternPlusNoise[first + pick]);
    }
    return patternPlusNoise;
}
//extend CvSVM to get access to weights
// CvSVM subclass that adds a single accessor for the primal weight vector.
// Deriving (rather than wrapping) is what grants access to CvSVM's protected
// decision_func member.
class mySVM : public CvSVM
{
public:
    // Returns descriptorSize weights followed by one bias entry.
    std::vector<float> getWeightVector(const int descriptorSize);
};
//get the weights
vector<float>
mySVM::getWeightVector(const int descriptorSize)
{
vector<float> svmWeightsVec(descriptorSize+1);
int numSupportVectors = get_support_vector_count();
//this is protected, but can access due to inheritance rules
const CvSVMDecisionFunc *dec = CvSVM::decision_func;
const float *supportVector;
float* svmWeight = &svmWeightsVec[0];
for (int i = 0; i < numSupportVectors; ++i)
{
float alpha = *(dec[0].alpha + i);
supportVector = get_support_vector(i);
for(int j=0;j<descriptorSize;j++)
{
*(svmWeight + j) += alpha * *(supportVector+j);
}
}
*(svmWeight + descriptorSize) = - dec[0].rho;
return svmWeightsVec;
}
// main harness entry point for detector test
int main (int argc, const char * argv[])
{
// Test harness: builds synthetic positive/negative samples, trains a linear
// mySVM on them, extracts the weight vector with getWeightVector(), and then
// prints svm.predict() next to the score recomputed from those weights for a
// few fresh positive samples. Always returns 0.
//dummy variables for example
int posFiles = 10;
int negFiles = 10;
int dims = 1000;
int randomFactor = 4;
//setup some dummy data
// Each base pattern holds dims/randomFactor ones padded with zeros up to
// `dims` elements, then shuffled.
vector<float> dummyPosPattern;
dummyPosPattern.assign(int(dims/randomFactor),1.f);
dummyPosPattern.resize(dims );
random_shuffle(dummyPosPattern.begin(),dummyPosPattern.end());
vector<float> dummyNegPattern;
dummyNegPattern.assign(int(dims/randomFactor),1.f);
dummyNegPattern.resize(dims );
random_shuffle(dummyNegPattern.begin(),dummyNegPattern.end());
// the labels and labels mat
float posLabel = 1.f;
float negLabel = 2.f;
cv::Mat cSvmLabels;
//the data mat
cv::Mat cSvmTrainingData;
//dummy linear svm params
SVMParams cSvmParams;
cSvmParams.svm_type = cv::SVM::C_SVC;
cSvmParams.C = 0.0100;
cSvmParams.kernel_type = cv::SVM::LINEAR;
cSvmParams.term_crit = cv::TermCriteria(CV_TERMCRIT_ITER+CV_TERMCRIT_EPS, 1000000, FLT_EPSILON);
cout << "creating training data. please wait" << endl;
int i;
for(i=0;i<posFiles;i++)
{
//your feature for one box from file
vector<float> d = dummyDerReaderForOneDer(dummyPosPattern);
//push back a new mat made from the vectors data, with copy data flag on
//this shows the format of the mat for a single example, (1 (row) X dims(col) ), as training mat has each **row** as an example;
//the push_back works like vector add adds each example to the bottom of the matrix
cSvmTrainingData.push_back(cv::Mat(1,dims,CV_32FC1,d.data(),true));
//push back a pos label to the labels mat
cSvmLabels.push_back(posLabel);
}
//do same with neg files;
for(i=0;i<negFiles;i++)
{
// NOTE(review): `a` is never read. The rand() call still consumes one value
// from the C random sequence that dummyDerReaderForOneDer also draws from, so
// removing it would change the generated data.
float a = rand();
vector<float> d = dummyDerReaderForOneDer(dummyNegPattern);
cSvmTrainingData.push_back(cv::Mat(1,dims,CV_32FC1,d.data(),true));
cSvmLabels.push_back(negLabel);
}
//have a look
// The samples contain only 0.f and 1.f, so after conversion to 8-bit the
// scaling by 255 maps them to black/white for display.
cv::Mat viz;
cSvmTrainingData.convertTo(viz,CV_8UC3);
viz = viz*255;
cv::imshow("svmData", viz);
cv::waitKey(10);
cout << "press any key to continue" << endl;
getchar();
viz.release();
//create the svm;
cout << "training, please wait" << endl;
mySVM svm;
svm.train(cSvmTrainingData,cSvmLabels,cv::Mat(),cv::Mat(),cSvmParams);
cout << "get weights" << endl;
vector<float> svmWeights = svm.getWeightVector(dims);
// Prints all dims+1 entries; the last one (index dims) is printed a second
// time on its own line, labelled as the bias.
for(i=0; i<dims+1; i++)
{
cout << svmWeights[i] << ", ";
if(i==dims)
{
cout << endl << "bias: " << svmWeights[i] << endl;
}
}
cout << "press any key to continue" << endl;
getchar();
cout << "testing, please wait" << endl;
//test the svm with new unseen fake samples, one at a time
int totExamples = 10;
int k;
for(i=0;i<totExamples; i++)
{
cout << endl << endl;
vector<float> dPos = dummyDerReaderForOneDer(dummyPosPattern);
cv::Mat dMatPos(1,dims,CV_32FC1,dPos.data(),true);
// presumably the `true` flag requests the raw decision value instead of the
// class label -- confirm against CvSVM::predict
float predScoreFromDual = svm.predict(dMatPos,true);
// Primal score = bias (last weight) + dot(sample, weights); the dot product
// is unrolled by four, with a remainder loop for the tail.
float predScoreBFromPrimal = svmWeights[dims];
for( k = 0; k <= dims - 4; k += 4 )
predScoreBFromPrimal += dPos[k]*svmWeights[k] + dPos[k+1]*svmWeights[k+1] +
dPos[k+2]*svmWeights[k+2] + dPos[k+3]*svmWeights[k+3];
for( ; k < dims; k++ )
predScoreBFromPrimal += dPos[k]*svmWeights[k];
cout << "Dual Score:\t" << predScoreFromDual << "\tPrimal Score:\t" << predScoreBFromPrimal << endl;
}
cout << "press any key to continue" << endl;
getchar();
return(0);
}
Hello again :) Please extend the CvSVM class rather than encapsulating it, as you need access to a protected member.
//header
// CvSVM subclass that adds a single accessor for the primal weight vector.
// Deriving (rather than wrapping) is what grants access to CvSVM's protected
// decision_func member.
class mySVM : public CvSVM
{
public:
    // Returns descriptorSize weights followed by one bias entry.
    std::vector<float> getWeightVector(const int descriptorSize);
};
//cpp
vector<float>
mySVM::getWeightVector(const int descriptorSize)
{
vector<float> svmWeightsVec(descriptorSize+1);
int numSupportVectors = get_support_vector_count();
//this is protected, but can access due to inheritance rules
const CvSVMDecisionFunc *dec = CvSVM::decision_func;
const float *supportVector;
float* svmWeight = &svmWeightsVec[0];
for (int i = 0; i < numSupportVectors; ++i)
{
float alpha = *(dec[0].alpha + i);
supportVector = get_support_vector(i);
for(int j=0;j<descriptorSize;j++)
{
*(svmWeight + j) += alpha * *(supportVector+j);
}
}
*(svmWeight + descriptorSize) = - dec[0].rho;
return svmWeightsVec;
}
something like that.
credits:
Obtaining weights in CvSVM, the SVM implementation of OpenCV
I have some code to draw a line between two points on an image which are selected by mouse, and then to display a histogram.
However, when I press q as required by code I get an error saying R6010 abort() has been called and saying VC++ run time error.
Please advise me how I can find this error.
#include <vector>
#include "opencv2/highgui/highgui.hpp"
#include <opencv\cv.h>
#include <iostream>
#include<conio.h>
using namespace cv;
using namespace std;
// One point of the curve drawn by PlotMeNow.
struct Data_point
{
    int            x;  // horizontal coordinate
    unsigned short y;  // vertical coordinate / sample value
};
int PlotMeNow(unsigned short int *values, unsigned int nSamples)
{
std::vector<Data_point> graph(nSamples);
for (unsigned int i = 0; i < nSamples; i++)
{
graph[i].x = i;
graph[i].y = values[i];
}
cv::Size imageSize(5000, 500); // your window size
cv::Mat image(imageSize, CV_8UC1);
if (image.empty()) //check whether the image is valid or not
{
std::cout << "Error : Image cannot be created..!!" << std::endl;
system("pause"); //wait for a key press
return 0;
}
else
{
std::cout << "Good job : Image created successfully..!!" << std::endl;
}
// tru to do some ofesseting so the graph do not hide on x or y axis
Data_point dataOffset;
dataOffset.x = 20;
// we have to mirror the y axis!
dataOffset.y = 5000;
for (unsigned int i = 0; i<nSamples; ++i)
{
graph[i].x = (graph[i].x + dataOffset.x) * 3;
graph[i].y = (graph[i].y + dataOffset.y) / 200;
}
// draw the samples
for (unsigned int i = 0; i<nSamples - 1; ++i)
{
cv::Point2f p1;
p1.x = graph[i].x;
p1.y = graph[i].y;
cv::Point2f p2;
p2.x = graph[i + 1].x;
p2.y = graph[i + 1].y;
cv::line(image, p1, p2, 'r', 1, 4, 0);
}
cv::namedWindow("MyWindow1", CV_WINDOW_AUTOSIZE); //create a window with the name "MyWindow"
cv::imshow("MyWindow1", image); //display the image which is stored in the 'img' in the "MyWindow" window
while (true)
{
char c = cv::waitKey(10);
if (c == 'q')
break;
}
destroyWindow("MyWindow1");
destroyWindow("MyWindow"); //destroy the window with the name, "MyWindow"
return 0;
}
void IterateLine(const Mat& image, vector<ushort>& linePixels, Point p2, Point p1, int* count1)
{
LineIterator it(image, p2, p1, 8);
for (int i = 0; i < it.count; i++, it++)
{
linePixels.push_back(image.at<ushort>(it.pos())); //doubt
}
*count1 = it.count;
}
//working line with mouse
void onMouse(int evt, int x, int y, int flags, void* param)
{
if (evt == CV_EVENT_LBUTTONDOWN)
{
std::vector<cv::Point>* ptPtr = (std::vector<cv::Point>*)param;
ptPtr->push_back(cv::Point(x, y));
}
}
// Shows `image`, waits until the user has clicked two points, draws a line
// between the first two clicks and keeps the window open until 'q' is pressed.
//
// image  - image to display; the Mat header is copied but the pixel data is
//          shared, so the line is drawn into the caller's image.
// points - receives the clicked points (filled by onMouse).
void drawline(Mat image, std::vector<Point>& points)
{
    cv::namedWindow("Output Window");
    cv::setMouseCallback("Output Window", onMouse, static_cast<void*>(&points));

    // Keep refreshing the window until two clicks have been collected.
    while (points.size() < 2)
    {
        cv::imshow("Output Window", image);
        waitKey(10);
    }

    // Red in BGR order. The original passed the char 'r', which converts to
    // Scalar(114, 0, 0), i.e. a dark blue. (On a single-channel image only the
    // first component is used.)
    line(image, points[0], points[1], Scalar(0, 0, 255), 2, 8);
    cv::imshow("Output Window", image);

    //exit image window
    while (true)
    {
        char c = cv::waitKey(10);
        if (c == 'q')
            break;
    }
    destroyWindow("Output Window");
}
// Computes the histogram of the first channel of `img1` and shows it as a
// bar plot in the "Histogram" window, then waits for a key press.
//
// img1 - 8-bit or 16-bit unsigned image (256 or 65536 bins respectively).
//
// Fixes over the original: the `for` statement was missing its opening
// parenthesis; the plot canvas was 65536 x 655360 16-bit pixels (~80 GB), so
// allocation could only fail; an all-zero histogram divided by zero.
void show_histogram_image(Mat img1)
{
    CV_Assert(!img1.empty());
    const int pixelDepth = img1.depth();
    CV_Assert(pixelDepth == CV_8U || pixelDepth == CV_16U);

    // One bin per representable value of the pixel depth.
    const int sbins = (pixelDepth == CV_16U) ? 65536 : 256;
    int histSize[] = { sbins };
    float sranges[] = { 0.f, static_cast<float>(sbins) };
    const float* ranges[] = { sranges };
    int channels[] = { 0 };
    cv::MatND hist;
    cv::calcHist(&img1, 1, channels, cv::Mat(), // do not use mask
        hist, 1, histSize, ranges,
        true, // the histogram is uniform
        false);

    double maxVal = 0;
    minMaxLoc(hist, 0, &maxVal, 0, 0);

    // Fixed-size canvas; bins are mapped proportionally onto its columns.
    const int plotWidth = 1024;
    const int plotHeight = 400;
    cv::Mat hist_image = cv::Mat::zeros(plotHeight, plotWidth, CV_8UC1);
    if (maxVal > 0)
    {
        for (int s = 0; s < sbins; s++)
        {
            const float binVal = hist.at<float>(s, 0);
            const int barHeight = cvRound(binVal * (plotHeight - 1) / maxVal);
            if (barHeight <= 0)
                continue;
            const int x0 = s * plotWidth / sbins;
            int x1 = (s + 1) * plotWidth / sbins - 1;
            if (x1 < x0)
                x1 = x0; // several bins share one column when sbins > plotWidth
            rectangle(hist_image, cv::Point(x0, plotHeight - 1),
                cv::Point(x1, plotHeight - 1 - barHeight),
                cv::Scalar::all(255), CV_FILLED);
        }
    }
    imshow("Histogram", hist_image);
    waitKey(0);
}
// Lets the user pick two points on "desert.jpg", plots the pixel values along
// the line between them and finally shows the image histogram.
// Returns -1 if the image cannot be read, 0 otherwise.
int main()
{
    vector<Point> points1;
    vector<ushort> linePixels;

    // imread returns a Mat that owns its data; converting the IplImage* from
    // cvLoadImage into a Mat never released the underlying image.
    Mat img = imread("desert.jpg");
    if (img.empty()) //check whether the image is valid or not
    {
        cout << "Error : Image cannot be read..!!" << endl;
        system("pause"); //wait for a key press
        return -1;
    }
    //Draw the line
    drawline(img, points1);

    //now check the collected points on a fresh, unmarked copy of the image
    Mat img1 = imread("desert.jpg");
    if (img1.empty()) //check whether the image is valid or not
    {
        cout << "Error : Image cannot be read..!!" << endl;
        system("pause"); //wait for a key press
        return -1;
    }

    int pixelCount = 0; // plain local instead of new/delete
    IterateLine(img1, linePixels, points1[1], points1[0], &pixelCount);
    // Taking &linePixels[0] of an empty vector is undefined, so plot only
    // when samples were actually collected.
    if (!linePixels.empty())
        PlotMeNow(&linePixels[0], static_cast<unsigned int>(linePixels.size()));
    show_histogram_image(img);
    _getch();
    return 0;
}
This is one of the bad smells in your code:
void IterateLine(const Mat& image, vector<ushort>& linePixels, Point p2, Point p1, int* count1)
{
...
linePixels.push_back(image.at<ushort>(it.pos())); //doubt
Now image is a CV_8UC3 image (from Mat img1 = cvLoadImage("desert.jpg");, but you are accessing here like it is CV_16UC1, so what gets put in linePixels is garbage. This will almost certainly cause PlotMeNow() to draw outside its image and corrupt something, which is probably why your code is crashing.
Since it is very unclear what your code is trying to do, I can't suggest what you should have here instead.
I have just managed to do this, you only have to put "-1" to your loop limit:
for (unsigned int i = 0; i < nSamples-1; i++)
{
graph[i].x = i;
graph[i].y = values[i];
}
as input data I have a 24 bit RGB image and a palette with 2..20 fixed colours. These colours are in no way spread regularly over the full colour range.
Now I have to modify the colours of input image so that only the colours of the given palette are used - using the colour out of the palette that is closest to the original colour (not closest mathematically but for human's visual impression). So what I need is an algorithm that uses an input colour and finds the colour in target palette that visually fits best to this colour. Please note: I'm not looking for a stupid comparison/difference algorithm but for something that really incorporates the impression a colour has on humans!
Since this is something that should already have been done, and because I do not want to re-invent the wheel: is there some example source code out there that does this job? In the best case it is really a piece of code and not a link to a disastrously huge library ;-)
(I'd guess OpenCV does not provide such a function?)
Thanks
You should look at the Lab color space. It was designed so that the distance in the colour space equals the perceptual distance. So once you have converted your image you can compute the distances as you would have done earlier, but should get a better result from a perceptual point of view. In OpenCV you can use the cvtColor(source, destination, CV_BGR2Lab) function.
Another Idea would be to use dithering. The idea is to mix missing colours using neighbouring pixels. A popular algorithm for this is Floyd-Steinberg dithering.
Here is an example of mine, where I combined a optimized palette using k-means with the Lab colourspace and floyd steinberg dithering:
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace cv;
using namespace std;
cv::Mat floydSteinberg(cv::Mat img, cv::Mat palette);
cv::Vec3b findClosestPaletteColor(cv::Vec3b color, cv::Mat palette);
int main(int argc, char** argv)
{
// Number of clusters (colors on result image)
int nrColors = 18;
cv::Mat imgBGR = imread(argv[1],1);
cv::Mat img;
cvtColor(imgBGR, img, CV_BGR2Lab);
cv::Mat colVec = img.reshape(1, img.rows*img.cols); // change to a Nx3 column vector
cv::Mat colVecD;
colVec.convertTo(colVecD, CV_32FC3, 1.0); // convert to floating point
cv::Mat labels, centers;
cv::kmeans(colVecD, nrColors, labels,
cv::TermCriteria(CV_TERMCRIT_ITER, 100, 0.1),
3, cv::KMEANS_PP_CENTERS, centers); // compute k mean centers
// replace pixels by there corresponding image centers
cv::Mat imgPosterized = img.clone();
for(int i = 0; i < img.rows; i++ )
for(int j = 0; j < img.cols; j++ )
for(int k = 0; k < 3; k++)
imgPosterized.at<Vec3b>(i,j)[k] = centers.at<float>(labels.at<int>(j+img.cols*i),k);
// convert palette back to uchar
cv::Mat palette;
centers.convertTo(palette,CV_8UC3,1.0);
// call floyd steinberg dithering algorithm
cv::Mat fs = floydSteinberg(img, palette);
cv::Mat imgPosterizedBGR, fsBGR;
cvtColor(imgPosterized, imgPosterizedBGR, CV_Lab2BGR);
cvtColor(fs, fsBGR, CV_Lab2BGR);
imshow("input",imgBGR); // original image
imshow("result",imgPosterizedBGR); // posterized image
imshow("fs",fsBGR); // floyd steinberg dithering
waitKey();
return 0;
}
cv::Mat floydSteinberg(cv::Mat imgOrig, cv::Mat palette)
{
cv::Mat img = imgOrig.clone();
cv::Mat resImg = img.clone();
for(int i = 0; i < img.rows; i++ )
for(int j = 0; j < img.cols; j++ )
{
cv::Vec3b newpixel = findClosestPaletteColor(img.at<Vec3b>(i,j), palette);
resImg.at<Vec3b>(i,j) = newpixel;
for(int k=0;k<3;k++)
{
int quant_error = (int)img.at<Vec3b>(i,j)[k] - newpixel[k];
if(i+1<img.rows)
img.at<Vec3b>(i+1,j)[k] = min(255,max(0,(int)img.at<Vec3b>(i+1,j)[k] + (7 * quant_error) / 16));
if(i-1 > 0 && j+1 < img.cols)
img.at<Vec3b>(i-1,j+1)[k] = min(255,max(0,(int)img.at<Vec3b>(i-1,j+1)[k] + (3 * quant_error) / 16));
if(j+1 < img.cols)
img.at<Vec3b>(i,j+1)[k] = min(255,max(0,(int)img.at<Vec3b>(i,j+1)[k] + (5 * quant_error) / 16));
if(i+1 < img.rows && j+1 < img.cols)
img.at<Vec3b>(i+1,j+1)[k] = min(255,max(0,(int)img.at<Vec3b>(i+1,j+1)[k] + (1 * quant_error) / 16));
}
}
return resImg;
}
// Euclidean distance between two 3-component 8-bit colours.
float vec3bDist(cv::Vec3b a, cv::Vec3b b)
{
    // Channel differences are small integers, so the sum of squares is exact.
    const double d0 = static_cast<double>(a[0]) - b[0];
    const double d1 = static_cast<double>(a[1]) - b[1];
    const double d2 = static_cast<double>(a[2]) - b[2];
    return static_cast<float>(std::sqrt(d0 * d0 + d1 * d1 + d2 * d2));
}
// Returns the palette entry with the smallest vec3bDist() to `color`.
//
// color   - colour to match.
// palette - non-empty matrix with one colour per row, read as Vec3b.
// Ties keep the entry with the lowest index.
// Raises a cv::Exception (CV_Assert) on an empty palette.
cv::Vec3b findClosestPaletteColor(cv::Vec3b color, cv::Mat palette)
{
    CV_Assert(!palette.empty());

    // Entry 0 seeds the search, so the scan can start at 1. The unused `diff`
    // and the shadowed outer `i` of the original are gone.
    int minI = 0;
    float minDistance = vec3bDist(color, palette.at<Vec3b>(0));
    for (int i = 1; i < palette.rows; i++)
    {
        const float distance = vec3bDist(color, palette.at<Vec3b>(i));
        if (distance < minDistance)
        {
            minDistance = distance;
            minI = i;
        }
    }
    return palette.at<Vec3b>(minI);
}
Try this algorithm (it will reduce the number of colors, but it computes the palette by itself):
#include <opencv2/opencv.hpp>
#include "opencv2/legacy/legacy.hpp"
#include <vector>
#include <list>
#include <iostream>
using namespace cv;
using namespace std;
void main(void)
{
// Number of clusters (colors on result image)
int NrGMMComponents = 32;
// Source file name
string fname="D:\\ImagesForTest\\tools.jpg";
cv::Mat SampleImg = imread(fname,1);
//cv::GaussianBlur(SampleImg,SampleImg,Size(5,5),3);
int SampleImgHeight = SampleImg.rows;
int SampleImgWidth = SampleImg.cols;
// Pick datapoints
vector<Vec3d> ListSamplePoints;
for (int y=0; y<SampleImgHeight; y++)
{
for (int x=0; x<SampleImgWidth; x++)
{
// Get pixel color at that position
Vec3b bgrPixel = SampleImg.at<Vec3b>(y, x);
uchar b = bgrPixel.val[0];
uchar g = bgrPixel.val[1];
uchar r = bgrPixel.val[2];
if(rand()%25==0) // Pick not every, bu t every 25-th
{
ListSamplePoints.push_back(Vec3d(b,g,r));
}
} // for (x)
} // for (y)
// Form training matrix
Mat labels;
int NrSamples = ListSamplePoints.size();
Mat samples( NrSamples, 3, CV_32FC1 );
for (int s=0; s<NrSamples; s++)
{
Vec3d v = ListSamplePoints.at(s);
samples.at<float>(s,0) = (float) v[0];
samples.at<float>(s,1) = (float) v[1];
samples.at<float>(s,2) = (float) v[2];
}
cout << "Learning to represent the sample distributions with" << NrGMMComponents << "gaussians." << endl;
// Algorithm parameters
CvEMParams params;
params.covs = NULL;
params.means = NULL;
params.weights = NULL;
params.probs = NULL;
params.nclusters = NrGMMComponents;
params.cov_mat_type = CvEM::COV_MAT_GENERIC; // DIAGONAL, GENERIC, SPHERICAL
params.start_step = CvEM::START_AUTO_STEP;
params.term_crit.max_iter = 1500;
params.term_crit.epsilon = 0.001;
params.term_crit.type = CV_TERMCRIT_ITER|CV_TERMCRIT_EPS;
//params.term_crit.type = CV_TERMCRIT_ITER;
// Train
cout << "Started GMM training" << endl;
CvEM em_model;
em_model.train( samples, Mat(), params, &labels );
cout << "Finished GMM training" << endl;
// Result image
Mat img = Mat::zeros( Size( SampleImgWidth, SampleImgHeight ), CV_8UC3 );
// Ask classifier for each pixel
Mat sample( 1, 3, CV_32FC1 );
Mat means;
means=em_model.getMeans();
for(int i = 0; i < img.rows; i++ )
{
for(int j = 0; j < img.cols; j++ )
{
Vec3b v=SampleImg.at<Vec3b>(i,j);
sample.at<float>(0,0) = (float) v[0];
sample.at<float>(0,1) = (float) v[1];
sample.at<float>(0,2) = (float) v[2];
int response = cvRound(em_model.predict( sample ));
img.at<Vec3b>(i,j)[0]=means.at<double>(response,0);
img.at<Vec3b>(i,j)[1]=means.at<double>(response,1);
img.at<Vec3b>(i,j)[2]=means.at<double>(response,2);
}
}
img.convertTo(img,CV_8UC3);
imshow("result",img);
waitKey();
// Save the result
cv::imwrite("result.png", img);
}
PS: For perceptual color distance measurement it's better to use the L*a*b color space. There is a converter in OpenCV for this purpose. For clustering you can use k-means with predefined cluster centers (your palette entries). After clustering you'll get points with the indexes of the palette entries.
I wish to run another video in the window of the main video. Here is the attempted code for it :
#include <cv.h>
#include <highgui.h>
#include <iostream>
using namespace std;
// Blends `overlay` into `src` with its top-left corner at (100, 100).
//
// src     - destination image, modified in place.
// overlay - image to blend in.
// S, D    - per-channel weights: result = S * source + D * overlay.
//
// The part of the overlay that would fall outside `src` is skipped.
// Fix over the original: the channel loop re-used the name `i`, and the write
// went to (i + location.y, j + location.x) although i and j already include
// the offset, so the overlay was read at one place and written 100 pixels
// further down and to the right.
void OverlayImage(IplImage* src, IplImage* overlay, CvScalar S, CvScalar D) {
    if (src == NULL || overlay == NULL)
        return;

    CvPoint location;
    location.x = 100;
    location.y = 100;

    // Clip the target rectangle to the destination image.
    int rowEnd = location.y + overlay->height;
    if (rowEnd > src->height)
        rowEnd = src->height;
    int colEnd = location.x + overlay->width;
    if (colEnd > src->width)
        colEnd = src->width;

    for (int row = location.y; row < rowEnd; row++) {
        for (int col = location.x; col < colEnd; col++) {
            const CvScalar source = cvGet2D(src, row, col);
            const CvScalar over = cvGet2D(overlay, row - location.y, col - location.x);
            CvScalar merged;
            for (int ch = 0; ch < 4; ch++)
                merged.val[ch] = (S.val[ch] * source.val[ch] + D.val[ch] * over.val[ch]);
            cvSet2D(src, row, col, merged);
        }
    }
}
int main (int argc, char* argv[]) {
CvCapture* capture = NULL;
CvCapture* ad = NULL;
capture = cvCaptureFromAVI("Cricketc11.avi");
ad = cvCaptureFromAVI("Cricketc1.avi");
assert(ad);
assert(capture);
cvNamedWindow("Video", 0);
int fps = ( int )cvGetCaptureProperty( capture, CV_CAP_PROP_FPS );
int noOfFrames = ( int )cvGetCaptureProperty( capture, CV_CAP_PROP_FRAME_COUNT );
int height = ( int )cvGetCaptureProperty( capture, CV_CAP_PROP_FRAME_HEIGHT );
int width = ( int )cvGetCaptureProperty( capture, CV_CAP_PROP_FRAME_WIDTH );
cout << height << " " << width << endl;
int fpsad = ( int )cvGetCaptureProperty( ad, CV_CAP_PROP_FPS );
int noOfFramesad = ( int )cvGetCaptureProperty( ad, CV_CAP_PROP_FRAME_COUNT );
int heightad = ( int )cvGetCaptureProperty( ad, CV_CAP_PROP_FRAME_HEIGHT );
int widthad = ( int )cvGetCaptureProperty( ad, CV_CAP_PROP_FRAME_WIDTH );
IplImage* tempimg = NULL;
IplImage* tempad = NULL;
while(capture) {
tempimg = cvQueryFrame(capture);
assert(tempimg);
if (ad) {
tempad = cvQueryFrame(ad);
assert(tempad);
IplImage* newimg = cvCreateImage(cvSize(100,100), IPL_DEPTH_8U, tempad->nChannels);
cvResize(tempad, newimg, 1);
OverlayImage(tempimg, newimg, cvScalar(0,0,0,0), cvScalar(1,1,1,1));
}
else
cvReleaseCapture(&ad);
cvWaitKey(1000/fps);
cvShowImage("Video", tempimg);
}
cvReleaseCapture(&capture);
cvDestroyAllWindows();
return 0;
}
This code runs fine only when the input videos are the same. If the videos are of different lengths or fps, it gives an error after the embedded video finishes.
How to correct that ?
What happens
Each time you call cvQueryFrame(source) the inner frame counter of the source is incremented.
This is why your second movie should be as long (speaking in frames) as the main movie.
As a workaround, I would suggest you to use an ad movie that has a number of frames (length * fps) equal to an integer ratio of the master movie and use temporary image buffers to hold the data you need.
An ideal solution would be to first interpolate the shortest (in frames) movie to the size of the longest, then merge them as you do, but temporal upsampling can be challenging to implement if you're not willing to use nearest neighbour or linear interpolation.
If the ad video is shorter
You can choose among several solutions:
detect that you have reached the end and stop sending an image
detect that you have reached the end and re-open the ad movie from the beginning
use a temporary image to always keep in memory the last valid frame from the ad movie and send this image if there is no new one
etc.