I have a scanner which is big enough to scan multiple pictures at once.
Unfortunately, all the pictures are stored in one jpg file, separated only by
white borders. Is there any way to automatically find the sub images and store them
in separate files? I was thinking about using OpenCV to get the job done, but
I can't find the right functions. Does anybody know which OpenCV function would work, or if there is any other approach (using linux)?

My quick and dirty solution that worked with my images looks like this. I hope people with a similar problem can use this as a starting point for using OpenCV.
// g++ `pkg-config --cflags --libs opencv` parse.cp
// include standard OpenCV headers, same as before
#include <cv.h>
#include <highgui.h>
#include <stdio.h>
// all the new API is put into "cv" namespace. Export its content
using namespace cv;
using namespace std;
// Entry point of the sub-image splitter. Visible steps: load
// "original/<imagename>" (first command-line argument, default "lena.jpg"),
// build a white canvas larger than the input, flood fill, build a mask,
// threshold, find contours and print a file name per sufficiently large
// contour box. A progress line is printed after each stage.
// NOTE(review): this listing is damaged -- all braces, several receiver
// expressions (the object in front of `<Vec3b>(...)`, `<char>(...)`,
// `<short>(...)`), the operand of `!`, the tail of the findContours call and
// probably whole statements are missing, so it does not compile as shown.
// The comments below describe only what is still visible.
int main( int argc, char** argv )
string imagename = argc > 1 ? argv[1] : "lena.jpg";
// the newer cvLoadImage alternative with MATLAB-style name
Mat imgf = imread("original/"+imagename);
// NOTE(review): the operand of `!` is missing; presumably the loaded image's
// data pointer -- confirm.
if( ! ) // check if the image has been loaded properly
return -1;
// Width in pixels of the white margin added on every side of the input.
int border = 1000;
// White 3-channel canvas, 2*border taller and wider than the input.
Mat img(imgf.rows+2*border,imgf.cols+2*border,CV_8UC3,Scalar(255,255,255));
// Per-pixel copy with the destination index shifted by `border`.
// NOTE(review): both receivers are missing; presumably img (destination) and
// imgf (source) -- confirm.
for (int i=0; i<imgf.cols; ++i) {
for (int j=0; j<imgf.rows; ++j) {<Vec3b>(j+border,i+border) =<Vec3b>(j,i);
cout << "created border\n";
// NOTE(review): mask is default-constructed here and no visible statement
// fills it before floodFill; a copy from img appears to be missing.
Mat mask;
// Per-channel tolerance (2) used as both lower and upper flood-fill difference.
Scalar diff(2,2,2);
// Flood fill seeded at the top-left pixel, painting with (0,0,255).
floodFill(mask, Point(0,0), Scalar(0,0,255), NULL, diff, diff);
cout << "flood filled\n";
// Pixels that differ from the fill colour (0,0,255) become (0,0,0); pixels
// equal to it become (255,255,255).
// NOTE(review): receivers missing; presumably mask -- confirm.
for (int i=0; i<mask.cols; ++i) {
for (int j=0; j<mask.rows; ++j) {
if(<Vec3b>(j,i) != Vec3b(0,0,255)) {<Vec3b>(j,i) = Vec3b(0,0,0);
} else {<Vec3b>(j,i) = Vec3b(255,255,255);
// Collapse the 3-channel mask to a single gray channel in place.
cvtColor( mask, mask, CV_RGB2GRAY );
cout << "mask created\n";
// NOTE(review): sobelX/sobelY are declared but no visible statement computes
// them; the Sobel calls appear to be missing.
Mat sobelX;
Mat sobelY;
Mat sobel;
sobel = abs(sobelX)+abs(sobelY);
// Per-pixel sum of two absolute `short` values written as `char`.
// NOTE(review): receivers missing; presumably mask on the left and
// sobelX/sobelY on the right -- confirm.
for (int i=0; i<mask.cols; ++i) {
for (int j=0; j<mask.rows; ++j) {<char>(j,i) = abs(<short>(j,i))+abs(<short>(j,i));
// Binarise in place: values above 127 become 255, the rest 0.
threshold(mask, mask, 127, 255, THRESH_BINARY);
cout << "sobel done\n";
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
// NOTE(review): the remaining arguments (retrieval mode / approximation
// method) and the closing parenthesis of this call are missing.
findContours(mask, contours, hierarchy,
cout << "contours done\n";
// iterate through all the top-level contours
// Follows hierarchy[idx][0] from contour 0 until it turns negative.
// NOTE(review): contours[0]/hierarchy[0] are accessed without a visible
// emptiness check -- confirm against the original.
int idx = 0;
for( ; idx >= 0; idx = hierarchy[idx][0] )
// Minimum-area (possibly rotated) rectangle around the contour.
RotatedRect box = minAreaRect(contours[idx]);
// Only boxes larger than 100 in both dimensions are handled.
if(box.size.width > 100 && box.size.height > 100) {
// NOTE(review): the first argument (rotation centre) is missing; presumably
// the box centre -- confirm.
Mat rot = getRotationMatrix2D(,box.angle,1.0);
Mat rotimg;
// NOTE(review): the canvas above is built as (rows, cols) but width is passed
// first here -- confirm the intended orientation.
Mat subimg(box.size.width,box.size.height,CV_8UC3);
// Output name "subimg_<imagename>_<idx>.png"; only printed in the visible
// code (no visible statement writes the file).
stringstream name;
name << "subimg_"<< imagename << "_" << idx << ".png";
cout << name.str() << "\n";
cout << "Done\n";
return 0;


Warning: Invalid resolution 0 dpi. Using 70 instead

I know this topic has already been discussed, but I didn't find a solution for it.
I am trying to detect characters in a picture with the code below:
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
#include <opencv2/opencv.hpp>
#include <sstream>
#include <memory>
#include <iostream>
#define path "/home/jovan/Pictures/"
// Repeatedly halves img while both its width and height are at least 500.
void resize(cv::Mat &img);
// Copies the pixels of an 8-bit cv::Mat into a newly created 8 bpp PIX.
PIX *mat8ToPix(const cv::Mat *mat8);
// Copies the bytes of a PIX into a new CV_8UC1 cv::Mat.
cv::Mat pix8ToMat(PIX *pix8);
int main(int argc, char **argv)
// Load image
std::stringstream ss;
ss << path;
ss << argv[1];
cv::Mat im = cv::imread(ss.str() );
if (im.empty())
std::cout<<"Cannot open source image!" << std::endl;
cv::Mat gray;
cv::cvtColor(im, gray, CV_BGR2GRAY);
// Pass it to Tesseract API
tesseract::TessBaseAPI tess;
tess.Init(NULL, "eng", tesseract::OEM_DEFAULT);
tess.SetVariable("tessedit_char_whitelist", "QWERTYUIOPASDFGHJKLZXCVBNM");
PIX *image = mat8ToPix(&im);
//tess.SetImage((uchar*), gray.cols, gray.rows, 1, gray.cols);
// Get the text
char* out = tess.GetUTF8Text();
if(out != nullptr)
std::cout << "here it is: "<< out << std::endl;
cv::imshow("image", im);
cv::imshow("gray", gray);
return 0;
/**
 * Shrinks img in place by repeatedly halving both dimensions for as long as
 * the image is at least 500 pixels wide and at least 500 pixels high.
 *
 * @param img image to shrink; replaced by the downscaled result. Images that
 *            are already smaller than 500 in either dimension are untouched.
 */
void resize(cv::Mat &img)
{
    while (img.size().width >= 500 && img.size().height >= 500)
    {
        cv::resize(img, img, cv::Size(img.size().width / 2, img.size().height / 2));
    }
}
/**
 * Copies a single-channel 8-bit cv::Mat into a newly created 8 bpp Leptonica
 * PIX of the same width and height.
 *
 * @param mat8 source image; must be non-null, non-empty and of type CV_8UC1.
 * @return the new PIX (the caller releases it with pixDestroy), or nullptr
 *         when the input is unusable or the PIX cannot be created.
 */
PIX *mat8ToPix(const cv::Mat *mat8)
{
    // Reading a multi-channel or non-8-bit matrix one uchar at a time would
    // copy the wrong bytes, so reject anything that is not CV_8UC1.
    if (mat8 == nullptr || mat8->empty() || mat8->type() != CV_8UC1)
    {
        return nullptr;
    }

    PIX *pixd = pixCreate(mat8->size().width, mat8->size().height, 8);
    if (pixd == nullptr)
    {
        return nullptr;
    }

    for (int y = 0; y < mat8->rows; y++)
    {
        const uchar *row = mat8->ptr<uchar>(y);
        for (int x = 0; x < mat8->cols; x++)
        {
            pixSetPixel(pixd, x, y, (l_uint32) row[x]);
        }
    }
    return pixd;
}
/**
 * Copies the bytes of a Leptonica PIX into a new CV_8UC1 cv::Mat of the same
 * width and height, one byte per pixel.
 *
 * @param pix8 source image. NOTE(review): the name suggests an 8 bpp PIX is
 *             expected; the depth is not checked here.
 * @return the new matrix, or an empty cv::Mat when pix8 is null.
 */
cv::Mat pix8ToMat(PIX *pix8)
{
    if (pix8 == nullptr)
    {
        return cv::Mat();
    }

    cv::Mat mat(cv::Size(pix8->w, pix8->h), CV_8UC1);
    uint32_t *line = pix8->data;
    for (uint32_t y = 0; y < pix8->h; ++y)
    {
        for (uint32_t x = 0; x < pix8->w; ++x)
        {
            mat.at<uchar>(y, x) = GET_DATA_BYTE(line, x);
        }
        // Advance by one row; wpl is the row length in 32-bit words.
        line += pix8->wpl;
    }
    return mat;
}
Whatever picture I process, I get this on the terminal:
$: Warning: Invalid resolution 0 dpi. Using 70 instead.
Does anyone have some solution?
Thanks in advance.
If you know the input image's resolution, you can call pixSetResolution on Leptonica Pix object.
Or use Tesseract API to pass in the value. See
Tess4j - Pdf to Tiff to tesseract - "Warning: Invalid resolution 0 dpi. Using 70 instead."
Maybe it helps: I used EMGU & C#, but I think it must be the same in C++:
ocr.SetVariable("user_defined_dpi", "70");
... and the message should disappear ;)
I had a similar issue. I found out from here that a dark background in the image is the problem. Inverting the image colors worked.

OpenCV: How to use AffineTransformer

Hello and thanks for your help.
I would like to test the use of shapes for matching in OpenCV and managed to do the matching part.
To locate the rotated shape, I thought the AffineTransformer class would be the right choice. As I don't know how the matching works internally, it would be nice if someone had a link where the procedure is described.
As shawshank mentioned, my following code throws an "Assertion failed" error because the variable matches is empty when passed to the estimateTransformation function. Does anybody know how to use this function in the right way, and what it really does?
using namespace std;
using namespace cv;
/**
 * Rotates src by `angle` degrees around its centre and writes the result to
 * dst. dst is sized to the bounding box of the rotated image so that no
 * corner is cut off.
 *
 * @param src   image to rotate.
 * @param dst   receives the rotated image.
 * @param angle rotation angle in degrees, as taken by getRotationMatrix2D.
 * @return true when the image was rotated, false when src is empty.
 */
bool rotateImage(Mat src, Mat &dst, double angle)
{
    if (src.empty())
    {
        return false;
    }

    // get rotation matrix for rotating the image around its center
    cv::Point2f center(src.cols/2.0, src.rows/2.0);
    cv::Mat rot = cv::getRotationMatrix2D(center, angle, 1.0);
    // determine bounding rectangle
    cv::Rect bbox = cv::RotatedRect(center,src.size(), angle).boundingRect();
    // adjust transformation matrix: shift the result so the rotated image is
    // centred in the (larger) bounding box instead of the original frame
    rot.at<double>(0,2) += bbox.width/2.0 - center.x;
    rot.at<double>(1,2) += bbox.height/2.0 - center.y;
    cv::warpAffine(src, dst, rot, bbox.size());
    return true;
}
// Finds all contours in `image` (RETR_LIST, CHAIN_APPROX_NONE), shuffles the
// collected points and returns `sampled`; `n` (default 300) bounds the padding
// and sampling loops.
// NOTE(review): this listing is damaged -- braces and the bodies of all four
// loops are missing, so as shown nothing is ever added to all_points or
// sampled. Presumably the loops (1) append every contour point to all_points,
// (2) pad all_points up to n entries by repeating points (indexed by `dummy`)
// and (3) copy the first n shuffled points into sampled -- confirm against the
// original.
static vector<Point> sampleContour( const Mat& image, int n=300 )
vector<vector<Point>> contours;
vector<Point> all_points;
findContours(image, contours, cv::RETR_LIST, cv::CHAIN_APPROX_NONE);
for (size_t i=0; i <contours.size(); i++)
for (size_t j=0; j<contours[i].size(); j++)
int dummy=0;
for (int add=(int)all_points.size(); add<n; add++)
// shuffle
random_shuffle(all_points.begin(), all_points.end());
vector<Point> sampled;
for (int i=0; i<n; i++)
return sampled;
// Demo: loads "testimage.jpg" as grayscale, creates a copy rotated by 45
// degrees, samples contour points from both, prints the shape-context and
// Hausdorff distances between the two point sets and then calls
// AffineTransformer::estimateTransformation.
// NOTE(review): braces are missing from this listing. No visible statement
// checks that the image was loaded, and no visible waitKey call follows the
// imshow calls.
int main(void)
Mat img1, img2;
vector<Point> img1Points, img2Points;
float distSC, distHD;
// read images
string img1Path = "testimage.jpg";
img1 = imread(img1Path, IMREAD_GRAYSCALE);
rotateImage(img1, img2, 45);
imshow("original", img1);
imshow("transformed", img2);
// Contours
img1Points = sampleContour(img1);
img2Points = sampleContour(img2);
//Calculate Distances
Ptr<ShapeContextDistanceExtractor> mysc = createShapeContextDistanceExtractor();
Ptr<HausdorffDistanceExtractor> myhd = createHausdorffDistanceExtractor();
distSC = mysc->computeDistance( img1Points, img2Points );
distHD = myhd -> computeDistance( img1Points, img2Points );
cout << distSC << endl << distHD << endl;
// `matches` is passed to estimateTransformation while still empty; the
// question text above reports that this call fails with an assertion for that
// reason. It needs one DMatch per corresponding point pair.
// NOTE(review): the points come out of a random shuffle, so index i of one
// set does not obviously correspond to index i of the other -- an identity
// mapping would need confirming before use.
vector<DMatch> matches;
Ptr<AffineTransformer> transformerHD = createAffineTransformer(0);
transformerHD -> estimateTransformation(img1Points, img2Points, matches);
return 0;
I have used AffineTransformer class on a 2D image. Below is the basic code which will give you an idea of what it does.
// My OpenCv AffineTransformer demo code
// I have tested this on a 500 x 500 resolution image
#include <iostream>
#include "opencv2/opencv.hpp"
#include <vector>
using namespace cv;
using namespace std;
int arrSize = 10;
int sourcePx[]={154,155,159,167,182,209,238,265,295,316};
int sourcePy[]={190,222,252,285,314,338,344,340,321,290};
int tgtPx[]={120,127,137,150,188,230,258,285,305,313};
int tgtPy[]={207,245,275,305,336,345,342,332,305,274};
int main()
// Prepare 'vector of points' from above hardcoded points
int sInd=0, eInd=arrSize;
vector<Point2f> sourceP; for(int i=sInd; i<eInd; i++) sourceP.push_back(Point2f(sourcePx[i], sourcePy[i]));
vector<Point2f> tgtP; for(int i=sInd; i<eInd; i++) tgtP.push_back(Point2f(tgtPx[i], tgtPy[i]));
// Create object of AffineTransformer
bool fullAffine = true; // change its value and see difference in result
auto aft = cv::createAffineTransformer(fullAffine);
// Prepare vector<cv::DMatch> - this is just mapping of corresponding points indices
std::vector<cv::DMatch> matches;
for(int i=0; i<sourceP.size(); ++i) matches.push_back(cv::DMatch(i, i, 0));
// Read image
Mat srcImg = imread("image1.jpg");
Mat tgtImg;
// estimate points transformation
aft->estimateTransformation(sourceP, tgtP, matches);
// apply transformation
aft->applyTransformation(sourceP, tgtP);
// warp image
aft->warpImage(srcImg, tgtImg);
// show generated output
imshow("warped output", tgtImg);
return 0;

Why is webcam image processing slow while using Xcode in an OpenCV project?

Why is webcam image processing very slow when using Xcode for this OpenCV project, and why does only one of the three windows work (the "similar spaces" and "HSV" windows do not show up)? How can I increase the execution speed of the program?
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
using namespace cv;
using namespace std;
// Shared images: the loaded source image, its HSV conversion, and the
// single-channel result mask written by setColor.
Mat img, hsv, res;
// Window titles. String literals are const in C++, so they must be bound to
// `const char *` (binding them to `char *` is rejected by C++11 compilers).
const char *win1 = "RGB";
const char *win2 = "HSV";
const char *win3 = "similar spaces";
// Half-width of the band around the reference value accepted by setColor
// (it tests channel 0 against hval-thresh .. hval+thresh).
uchar thresh = 5;
// For every pixel position of `res`: if channel 0 of a Vec3b image lies within
// [hval-thresh, hval+thresh], a uchar pixel is set to 255, otherwise to 0.
// The mask `res` is then shown in window win3.
// NOTE(review): this listing is damaged -- closing braces and the receivers of
// the `<Vec3b>` / `<uchar>` accesses are missing. Presumably the Vec3b reads
// come from hsv and the uchar writes go to res -- confirm.
void setColor(uchar hval){
int i,j;
for (i = 0; i < res.rows; ++i){
for (j = 0; j < res.cols; ++j){
if(<Vec3b>(i,j)[0] <= hval+thresh
&&<Vec3b>(i,j)[0] >= hval-thresh)<uchar>(i,j) = 255;
else<uchar>(i,j) = 0;
imshow(win3, res);
// Mouse callback registered on window win1 in main: prints the reported
// cursor coordinates. event, flags and userdata are unused in the visible code.
// NOTE(review): the closing brace is missing, and further statements (e.g. a
// call to setColor, which nothing else visible invokes) may be missing too --
// confirm.
void MouseCallBackFunc(int event, int x, int y, int flags, void* userdata){
cout<<"\t x,y : "<<x<<','<<y<<endl;
// Loads a sample image, allocates an HSV image and a single-channel result
// mask of the same size, converts the image to HSV, shows all three in their
// windows and registers the mouse callback on the "RGB" window.
// NOTE(review): braces are missing from this listing. No visible statement
// checks that the image was loaded. Outside the commented-out camera loop
// there is no visible waitKey call after the imshow calls.
int main()
img = imread("/usr/share/opencv/samples/cpp/stuff.jpg", CV_LOAD_IMAGE_COLOR);
hsv = Mat::zeros(img.size(), CV_8UC3);
res = Mat::zeros(img.size(), CV_8UC1);
// c, i and j are only referenced inside the commented-out code below.
char c;
int i,j;
// Only win2 and win3 are created explicitly; win1 is first used by imshow.
namedWindow(win2, CV_WINDOW_NORMAL);
namedWindow(win3, CV_WINDOW_NORMAL);
// NOTE(review): imread normally delivers BGR channel order, so CV_BGR2HSV may
// have been intended here -- confirm.
cvtColor(img, hsv, CV_RGB2HSV);
imshow(win1, img);
imshow(win2, hsv);
imshow(win3, res);
setMouseCallback(win1, MouseCallBackFunc, NULL);
// Disabled webcam loop. NOTE(review): no statement that reads a frame into
// cameraFrame is visible inside it.
// VideoCapture stream(0); //0 is the id of video device.0 if you have only one camera.
// if (!stream.isOpened()) { //check if video device has been initialised
// cout << "cannot open camera";
// }
// while (true) {
// Mat cameraFrame;
// imshow("test", cameraFrame);
// c = waitKey(30);
// if(c==27)
// break;
// }
return 0;

boundingrect not working for a contour in opencv c++

using namespace std;
using namespace cv;
// Reads frames from a video file, thresholds the absolute difference between
// each grayscale frame and a background image, cleans the result with
// erode/dilate, finds contours and draws contours and bounding rectangles on
// the frame.
// NOTE(review): this listing is damaged -- all braces and several statements
// are missing (e.g. the bodies of both `if (frame.empty())` tests and whatever
// fills `back`; `bg` is declared but never used in the visible code). The
// comments below describe only what is still visible.
int main()
// Frame counter, used for logging only.
int a = 0;
Mat frame, diffimage,back,frame_gray;
VideoCapture cap("D:\\elance\\check\\Sent3.avi");
vector<vector<Point>> contours;
BackgroundSubtractorMOG2 bg;
// Left empty; passed to imwrite as its parameter list.
vector<int> params;
// Warm-up over the first 200 frames.
// NOTE(review): presumably this feeds `bg` and then extracts `back` -- confirm.
for (int i = 0; i < 200;i++)
cap >> frame;
if (frame.empty())
cvtColor(back, back, CV_BGR2GRAY);
//for (int f = 0; f < 20;f++)
while (1)
a = a + 1;
cout << "Frame no : " << a<<endl;
cap >> frame;
if (frame.empty())
cvtColor(frame, frame_gray, CV_BGR2GRAY);
// Foreground mask: absolute difference to the background, binarised at 10.
absdiff(frame_gray, back, diffimage);
threshold(diffimage, diffimage, 10, 255, CV_THRESH_BINARY);
// Erode and dilate with the default kernel.
// NOTE(review): without braces it is unclear whether dilate belongs to the loop.
for (int i = 0; i < 2; i++)
cv::erode(diffimage, diffimage, cv::Mat());
cv::dilate(diffimage, diffimage, cv::Mat());
findContours(diffimage, contours, CV_RETR_LIST, CV_CHAIN_APPROX_NONE);
cout << "Contour Size : " << contours.size()<<endl;
// One rectangle slot per contour; slots stay default-constructed unless the
// size filter below lets the contour through.
vector<Rect> boundRect(contours.size());
for (int i = 0; i < contours.size(); i++)
drawContours(frame, contours, i, cv::Scalar(0, 255, 255), 1);
Mat smoothCont;
smoothCont = cv::Mat(contours[i]);
cout << smoothCont.rows << "\t" << smoothCont.cols <<"\t"<<smoothCont.depth()<< endl << endl;
// Only contours with 1..9999 rows (as printed above) get a bounding rectangle.
if (smoothCont.rows > 0 && smoothCont.rows < 10000)
boundRect[i] = boundingRect(Mat(contours[i]));
// Draws every slot, including those the filter above left default-constructed.
for (int i = 0; i < contours.size(); i++)
rectangle(frame, boundRect[i], Scalar(0, 255, 255), 1, 8, 0);
imshow("Diff", diffimage);
imshow("frame", frame);
imwrite("D:\\test.jpg", frame, params);
This code basically takes the contours and draws bounding rectangles around them. But only one contour is enclosed by a box; the other contour still has no bounding box.
Can anyone help in this matter ?
Maybe "if (smoothCont.rows > 0 && smoothCont.rows < 10000)" filtered them out?

Algorithm for shrinking/limiting palette of an image

as input data I have a 24 bit RGB image and a palette with 2..20 fixed colours. These colours are in no way spread regularly over the full colour range.
Now I have to modify the colours of input image so that only the colours of the given palette are used - using the colour out of the palette that is closest to the original colour (not closest mathematically but for human's visual impression). So what I need is an algorithm that uses an input colour and finds the colour in target palette that visually fits best to this colour. Please note: I'm not looking for a stupid comparison/difference algorithm but for something that really incorporates the impression a colour has on humans!
Since this is something that should already have been done, and because I do not want to re-invent the wheel: is there some example source code out there that does this job? In the best case it is really a piece of code and not a link to a disastrously huge library ;-)
(I'd guess OpenCV does not provide such a function?)
You should look at the Lab color space. It was designed so that the distance in the colour space equals the perceptual distance. So once you have converted your image you can compute the distances as you would have done earlier, but should get a better result from a perceptual point of view. In OpenCV you can use the cvtColor(source, destination, CV_BGR2Lab) function.
Another Idea would be to use dithering. The idea is to mix missing colours using neighbouring pixels. A popular algorithm for this is Floyd-Steinberg dithering.
Here is an example of mine, where I combined an optimized palette using k-means with the Lab colour space and Floyd-Steinberg dithering:
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace cv;
using namespace std;
// Dithers a copy of img against the colours in palette and returns the result.
cv::Mat floydSteinberg(cv::Mat img, cv::Mat palette);
// Searches palette for the entry with the smallest vec3bDist to color.
cv::Vec3b findClosestPaletteColor(cv::Vec3b color, cv::Mat palette);
// Loads the image named by argv[1], converts it to Lab, clusters its pixels
// into nrColors colours with k-means, builds a posterized image and a
// Floyd-Steinberg dithered image, converts both back to BGR and shows them
// next to the input.
// NOTE(review): this listing is damaged -- braces, the receivers of the
// `<Vec3b>` / `<float>` / `<int>` accesses and the statement that fills
// `palette` are missing. argv[1] is read without a visible argc check, and no
// visible waitKey call follows the imshow calls.
int main(int argc, char** argv)
// Number of clusters (colors on result image)
int nrColors = 18;
cv::Mat imgBGR = imread(argv[1],1);
cv::Mat img;
cvtColor(imgBGR, img, CV_BGR2Lab);
cv::Mat colVec = img.reshape(1, img.rows*img.cols); // change to a Nx3 column vector
cv::Mat colVecD;
colVec.convertTo(colVecD, CV_32FC3, 1.0); // convert to floating point
cv::Mat labels, centers;
cv::kmeans(colVecD, nrColors, labels,
cv::TermCriteria(CV_TERMCRIT_ITER, 100, 0.1),
3, cv::KMEANS_PP_CENTERS, centers); // compute k mean centers
// replace pixels by their corresponding cluster centers
// NOTE(review): presumably imgPosterized.at<Vec3b>(i,j)[k] is assigned
// centers.at<float>(labels.at<int>(j+img.cols*i),k) -- confirm.
cv::Mat imgPosterized = img.clone();
for(int i = 0; i < img.rows; i++ )
for(int j = 0; j < img.cols; j++ )
for(int k = 0; k < 3; k++)<Vec3b>(i,j)[k] =<float>(<int>(j+img.cols*i),k);
// convert palette back to uchar
// NOTE(review): palette is declared but no visible statement fills it (the
// conversion from centers appears to be missing), so floydSteinberg would
// receive an empty matrix as shown.
cv::Mat palette;
// call floyd steinberg dithering algorithm
cv::Mat fs = floydSteinberg(img, palette);
cv::Mat imgPosterizedBGR, fsBGR;
cvtColor(imgPosterized, imgPosterizedBGR, CV_Lab2BGR);
cvtColor(fs, fsBGR, CV_Lab2BGR);
imshow("input",imgBGR); // original image
imshow("result",imgPosterizedBGR); // posterized image
imshow("fs",fsBGR); // floyd steinberg dithering
return 0;
// Error-diffusion dithering: for each pixel the closest palette colour is
// looked up, the per-channel quantisation error is computed, and fractions of
// it (7/16, 3/16, 5/16, 1/16) are added to four neighbouring pixels, clamped
// to 0..255. Works on clones of imgOrig and returns resImg.
// NOTE(review): this listing is damaged -- braces and every receiver of the
// `<Vec3b>` accesses are missing, so it cannot be told which accesses refer to
// img and which to resImg. Presumably the quantised pixel is stored in resImg
// while the error is read from and diffused into img -- confirm.
cv::Mat floydSteinberg(cv::Mat imgOrig, cv::Mat palette)
cv::Mat img = imgOrig.clone();
cv::Mat resImg = img.clone();
for(int i = 0; i < img.rows; i++ )
for(int j = 0; j < img.cols; j++ )
cv::Vec3b newpixel = findClosestPaletteColor(<Vec3b>(i,j), palette);<Vec3b>(i,j) = newpixel;
for(int k=0;k<3;k++)
int quant_error = (int)<Vec3b>(i,j)[k] - newpixel[k];
// Neighbour updates; i is the row index and j the column index of the loops.
// NOTE(review): the (i-1, j+1) neighbour lies in the previous row, which the
// row-major loops have already visited, and the `i-1 > 0` guard also skips
// row 0 -- confirm the intended scan direction and bounds.
if(i+1<img.rows)<Vec3b>(i+1,j)[k] = min(255,max(0,(int)<Vec3b>(i+1,j)[k] + (7 * quant_error) / 16));
if(i-1 > 0 && j+1 < img.cols)<Vec3b>(i-1,j+1)[k] = min(255,max(0,(int)<Vec3b>(i-1,j+1)[k] + (3 * quant_error) / 16));
if(j+1 < img.cols)<Vec3b>(i,j+1)[k] = min(255,max(0,(int)<Vec3b>(i,j+1)[k] + (5 * quant_error) / 16));
if(i+1 < img.rows && j+1 < img.cols)<Vec3b>(i+1,j+1)[k] = min(255,max(0,(int)<Vec3b>(i+1,j+1)[k] + (1 * quant_error) / 16));
return resImg;
/**
 * Euclidean distance between two 3-channel 8-bit colours.
 *
 * @param a first colour.
 * @param b second colour.
 * @return square root of the sum of the squared per-channel differences.
 */
float vec3bDist(cv::Vec3b a, cv::Vec3b b)
{
    // Differences are taken in float so that they can be negative; squaring
    // by multiplication avoids the general-purpose pow() call.
    const float d0 = static_cast<float>(a[0]) - b[0];
    const float d1 = static_cast<float>(a[1]) - b[1];
    const float d2 = static_cast<float>(a[2]) - b[2];
    return std::sqrt(d0 * d0 + d1 * d1 + d2 * d2);
}
// Linear search over the rows of palette for the entry with the smallest
// vec3bDist to color; minI tracks the index of the best entry so far and
// minDistance its distance.
// NOTE(review): this listing is damaged -- braces, the receivers of the
// `<Vec3b>` accesses (presumably palette) and the return statement
// (presumably the palette entry at minI) are missing -- confirm.
cv::Vec3b findClosestPaletteColor(cv::Vec3b color, cv::Mat palette)
// Unused: shadowed by the loop variable of the same name below.
int i=0;
int minI = 0;
// `diff` is computed but not used by any visible statement.
cv::Vec3b diff = color -<Vec3b>(0);
float minDistance = vec3bDist(color,<Vec3b>(0));
for (int i=0;i<palette.rows;i++)
float distance = vec3bDist(color,<Vec3b>(i));
if (distance < minDistance)
minDistance = distance;
minI = i;
Try this algorithm (it will reduce the number of colors, but it computes the palette by itself):
#include <opencv2/opencv.hpp>
#include "opencv2/legacy/legacy.hpp"
#include <vector>
#include <list>
#include <iostream>
using namespace cv;
using namespace std;
// Colour reduction with a Gaussian mixture model: samples pixels of the input
// image, trains a CvEM model with NrGMMComponents clusters on them, classifies
// every pixel with the trained model and writes the recoloured image to
// "result.png".
// NOTE(review): this listing is damaged -- most braces, the receivers of the
// `<Vec3b>` / `<float>` / `<double>` accesses, some assignment operators and
// at least the statement that stores a sampled pixel are missing. `void main`
// is also not standard C++ (`int main` is required).
void main(void)
// Number of clusters (colors on result image)
int NrGMMComponents = 32;
// Source file name
string fname="D:\\ImagesForTest\\tools.jpg";
cv::Mat SampleImg = imread(fname,1);
int SampleImgHeight = SampleImg.rows;
int SampleImgWidth = SampleImg.cols;
// Pick datapoints
vector<Vec3d> ListSamplePoints;
for (int y=0; y<SampleImgHeight; y++)
for (int x=0; x<SampleImgWidth; x++)
// Get pixel color at that position
// NOTE(review): receiver missing; presumably SampleImg -- confirm.
Vec3b bgrPixel =<Vec3b>(y, x);
uchar b = bgrPixel.val[0];
uchar g = bgrPixel.val[1];
uchar r = bgrPixel.val[2];
// Do not pick every pixel: each one is kept with a random 1-in-25 chance.
// NOTE(review): the statement that appends the pixel to ListSamplePoints is
// missing.
if(rand()%25==0) // Pick not every, but every 25-th
} // for (x)
} // for (y)
// Form training matrix
Mat labels;
int NrSamples = ListSamplePoints.size();
// One row per sample, three float columns (one per colour channel).
Mat samples( NrSamples, 3, CV_32FC1 );
for (int s=0; s<NrSamples; s++)
// NOTE(review): the right-hand side of `v =` and the receivers are missing;
// presumably v is sample s and the writes go to `samples` -- confirm.
Vec3d v =;<float>(s,0) = (float) v[0];<float>(s,1) = (float) v[1];<float>(s,2) = (float) v[2];
cout << "Learning to represent the sample distributions with" << NrGMMComponents << "gaussians." << endl;
// Algorithm parameters
CvEMParams params;
params.covs = NULL;
params.means = NULL;
params.weights = NULL;
params.probs = NULL;
params.nclusters = NrGMMComponents;
params.start_step = CvEM::START_AUTO_STEP;
// Stop after 1500 iterations or once the change drops below 0.001.
params.term_crit.max_iter = 1500;
params.term_crit.epsilon = 0.001;
params.term_crit.type = CV_TERMCRIT_ITER|CV_TERMCRIT_EPS;
//params.term_crit.type = CV_TERMCRIT_ITER;
// Train
cout << "Started GMM training" << endl;
CvEM em_model;
em_model.train( samples, Mat(), params, &labels );
cout << "Finished GMM training" << endl;
// Result image
Mat img = Mat::zeros( Size( SampleImgWidth, SampleImgHeight ), CV_8UC3 );
// Ask classifier for each pixel
Mat sample( 1, 3, CV_32FC1 );
// NOTE(review): `means` is declared but no visible statement fills it;
// presumably it is obtained from em_model and indexed by `response` below --
// confirm.
Mat means;
for(int i = 0; i < img.rows; i++ )
for(int j = 0; j < img.cols; j++ )
// NOTE(review): variable name, `=` signs and receivers are missing on the next
// two lines; presumably the source pixel is copied into `sample`, classified,
// and the result pixel is set from row `response` of the means -- confirm.
Vec3b<Vec3b>(i,j);<float>(0,0) = (float) v[0];<float>(0,1) = (float) v[1];<float>(0,2) = (float) v[2];
int response = cvRound(em_model.predict( sample ));<Vec3b>(i,j)[0]<double>(response,0);<Vec3b>(i,j)[1]<double>(response,1);<Vec3b>(i,j)[2]<double>(response,2);
// Save the result
cv::imwrite("result.png", img);
PS: For perceptual color distance measurement it's better to use the L*a*b color space. There is a converter in OpenCV for this purpose. For clustering you can use k-means with defined cluster centers (your palette entries). After clustering you'll get points with indexes of palette entries.
