I'm new to OpenCV and would like to use it to crop portions of an image and then use Tesseract to read them. I'm not sure of the best way to crop all the boxes I need.
Here is a simple example of the document I need to transform:
Any advice on what would work best?
I tried with ORB and the following image as a template:
But without success.
On the template, some lines are selected as keypoints, but on the image I want to process it's mainly the text that gets picked up, not the lines. Is it a bad template? Do I need to preprocess the image first?
Here is my code:
Feature2D f2d = ORB.create(5000); // also tried SIFT.create(1000)
// Detect keypoints and compute descriptors on the image to process
MatOfKeyPoint keypoints1 = new MatOfKeyPoint();
Mat descriptors1 = new Mat();
Mat mask1 = new Mat();
f2d.detectAndCompute(img1, mask1, keypoints1, descriptors1);
// Same for the template
MatOfKeyPoint keypoints2 = new MatOfKeyPoint();
Mat descriptors2 = new Mat();
Mat mask2 = new Mat();
f2d.detectAndCompute(img2, mask2, keypoints2, descriptors2);
// Brute-force Hamming matcher, suited to ORB's binary descriptors
DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.BRUTEFORCE_HAMMING);
MatOfDMatch matches = new MatOfDMatch();
matcher.match(descriptors1, descriptors2, matches);
// Draw the matches for visual inspection
Mat outputImg = new Mat();
MatOfByte drawnMatches = new MatOfByte();
Features2d.drawMatches(img1, keypoints1, img2, keypoints2, matches, outputImg,
        new Scalar(0, 255, 0), new Scalar(255, 0, 0), drawnMatches,
        Features2d.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS);
I could get good results by using a template that contains all the text that never changes in the form. Furthermore, creating two templates (one per page) and using SIFT instead of ORB helped a lot too.
Here is my solution:
public static Mat matchTemplateSIFT(Mat img1, Mat template, boolean showKeypoints, boolean drawMatches) {
    Feature2D f2d = SIFT.create(15000);
    DescriptorMatcher matcher = DescriptorMatcher.create(DescriptorMatcher.BRUTEFORCE_SL2); // or FLANNBASED for better performance
    return matchTemplate(img1, template, f2d, matcher);
}
public static Mat matchTemplate(Mat baseImage, Mat template, Feature2D f2d, DescriptorMatcher matcher) {
    // Pre-process both images with the same dilation step
    int dilateSize = 5;
    Mat scene = dilateBitwise(dilateSize, baseImage.clone());
    template = dilateBitwise(dilateSize, template.clone());
    // Keypoints/descriptors of the scanned page (the "scene")
    MatOfKeyPoint keypoints1 = new MatOfKeyPoint();
    Mat descriptors1 = new Mat();
    f2d.detectAndCompute(scene, new Mat(), keypoints1, descriptors1);
    // Keypoints/descriptors of the template
    MatOfKeyPoint keypoints2 = new MatOfKeyPoint();
    Mat descriptors2 = new Mat();
    f2d.detectAndCompute(template, new Mat(), keypoints2, descriptors2);
    // k-nearest-neighbour matching with k = 2, so Lowe's ratio test can be applied
    List<MatOfDMatch> matches = new ArrayList<>();
    matcher.knnMatch(descriptors1, descriptors2, matches, 2);
    MatOfDMatch goodMatches = getBestMatches(matches);
    return transformAndWarp(baseImage, template, keypoints1, keypoints2, goodMatches);
}
private static Mat transformAndWarp(Mat baseImage, Mat template, MatOfKeyPoint keypoints1, MatOfKeyPoint keypoints2, MatOfDMatch goodMatches) {
    Mat H = findHomographyMatrix(keypoints1, keypoints2, goodMatches);
    perspectiveTransform(template, H); // side computation of transformed corners; the result is not used here
    Mat result = new Mat();
    // Warp the scanned page into the template's coordinate frame
    Imgproc.warpPerspective(baseImage, result, H, new Size(template.cols(), template.rows()));
    return result;
}
private static void perspectiveTransform(Mat template, Mat H) {
    // The four corners of the template, one per row
    Mat obj_corners = new Mat(4, 1, CvType.CV_32FC2);
    obj_corners.put(0, 0, new double[]{0, 0});
    obj_corners.put(1, 0, new double[]{template.cols(), 0});
    obj_corners.put(2, 0, new double[]{template.cols(), template.rows()});
    obj_corners.put(3, 0, new double[]{0, template.rows()});
    Mat scene_corners = new Mat(4, 1, CvType.CV_32FC2);
    Core.perspectiveTransform(obj_corners, scene_corners, H);
}
private static Mat findHomographyMatrix(MatOfKeyPoint keypoints1, MatOfKeyPoint keypoints2, MatOfDMatch goodMatches) {
    // keypoints1 belong to the scene (the query image), keypoints2 to the template (the train image)
    List<KeyPoint> sceneKeyPoints = keypoints1.toList();
    List<KeyPoint> templateKeyPoints = keypoints2.toList();
    LinkedList<Point> scenePoints = new LinkedList<>();
    LinkedList<Point> templatePoints = new LinkedList<>();
    for (DMatch match : goodMatches.toList()) {
        scenePoints.addLast(sceneKeyPoints.get(match.queryIdx).pt);
        templatePoints.addLast(templateKeyPoints.get(match.trainIdx).pt);
    }
    MatOfPoint2f sceneMat = new MatOfPoint2f();
    sceneMat.fromList(scenePoints);
    MatOfPoint2f templateMat = new MatOfPoint2f();
    templateMat.fromList(templatePoints);
    // RANSAC estimates the homography that maps scene coordinates onto the template
    return Calib3d.findHomography(sceneMat, templateMat, Calib3d.RANSAC);
}
// https://docs.opencv.org/3.4/d5/d6f/tutorial_feature_flann_matcher.html
private static MatOfDMatch getBestMatches(List<MatOfDMatch> knnMatches) {
    //-- Filter matches using Lowe's ratio test
    float ratioThresh = 0.5f;
    List<DMatch> listOfGoodMatches = new ArrayList<>();
    for (int i = 0; i < knnMatches.size(); i++) {
        if (knnMatches.get(i).rows() > 1) {
            DMatch[] matches = knnMatches.get(i).toArray();
            if (matches[0].distance < ratioThresh * matches[1].distance) {
                listOfGoodMatches.add(matches[0]);
            }
        }
    }
    MatOfDMatch matOfDMatch = new MatOfDMatch();
    matOfDMatch.fromList(listOfGoodMatches);
    return matOfDMatch;
}
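The dilateBitwise helper isn't shown in the post. A plausible reconstruction, assuming it simply applies a square-kernel dilation of the given size, might look like this (an assumption, not the author's code):

private static Mat dilateBitwise(int size, Mat src) {
    // Hypothetical reconstruction: dilate with a size x size rectangular kernel
    Mat kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(size, size));
    Imgproc.dilate(src, src, kernel);
    return src;
}

To tie this back to the original OCR goal, here is a hypothetical usage sketch: once the scan is warped into template coordinates, every field sits at a known position and can be cropped for Tesseract. The file paths and the ROI rectangle below are made up for illustration.

Mat scan = Imgcodecs.imread("scan.png", Imgcodecs.IMREAD_GRAYSCALE);
Mat template = Imgcodecs.imread("template.png", Imgcodecs.IMREAD_GRAYSCALE);
Mat aligned = matchTemplateSIFT(scan, template, false, false);
// After alignment, field positions are fixed in template coordinates
Rect nameBox = new Rect(100, 200, 400, 50); // hypothetical field location
Mat nameCrop = new Mat(aligned, nameBox);   // submat view of that region
Imgcodecs.imwrite("name.png", nameCrop);    // feed this crop to Tesseract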
All I can find on the internet are OpenCV 2.x Java code examples. I am using OpenCV 3.2 and trying to load an image and turn all black lines longer than x pixels white (i.e. remove them). Here is where I'm at, starting from an OpenCV 2.4 version of a Hough transform example...
Mat img = Imgcodecs.imread("C:/Users/user1/Desktop/topdown-6.jpg");
// Mat img = Imgcodecs.imread(fileName)
// generate gray scale and blur
Mat gray = new Mat();
Imgproc.cvtColor(img, gray, Imgproc.COLOR_BGR2GRAY);
Imgproc.blur(gray, gray, new Size(3, 3));
// detect the edges
Mat edges = new Mat();
int lowThreshold = 50;
int ratio = 3;
Imgproc.Canny(gray, edges, lowThreshold, lowThreshold * ratio);
Mat lines = new Mat();
Imgproc.HoughLinesP(edges, lines, 1, Math.PI / 180, 50, 50, 10);
for (int i = 0; i < lines.cols(); i++) {
    double[] val = lines.get(0, i);
    Imgproc.line(img, new Point(val[0], val[1]), new Point(val[2], val[3]), new Scalar(0, 0, 255), 2);
}
Image edgesImg = toBufferedImage(edges);
Image linesImg = toBufferedImage(lines);
Image imgg = toBufferedImage(img);
And I'm getting the error:
OpenCV Error: Assertion failed (scn == 3 || scn == 4) in cv::cvtColor, file
C:\build\master_winpack-bindings-win32-vc14-static\opencv\modules\imgproc\src\color.cpp, line 9748
Exception in thread "main" CvException [org.opencv.core.CvException: cv::Exception: C:\build\master_winpack-bindings-win32-vc14-static\opencv\modules\imgproc\src\color.cpp:9748: error: (-215) scn == 3 || scn == 4 in function cv::cvtColor
]
at org.opencv.imgproc.Imgproc.cvtColor_1(Native Method)
at org.opencv.imgproc.Imgproc.cvtColor(Imgproc.java:1778)
at Main.main(Main.java:174)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:144)
Any help towards my goal would be awesome. Should I just use OpenCV version 2.x?
Edit:
public Image toBufferedImage(Mat m) {
    int type = BufferedImage.TYPE_BYTE_GRAY;
    if (m.channels() > 1) {
        type = BufferedImage.TYPE_3BYTE_BGR;
    }
    int bufferSize = m.channels() * m.cols() * m.rows();
    byte[] b = new byte[bufferSize];
    m.get(0, 0, b); // ERROR HAPPENING HERE
    BufferedImage image = new BufferedImage(m.cols(), m.rows(), type);
    final byte[] targetPixels = ((DataBufferByte) image.getRaster().getDataBuffer()).getData();
    System.arraycopy(b, 0, targetPixels, 0, b.length);
    return image;
}
The problem is that you are loading the image as grayscale, so you do not need to convert it later. With COLOR_BGR2GRAY, cvtColor expects the input Mat to have 3 or 4 channels, which is exactly what the failed assertion scn == 3 || scn == 4 is checking.
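To make that concrete, here is a sketch against the OpenCV 3.x Java bindings (the path and thresholds come from the question; the channel guard and painting lines white are my assumptions, not the original answer's code). Note that in 3.x, HoughLinesP fills an N x 1 result Mat, so you iterate over rows rather than columns as in the 2.4 samples; also, passing that CV_32SC4 lines Mat to toBufferedImage will fail at m.get(0, 0, b), which only accepts a byte[] for 8-bit Mats.

Mat img = Imgcodecs.imread("C:/Users/user1/Desktop/topdown-6.jpg", Imgcodecs.IMREAD_COLOR);
Mat gray = new Mat();
if (img.channels() >= 3) {
    Imgproc.cvtColor(img, gray, Imgproc.COLOR_BGR2GRAY); // only convert a color Mat
} else {
    gray = img.clone(); // already single-channel, nothing to convert
}
Imgproc.blur(gray, gray, new Size(3, 3));
Mat edges = new Mat();
Imgproc.Canny(gray, edges, 50, 150);
Mat lines = new Mat();
int minLineLength = 50; // "longer than x pixels" - tune as needed
Imgproc.HoughLinesP(edges, lines, 1, Math.PI / 180, 50, minLineLength, 10);
for (int i = 0; i < lines.rows(); i++) { // rows, not cols, in OpenCV 3.x
    double[] val = lines.get(i, 0);
    // paint the detected line white to remove it from the document
    Imgproc.line(img, new Point(val[0], val[1]), new Point(val[2], val[3]),
            new Scalar(255, 255, 255), 3);
}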
I'm trying to create an HSV histogram using the following code:
Mat image = new Mat(file, LoadImageType.Color);
int hBins = 16;
int sBins = 16;
int vBins = 16;
RangeF hRange = new RangeF(0F, 180F);
RangeF sRange = new RangeF(0f, 255f);
RangeF vRange = new RangeF(0f, 255f);
Image<Bgr, Byte> imageSource = new Image<Bgr, Byte>(image.Bitmap);
Image<Hsv, Byte> imageHsv = imageSource.Convert<Hsv, Byte>();
DenseHistogram hist = new DenseHistogram(new int[] { hBins, sBins, vBins }, new RangeF[] { hRange, sRange, vRange });
hist.Calculate<byte>(imageHsv.Split(), false, null);
The problem, though, is that when calling hist.GetBinValues(), all the bin values are zero.
Computing the histogram channel by channel seems to give the expected output:
Mat image = new Mat(file, LoadImageType.Color);
int hBins = 16;
int sBins = 16;
int vBins = 16;
RangeF hRange = new RangeF(0F, 180F);
RangeF sRange = new RangeF(0f, 256f);
RangeF vRange = new RangeF(0f, 256f);
var imageSource = image.ToImage<Bgr, Byte>();
Image<Hsv, Byte> imageHsv = imageSource.Convert<Hsv, Byte>();
var hsvChannels = imageHsv.Split();
DenseHistogram hHist = new DenseHistogram(hBins, hRange);
DenseHistogram sHist = new DenseHistogram(sBins, sRange);
DenseHistogram vHist = new DenseHistogram(vBins, vRange);
hHist.Calculate<byte>(new Image<Gray, Byte>[] { hsvChannels[0] }, false, null);
sHist.Calculate<byte>(new Image<Gray, Byte>[] { hsvChannels[1] }, false, null);
vHist.Calculate<byte>(new Image<Gray, Byte>[] { hsvChannels[2] }, false, null);
var hVals = hHist.GetBinValues();
var sVals = sHist.GetBinValues();
var vVals = vHist.GetBinValues();
I can't say why your method does not work, though. I see GetBinValues() returns an array of 16^3 = 4096 values (one per bin of the three-dimensional histogram), where I would expect 16 * 3 = 48 values from three independent 16-bin histograms.
After battling with this for a few days, I dumped EMGU and just used OpenCV from C++, and this gives me the correct HSV bins.
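For reference, the same per-channel computation can be done through OpenCV's calcHist directly. Here is a sketch using the OpenCV Java bindings (the C++ call is analogous); the file path is an assumption:

Mat bgr = Imgcodecs.imread("input.jpg", Imgcodecs.IMREAD_COLOR); // hypothetical path
Mat hsv = new Mat();
Imgproc.cvtColor(bgr, hsv, Imgproc.COLOR_BGR2HSV);
List<Mat> channels = new ArrayList<>();
Core.split(hsv, channels);
// One 16-bin histogram per channel; hue spans 0-180 in OpenCV, S and V span 0-256
Mat hHist = new Mat(), sHist = new Mat(), vHist = new Mat();
Imgproc.calcHist(Arrays.asList(channels.get(0)), new MatOfInt(0), new Mat(), hHist, new MatOfInt(16), new MatOfFloat(0f, 180f));
Imgproc.calcHist(Arrays.asList(channels.get(1)), new MatOfInt(0), new Mat(), sHist, new MatOfInt(16), new MatOfFloat(0f, 256f));
Imgproc.calcHist(Arrays.asList(channels.get(2)), new MatOfInt(0), new Mat(), vHist, new MatOfInt(16), new MatOfFloat(0f, 256f));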
I have developed a program to detect motion using JavaCV. Up to now I have completed cvFindContours on the processed image. The source code is given below:
public class MotionDetect {

    public static void main(String args[]) throws Exception, InterruptedException {
        //FFmpegFrameGrabber grabber = new FFmpegFrameGrabber(new File("D:/pool.avi"));
        OpenCVFrameGrabber grabber = new OpenCVFrameGrabber("D:/2.avi");
        final CanvasFrame canvas = new CanvasFrame("My Image");
        final CanvasFrame canvas2 = new CanvasFrame("ROI");
        canvas.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
        grabber.start();
        IplImage frame = grabber.grab();
        CvSize imgsize = cvGetSize(frame);
        IplImage grayImage = cvCreateImage(imgsize, IPL_DEPTH_8U, 1);
        IplImage ROIFrame = cvCreateImage(cvSize((265 - 72), (214 - 148)), IPL_DEPTH_8U, 1);
        IplImage colorImage;
        IplImage movingAvg = cvCreateImage(imgsize, IPL_DEPTH_32F, 3);
        IplImage difference = null;
        IplImage temp = null;
        IplImage motionHistory = cvCreateImage(imgsize, IPL_DEPTH_8U, 3);
        CvRect bndRect = cvRect(0, 0, 0, 0);
        CvPoint pt1 = new CvPoint(), pt2 = new CvPoint();
        CvFont font = null;
        //Capture the movie frame by frame.
        int prevX = 0;
        int numPeople = 0;
        char[] wow = new char[65];
        int avgX = 0;
        //Indicates whether this is the first time in the loop of frames.
        boolean first = true;
        //Indicates the contour which was closest to the left boundary before the object
        //entered the region between the buildings.
        int closestToLeft = 0;
        //Same as above, but for the right.
        int closestToRight = 320;
        while (true) {
            colorImage = grabber.grab();
            if (colorImage != null) {
                //If this is the first time, initialize the images.
                if (first) {
                    difference = cvCloneImage(colorImage);
                    temp = cvCloneImage(colorImage);
                    cvConvertScale(colorImage, movingAvg, 1.0, 0.0);
                    first = false;
                    //cvShowImage("My Window1", difference);
                } else {
                    //Otherwise, make a running average of the motion.
                    cvRunningAvg(colorImage, movingAvg, 0.020, null);
                }
                //Convert the scale of the moving average.
                cvConvertScale(movingAvg, temp, 1.0, 0.0);
                //Subtract the current frame from the moving average.
                cvAbsDiff(colorImage, temp, difference);
                //Convert the image to grayscale.
                cvCvtColor(difference, grayImage, CV_RGB2GRAY);
                //canvas.showImage(grayImage);
                //Convert the image to black and white.
                cvThreshold(grayImage, grayImage, 70, 255, CV_THRESH_BINARY);
                //Dilate and erode to get people blobs.
                cvDilate(grayImage, grayImage, null, 18);
                cvErode(grayImage, grayImage, null, 10);
                canvas.showImage(grayImage);
                ROIFrame = cvCloneImage(grayImage);
                cvSetImageROI(ROIFrame, cvRect(72, 148, (265 - 72), (214 - 148)));
                //cvOr(outFrame, tempFrame, outFrame);
                cvShowImage("ROI Frame", ROIFrame);
                cvRectangle(colorImage, /* the dest image */
                        cvPoint(72, 148), /* top left point */
                        cvPoint(265, 214), /* bottom right point */
                        cvScalar(255, 0, 0, 0), /* the color; blue */
                        1, 8, 0);
                CvMemStorage storage = cvCreateMemStorage(0);
                CvSeq contour = new CvSeq(null);
                cvFindContours(grayImage, storage, contour, Loader.sizeof(CvContour.class), CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE);
            }
            //Show the frame.
            cvShowImage("My Window", colorImage);
            //Wait for the user to see it.
            cvWaitKey(10);
            //Thread.sleep(50);
        }
    }
}
In this code, I need to calculate the area of the white contours, or the number of white pixels, in ROIFrame. Is there a way I can proceed with this?
Use the function cvContourArea() (documentation here).
In your code, after your cvFindContours, loop over all of your contours like so:
CvSeq* curr_contour = contour;
while (curr_contour != NULL) {
    area = fabs(cvContourArea(curr_contour, CV_WHOLE_SEQ, 0));
    curr_contour = curr_contour->h_next;
}
Don't forget to store the area somewhere.
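Since the question uses JavaCV, the same loop in Java might look like the following sketch (method names assumed from JavaCV's OpenCV 2.x wrappers, untested; accumulating into totalArea is my addition):

double totalArea = 0;
CvSeq currContour = contour;
while (currContour != null && !currContour.isNull()) {
    // Sum the area of every contour found in the thresholded image
    totalArea += Math.abs(cvContourArea(currContour, CV_WHOLE_SEQ, 0));
    currContour = currContour.h_next();
}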
The image one gets from the OpenNI image metadata is arranged as an RGB image. I would like to convert it to an OpenCV IplImage, which by default assumes the data is stored as BGR. I use the following code:
XnUInt8 * pImage = new XnUInt8 [640*480*3];
memcpy(pImage, imageMD.Data(), 640*480*3*sizeof(XnUInt8));
XnUInt8 temp;
for (size_t row = 0; row < 480; row++) {
    for (size_t col = 0; col < 3*640; col += 3) {
        size_t index = row*3*640 + col;
        temp = pImage[index];
        pImage[index] = pImage[index+2];
        pImage[index+2] = temp;
    }
}
img->imageData = (char*) pImage;
What is the best (fastest) way in C/C++ to perform this conversion, so that the RGB image becomes BGR (in IplImage format)?
Isn't it easier to use OpenCV's color conversion function? (CV_RGB2BGR and CV_BGR2RGB denote the same channel swap, so either constant works here.)
imgColor->imageData = (char*) pImage;
cvCvtColor( imgColor, imgColor, CV_BGR2RGB);
There are some interesting references out there.
For instance, the QImage to IplImage conversion shown here, which also converts RGB to BGR:
static IplImage* qImage2IplImage(const QImage& qImage)
{
    int width = qImage.width();
    int height = qImage.height();

    // Creates an IplImage with 3 channels
    IplImage *img = cvCreateImage(cvSize(width, height), IPL_DEPTH_8U, 3);
    char * imgBuffer = img->imageData;

    //Remove alpha channel
    int jump = (qImage.hasAlphaChannel()) ? 4 : 3;

    for (int y = 0; y < img->height; y++)
    {
        QByteArray a((const char*)qImage.scanLine(y), qImage.bytesPerLine());
        for (int i = 0; i < a.size(); i += jump)
        {
            //Swap from RGB to BGR
            imgBuffer[2] = a[i];
            imgBuffer[1] = a[i+1];
            imgBuffer[0] = a[i+2];
            imgBuffer += 3;
        }
    }
    return img;
}
There are several posts here, besides this one, that show how to iterate over IplImage data.
There might be more to it than that (if the encoding is not openni_wrapper::Image::RGB). A good example can be found in the openni_image.cpp file, where the fillRGB function is used at line 170.