python segment an image of text line by line [duplicate] - opencv

I am trying to find a way to split the lines of text in a scanned document that has been adaptively thresholded. Right now, I store the pixel values of the document as unsigned ints from 0 to 255, take the average of the pixels in each row, split the rows into ranges based on whether the average pixel value is larger than 250, and then take the median of each range of rows for which this holds. However, this method sometimes fails, as there can be black splotches on the image.
Is there a more noise-resistant way to do this task?
EDIT: Here is some code. "warped" is the name of the original image, "cuts" is where I want to split the image.
warped = threshold_adaptive(warped, 250, offset=10)
warped = warped.astype("uint8") * 255
# get areas where we can split image on whitespace to make OCR more accurate
color_level = np.array([np.sum(line) / len(line) for line in warped])
cuts = []
i = 0
while i < len(color_level):
    if color_level[i] > 250:
        begin = i
        while i < len(color_level) and color_level[i] > 250:
            i += 1
        cuts.append((i + begin) / 2)  # middle of the whitespace region
    else:
        i += 1
EDIT 2: Sample image added

From your input image, you first need to make the text white and the background black:
You then need to compute the rotation angle of your bill. A simple approach is to find the minAreaRect of all white points (findNonZero), and you get:
Then you can rotate your bill, so that the text is horizontal:
Now you can compute the horizontal projection (reduce), taking the average value of each row. Apply a threshold th on this projection to account for noise in the image (here I used 0, i.e. no noise). Rows containing only background have an average of 0, rows containing text have an average greater than 0; after thresholding, background-only rows become white bins in the histogram and text rows become black bins. Then take the average bin coordinate of each continuous sequence of white bins in the histogram. That will be the y coordinate of your lines:
Here is the code. It's in C++, but since most of the work is done with OpenCV functions, it should be easily convertible to Python. At least, you can use this as a reference:
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

int main()
{
    // Read image
    Mat3b img = imread("path_to_image");

    // Binarize image. Text is white, background is black
    Mat1b bin;
    cvtColor(img, bin, COLOR_BGR2GRAY);
    bin = bin < 200;

    // Find all white pixels
    vector<Point> pts;
    findNonZero(bin, pts);

    // Get rotated rect of white pixels
    RotatedRect box = minAreaRect(pts);
    if (box.size.width > box.size.height)
    {
        swap(box.size.width, box.size.height);
        box.angle += 90.f;
    }

    Point2f vertices[4];
    box.points(vertices);
    for (int i = 0; i < 4; ++i)
    {
        line(img, vertices[i], vertices[(i + 1) % 4], Scalar(0, 255, 0));
    }

    // Rotate the image according to the found angle
    Mat1b rotated;
    Mat M = getRotationMatrix2D(box.center, box.angle, 1.0);
    warpAffine(bin, rotated, M, bin.size());

    // Compute horizontal projections
    Mat1f horProj;
    reduce(rotated, horProj, 1, CV_REDUCE_AVG);

    // Remove noise in histogram. White bins identify space lines, black bins identify text lines
    float th = 0;
    Mat1b hist = horProj <= th;

    // Get mean coordinate of white pixel groups
    vector<int> ycoords;
    int y = 0;
    int count = 0;
    bool isSpace = false;
    for (int i = 0; i < rotated.rows; ++i)
    {
        if (!isSpace)
        {
            if (hist(i))
            {
                isSpace = true;
                count = 1;
                y = i;
            }
        }
        else
        {
            if (!hist(i))
            {
                isSpace = false;
                ycoords.push_back(y / count);
            }
            else
            {
                y += i;
                count++;
            }
        }
    }

    // Draw lines as final result
    Mat3b result;
    cvtColor(rotated, result, COLOR_GRAY2BGR);
    for (int i = 0; i < ycoords.size(); ++i)
    {
        line(result, Point(0, ycoords[i]), Point(result.cols, ycoords[i]), Scalar(0, 255, 0));
    }

    return 0;
}

Basic steps, following @Miki's answer:
(1) read the source image
(2) threshold
(3) find the minAreaRect of the non-zero points
(4) warp by the rotation matrix
(5) find and draw the upper and lower bounds of each line
The code in Python:
#!/usr/bin/python3
# 2018.01.16 01:11:49 CST
# 2018.01.16 01:55:01 CST
import cv2
import numpy as np

## (1) read
img = cv2.imread("img02.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

## (2) threshold
th, threshed = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV|cv2.THRESH_OTSU)

## (3) minAreaRect on the nonzeros
pts = cv2.findNonZero(threshed)
ret = cv2.minAreaRect(pts)

(cx, cy), (w, h), ang = ret
if w > h:
    w, h = h, w
    ang += 90

## (4) Find rotation matrix, do rotation
M = cv2.getRotationMatrix2D((cx, cy), ang, 1.0)
rotated = cv2.warpAffine(threshed, M, (img.shape[1], img.shape[0]))

## (5) find and draw the upper and lower boundary of each line
hist = cv2.reduce(rotated, 1, cv2.REDUCE_AVG).reshape(-1)

th = 2
H, W = img.shape[:2]
uppers = [y for y in range(H-1) if hist[y] <= th and hist[y+1] > th]
lowers = [y for y in range(H-1) if hist[y] > th and hist[y+1] <= th]

rotated = cv2.cvtColor(rotated, cv2.COLOR_GRAY2BGR)
for y in uppers:
    cv2.line(rotated, (0, y), (W, y), (255, 0, 0), 1)
for y in lowers:
    cv2.line(rotated, (0, y), (W, y), (0, 255, 0), 1)

cv2.imwrite("result.png", rotated)
Final result:

Related

Radius of a disk in a binary image

I have binarized images like this one:
I need to determine the center and radius of the inner solid disk. As you can see, it is surrounded by a textured area which touches it, so that simple connected component detection doesn't work. Anyway, there is a void margin on a large part of the perimeter.
A possible cure could be eroding until all the texture disappears or disconnects from the disk, but this can be time consuming and the required number of iterations is uncertain. (In addition, in some unlucky cases there are tiny holes in the disk, which will grow with erosion.)
Any better suggestions to address this problem in a robust and fast way? (I tagged OpenCV, but this is not mandatory; what matters is the approach.)
You can:
Invert the image
Find the largest axis-aligned rectangle containing only zeros (I used my C++ code from this answer). The algorithm is pretty fast.
Get the center and radius of the circle from the rectangle
Code:
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;

// https://stackoverflow.com/a/30418912/5008845
cv::Rect findMaxRect(const cv::Mat1b& src)
{
    cv::Mat1f W(src.rows, src.cols, float(0));
    cv::Mat1f H(src.rows, src.cols, float(0));

    cv::Rect maxRect(0, 0, 0, 0);
    float maxArea = 0.f;

    for (int r = 0; r < src.rows; ++r)
    {
        for (int c = 0; c < src.cols; ++c)
        {
            if (src(r, c) == 0)
            {
                H(r, c) = 1.f + ((r > 0) ? H(r - 1, c) : 0);
                W(r, c) = 1.f + ((c > 0) ? W(r, c - 1) : 0);
            }

            float minw = W(r, c);
            for (int h = 0; h < H(r, c); ++h)
            {
                minw = std::min(minw, W(r - h, c));
                float area = (h + 1) * minw;
                if (area > maxArea)
                {
                    maxArea = area;
                    maxRect = cv::Rect(cv::Point(c - minw + 1, r - h), cv::Point(c + 1, r + 1));
                }
            }
        }
    }

    return maxRect;
}

int main()
{
    cv::Mat1b img = cv::imread("path/to/img", cv::IMREAD_GRAYSCALE);

    // Correct image
    img = img > 127;

    cv::Rect r = findMaxRect(~img);

    cv::Point center(std::round(r.x + r.width / 2.f), std::round(r.y + r.height / 2.f));
    int radius = std::sqrt(r.width * r.width + r.height * r.height) / 2;

    cv::Mat3b out;
    cv::cvtColor(img, out, cv::COLOR_GRAY2BGR);
    cv::rectangle(out, r, cv::Scalar(0, 255, 0));
    cv::circle(out, center, radius, cv::Scalar(0, 0, 255));

    return 0;
}
My method is to use morph-open, findContours, and minEnclosingCircle, as follows:
#!/usr/bin/python3
# 2018/11/29 20:03
import cv2
fname = "test.png"
img = cv2.imread(fname)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
th, threshed = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
morphed = cv2.morphologyEx(threshed, cv2.MORPH_OPEN, kernel, iterations = 3)
cnts = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
cnt = max(cnts, key=cv2.contourArea)
pt, r = cv2.minEnclosingCircle(cnt)
pt = (int(pt[0]), int(pt[1]))
r = int(r)
print("center: {}\nradius: {}".format(pt, r))
The final result:
center: (184, 170)
radius: 103
My second attempt on this case. This time I am using an erosion followed by a dilation (a morphological opening) to weaken the noise and maintain the signal. This is followed by a simple threshold and a connected-component analysis. I hope this code runs faster.
Using this method, I can find the centroid with subpixel accuracy:
('center : ', (184.12244328746746, 170.59771290442544))
Radius is derived from the area of the circle.
('radius : ', 101.34704439389715)
Here is the full code
import cv2
import numpy as np
# load image in grayscale
image = cv2.imread('radius.png',0)
r,c = image.shape
# remove noise
blured = cv2.blur(image,(5,5))
# Morphological opening: erode then dilate to remove small bright noise
morph = cv2.erode(blured,None,iterations = 3)
morph = cv2.dilate(morph,None,iterations = 3)
cv2.imshow("morph",morph)
cv2.waitKey(0)
# Get the strong signal
th, th_img = cv2.threshold(morph,200,255,cv2.THRESH_BINARY)
cv2.imshow("th_img",th_img)
cv2.waitKey(0)
# Get connected components
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(th_img)
print(num_labels)
print(stats)
# display labels
labels_disp = np.uint8(255*labels/np.max(labels))
cv2.imshow("labels",labels_disp)
cv2.waitKey(0)
# Find the label of the component at the image centre
cnt_label = labels[r//2, c//2]
# Find circle center and radius
# Radius derived from the area of the labeled component (area = pi * r^2)
area = stats[cnt_label][4]
radius = np.sqrt(area / np.pi)
cnt_pt = ((centroids[cnt_label][0]),(centroids[cnt_label][1]))
print('center : ',cnt_pt)
print('radius : ',radius)
# Display final result
edges_color = cv2.cvtColor(image,cv2.COLOR_GRAY2BGR)
cv2.circle(edges_color,(int(cnt_pt[0]),int(cnt_pt[1])),int(radius),(0,0,255),1)
cv2.circle(edges_color,(int(cnt_pt[0]),int(cnt_pt[1])),5,(0,0,255),-1)
x1 = stats[cnt_label][0]
y1 = stats[cnt_label][1]
w1 = stats[cnt_label][2]
h1 = stats[cnt_label][3]
cv2.rectangle(edges_color,(x1,y1),(x1+w1,y1+h1),(0,255,0))
cv2.imshow("edges_color",edges_color)
cv2.waitKey(0)
Here is an example using Hough circles. It can work if you set the min and max radius to a proper range.
import cv2
import numpy as np
# load image in grayscale
image = cv2.imread('radius.png',0)
r , c = image.shape
# remove noise
dst = cv2.blur(image,(5,5))
# Morphological opening: erode then dilate to remove small bright noise
dst = cv2.erode(dst,None,iterations = 3)
dst = cv2.dilate(dst,None,iterations = 3)
# Find Hough Circle
circles = cv2.HoughCircles(dst
,cv2.HOUGH_GRADIENT
,2
,minDist = 0.5* r
,param2 = 150
,minRadius = int(0.5 * r / 2.0)
,maxRadius = int(0.75 * r / 2.0)
)
# Display
edges_color = cv2.cvtColor(image,cv2.COLOR_GRAY2BGR)
for i in circles[0]:
    print(i)
    cv2.circle(edges_color, (i[0], i[1]), i[2], (0, 0, 255), 1)
cv2.imshow("edges_color",edges_color)
cv2.waitKey(0)
Here is the result
[185. 167. 103.6]
Have you tried something along the lines of the Circle Hough Transform?
I see that OpenCV has its own implementation. Some preprocessing (median filtering?) might be necessary here, though.
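A minimal Python sketch of that suggestion; the file name and all Hough parameters below are placeholders you would need to tune for your image:
import cv2

img = cv2.imread("disk.png", cv2.IMREAD_GRAYSCALE)   # placeholder file name
# median filtering suppresses the surrounding texture before the transform
smoothed = cv2.medianBlur(img, 11)
circles = cv2.HoughCircles(smoothed, cv2.HOUGH_GRADIENT, dp=2,
                           minDist=img.shape[0] // 2,
                           param1=100, param2=50,
                           minRadius=50, maxRadius=150)
if circles is not None:
    x, y, r = circles[0, 0]
    print("center: ({:.1f}, {:.1f}), radius: {:.1f}".format(x, y, r))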
Here is a simple approach:
Erode the image (using a large, circular SE), then find the centroid of the result. This should be really close to the centroid of the central disk.
Compute the mean as a function of the radius of the original image, using the computed centroid as the center.
The output looks like this:
From here, determining the radius is quite simple.
Here is the code, I'm using PyDIP (we don't yet have a binary distribution, you'll need to download and build from sources):
import matplotlib.pyplot as pp
import PyDIP as dip
import numpy as np
img = dip.Image(pp.imread('/home/cris/tmp/FDvQm.png')[:,:,0])
b = dip.Erosion(img, 30)
c = dip.CenterOfMass(b)
rmean = dip.RadialMean(img, center=c)
pp.plot(rmean)
r = np.argmax(rmean < 0.5)
Here, r is 102, the radius as an integer number of pixels; I'm sure it's possible to interpolate to improve precision. c is [184.02, 170.45].
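As a rough illustration of that interpolation idea (not part of the answer above), one could locate the 0.5 crossing of the radial mean with a linear interpolation, treating rmean as a 1-D NumPy array:
import numpy as np

profile = np.asarray(rmean)            # rmean from the PyDIP snippet above
i = int(np.argmax(profile < 0.5))      # first bin below the 0.5 threshold
# linear interpolation between bin i-1 (still >= 0.5) and bin i (< 0.5)
r_subpix = (i - 1) + (profile[i - 1] - 0.5) / (profile[i - 1] - profile[i])
print(r_subpix)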

OpenCV detecting TV screen using camera

I am using an iPhone camera to detect a TV screen. My current approach is to compare subsequent frames pixel by pixel and keep track of cumulative differences. The result is a binary image, as shown below.
For me this looks like a rectangle, but OpenCV does not think so. Its sides are not perfectly straight, and sometimes there is even more color bleed to make detection difficult. Here is my OpenCV code trying to detect the rectangle; since I am not very familiar with OpenCV, it is copied from an example I found.
uint32_t *ptr = (uint32_t*)CVPixelBufferGetBaseAddress(buffer);
cv::Mat image((int)width, (int)height, CV_8UC4, ptr); // unsigned 8-bit values for 4 channels (ARGB)
cv::Mat image2 = [self matFromPixelBuffer:buffer];
std::vector<std::vector<cv::Point>> squares;

// blur will enhance edge detection
cv::Mat blurred(image2);
GaussianBlur(image2, blurred, cvSize(3,3), 0); // change from median blur to gaussian for more accuracy of square detection

cv::Mat gray0(blurred.size(), CV_8U), gray;
std::vector<std::vector<cv::Point>> contours;

// find squares in every color plane of the image
for (int c = 0; c < 3; c++) {
    int ch[] = {c, 0};
    mixChannels(&blurred, 1, &gray0, 1, ch, 1);

    // try several threshold levels
    const int threshold_level = 2;
    for (int l = 0; l < threshold_level; l++) {
        // Use Canny instead of zero threshold level!
        // Canny helps to catch squares with gradient shading
        if (l == 0) {
            Canny(gray0, gray, 10, 20, 3);
            // Dilate helps to remove potential holes between edge segments
            dilate(gray, gray, cv::Mat(), cv::Point(-1,-1));
        } else {
            gray = gray0 >= (l+1) * 255 / threshold_level;
        }

        // Find contours and store them in a list
        findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);

        // Test contours
        std::vector<cv::Point> approx;
        int biggestSize = 0;
        for (size_t i = 0; i < contours.size(); i++) {
            // approximate contour with accuracy proportional
            // to the contour perimeter
            approxPolyDP(cv::Mat(contours[i]), approx, arcLength(cv::Mat(contours[i]), true)*0.02, true);

            if (approx.size() != 4)
                continue;

            // Note: absolute value of an area is used because
            // area may be positive or negative - in accordance with the
            // contour orientation
            int areaSize = fabs(contourArea(cv::Mat(approx)));
            if (approx.size() == 4 && areaSize > biggestSize)
                biggestSize = areaSize;

            cv::RotatedRect boundingRect = cv::minAreaRect(approx);
            float aspectRatio = boundingRect.size.width / boundingRect.size.height;

            cv::Rect boundingRect2 = cv::boundingRect(approx);
            float aspectRatio2 = (float)boundingRect2.width / (float)boundingRect2.height;

            bool convex = isContourConvex(cv::Mat(approx));
            if (approx.size() == 4 &&
                fabs(contourArea(cv::Mat(approx))) > minArea &&
                (aspectRatio >= minAspectRatio && aspectRatio <= maxAspectRatio) &&
                isContourConvex(cv::Mat(approx))) {
                double maxCosine = 0;
                for (int j = 2; j < 5; j++) {
                    double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
                    maxCosine = MAXIMUM(maxCosine, cosine);
                }

                double area = fabs(contourArea(cv::Mat(approx)));

                if (maxCosine < 0.3) {
                    squares.push_back(approx);
                }
            }
        }
    }
}
After the Canny step the image looks like this:
It seems fine to me, but for some reason the rectangle is not detected. Can anyone explain if there is something wrong with my parameters?
My second approach was to use OpenCV Hough line detection, basically using the same code as above; for the Canny image I then call the HoughLines function. It gives me quite a few lines, as I had to lower the threshold to detect the vertical lines. The result looks like this:
The problem is that there are so many lines. How can I find out which lines are touching the sides of the blue rectangle, as shown in the first image?
Or is there a better approach to detect a screen?
First of all, find the maximal-area contour, then compute its minimum-area rectangle. Divide the contour area by the rectangle area; if the ratio is close enough to 1, your contour is similar to a rectangle. This will be your required contour and rectangle.
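A rough Python sketch of that check (here binary stands for the accumulated-difference mask from the question, and the 0.85 threshold is an arbitrary placeholder):
import cv2

# `binary` is assumed to be the thresholded difference mask from the question
cnts = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
cnt = max(cnts, key=cv2.contourArea)        # maximal-area contour
rect = cv2.minAreaRect(cnt)                 # ((cx, cy), (w, h), angle)
rect_area = rect[1][0] * rect[1][1]
ratio = cv2.contourArea(cnt) / rect_area if rect_area > 0 else 0
if ratio > 0.85:                            # close enough to 1 -> roughly rectangular
    corners = cv2.boxPoints(rect)           # four corners of the screen candidate
    print("screen corners:", corners)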

Distinguish rock scenes using opencv

I am struggling with finding the appropriate contour algorithm for a low quality image. The example image shows a rock scene:
What I am trying to achieve is to find contours around features such as:
light areas
dark areas
grey1 areas
grey2 areas
etc. until grey-n areas
(The number of areas shall be a parameter of choice)
I do not want to take a simple binary threshold but rather use some sort of contour finding (for example watershed or similar). The major feature lines shall be kept; noise within a feature area can be flattened.
The result of my code can be seen on the images to the right.
Unfortunately, as you can easily tell, the colors do not really represent the original large-scale image features! For example: check out the two areas that I circled with red - these features are almost completely flooded with another color. What I imagine is that at least the very light and the very dark areas are covered by its own color.
cv::Mat cv_src = cv::imread(argv[1]);
cv::Mat output;
cv::Mat cv_src_gray;
cv::cvtColor(cv_src, cv_src_gray, cv::COLOR_RGB2GRAY);

double clipLimit = 0.1;
cv::Size titleGridSize = cv::Size(8, 8);
cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(clipLimit, titleGridSize);
clahe->apply(cv_src_gray, output);
cv::equalizeHist(output, output);
cv::cvtColor(output, cv_src, cv::COLOR_GRAY2RGB);

// Create binary image from source image
cv::Mat bw;
cv::cvtColor(cv_src, bw, cv::COLOR_BGR2GRAY);
cv::threshold(bw, bw, 180, 255, cv::THRESH_BINARY);

// Perform the distance transform algorithm
cv::Mat dist;
cv::distanceTransform(bw, dist, cv::DIST_L2, CV_32F);

// Normalize the distance image for range = {0.0, 1.0}
cv::normalize(dist, dist, 0, 1., cv::NORM_MINMAX);

// Threshold to obtain the peaks
cv::threshold(dist, dist, .2, 1., cv::THRESH_BINARY);

// Create the CV_8U version of the distance image
cv::Mat dist_8u;
dist.convertTo(dist_8u, CV_8U);

// Find total markers
std::vector<std::vector<cv::Point>> contours;
cv::findContours(dist_8u, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);
int ncomp = contours.size();

// Create the marker image for the watershed algorithm
cv::Mat markers = cv::Mat::zeros(dist.size(), CV_32S);

// Draw the foreground markers
for (int i = 0; i < ncomp; i++)
    cv::drawContours(markers, contours, i, cv::Scalar::all(i+1), -1);

// Draw the background marker
cv::circle(markers, cv::Point(5,5), 3, CV_RGB(255,255,255), -1);

// Perform the watershed algorithm
cv::watershed(cv_src, markers);

// Generate random colors
std::vector<cv::Vec3b> colors;
for (int i = 0; i < ncomp; i++)
{
    int b = cv::theRNG().uniform(0, 255);
    int g = cv::theRNG().uniform(0, 255);
    int r = cv::theRNG().uniform(0, 255);
    colors.push_back(cv::Vec3b((uchar)b, (uchar)g, (uchar)r));
}

// Create the result image
cv::Mat dst = cv::Mat::zeros(markers.size(), CV_8UC3);

// Fill labeled objects with random colors
for (int i = 0; i < markers.rows; i++)
{
    for (int j = 0; j < markers.cols; j++)
    {
        int index = markers.at<int>(i,j);
        if (index > 0 && index <= ncomp)
            dst.at<cv::Vec3b>(i,j) = colors[index-1];
        else
            dst.at<cv::Vec3b>(i,j) = cv::Vec3b(0,0,0);
    }
}

// Show me what you got
imshow("final_result", dst);
I think you can use a simple clustering such as k-means for this, then examine the cluster centers (or the mean and standard deviation of each cluster). I quickly tried it in MATLAB.
im = imread('tvBqt.jpg');
gr = rgb2gray(im);
x = double(gr(:));
idx = kmeans(x, 4);
cl = reshape(idx, 600, 472);
figure,
subplot(1, 2, 1), imshow(gr, []), title('original')
subplot(1, 2, 2), imshow(label2rgb(cl), []), title('clustered')
The result:
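For reference, a roughly equivalent sketch with OpenCV's own k-means in Python ("rock.jpg" and K = 4 are placeholders, mirroring the MATLAB example above):
import cv2
import numpy as np

gray = cv2.imread("rock.jpg", cv2.IMREAD_GRAYSCALE)
data = gray.reshape(-1, 1).astype(np.float32)        # one grey value per sample
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
K = 4                                                # number of grey-level clusters
_, labels, centers = cv2.kmeans(data, K, None, criteria, 5, cv2.KMEANS_PP_CENTERS)
# paint every pixel with its cluster centre so the segmentation is viewable
vis = centers[labels.flatten()].reshape(gray.shape).astype(np.uint8)
cv2.imwrite("clustered.png", vis)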
You could try using SLIC Superpixels. I tried it and showed some good results. You could vary the parameters to get better clustering.
SLIC Superpixels
SLIC Superpixels with OpenCV C++
SLIC Superpixels with OpenCV Python
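If you have the opencv-contrib package installed, a minimal SLIC sketch in Python could look roughly like this (region_size and ruler are values you would need to tune; "rock.jpg" is a placeholder file name):
import cv2

img = cv2.imread("rock.jpg")
slic = cv2.ximgproc.createSuperpixelSLIC(img, algorithm=cv2.ximgproc.SLICO,
                                         region_size=30, ruler=10.0)
slic.iterate(10)                           # number of clustering iterations
mask = slic.getLabelContourMask()          # 255 on superpixel boundaries
labels = slic.getLabels()                  # per-pixel superpixel id, if needed
out = img.copy()
out[mask == 255] = (0, 255, 0)             # draw boundaries in green
cv2.imwrite("slic.png", out)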

Detect semicircle in OpenCV

I am trying to detect full circles and semicircles in an image.
I am following the below mentioned procedure:
Process image (including Canny edge detection).
Find contours and draw them on an empty image, so that I can eliminate unwanted components
(The processed image is exactly what I want).
Detect circles using HoughCircles. And, this is what I get:
I tried varying the parameters in HoughCircles but the results are not consistent as it varies based on lighting and the position of circles in the image.
I accept or reject a circle based on its size. So, the result is not acceptable. Also, I have a long list of "acceptable" circles. So, I need some allowance in the HoughCircle params.
As for the full circles, it's easy - I can simply find the "roundness" of the contour. The problem is semicircles!
Please find the edited image before Hough transform
Use HoughCircles directly on your image; don't extract edges first.
Then, for each detected circle, test what percentage of it is really present in the image:
int main()
{
    cv::Mat color = cv::imread("../houghCircles.png");
    cv::namedWindow("input"); cv::imshow("input", color);

    cv::Mat canny;
    cv::Mat gray;
    /// Convert it to gray
    cv::cvtColor( color, gray, CV_BGR2GRAY );

    // compute canny (don't blur with that image quality!!)
    cv::Canny(gray, canny, 200, 20);
    cv::namedWindow("canny2"); cv::imshow("canny2", canny > 0);

    std::vector<cv::Vec3f> circles;

    /// Apply the Hough Transform to find the circles
    cv::HoughCircles( gray, circles, CV_HOUGH_GRADIENT, 1, 60, 200, 20, 0, 0 );

    /// Draw the circles detected
    for( size_t i = 0; i < circles.size(); i++ )
    {
        Point center(cvRound(circles[i][0]), cvRound(circles[i][1]));
        int radius = cvRound(circles[i][2]);
        cv::circle( color, center, 3, Scalar(0,255,255), -1);
        cv::circle( color, center, radius, Scalar(0,0,255), 1 );
    }

    // compute distance transform:
    cv::Mat dt;
    cv::distanceTransform(255 - (canny > 0), dt, CV_DIST_L2, 3);
    cv::namedWindow("distance transform"); cv::imshow("distance transform", dt / 255.0f);

    // test for semi-circles:
    float minInlierDist = 2.0f;
    for( size_t i = 0; i < circles.size(); i++ )
    {
        // test inlier percentage:
        // sample the circle and check for distance to the next edge
        unsigned int counter = 0;
        unsigned int inlier = 0;

        cv::Point2f center((circles[i][0]), (circles[i][1]));
        float radius = (circles[i][2]);

        // maximal distance of inlier might depend on the size of the circle
        float maxInlierDist = radius / 25.0f;
        if (maxInlierDist < minInlierDist) maxInlierDist = minInlierDist;

        // TODO: maybe parameter incrementation might depend on circle size!
        for (float t = 0; t < 2 * 3.14159265359f; t += 0.1f)
        {
            counter++;
            float cX = radius * cos(t) + circles[i][0];
            float cY = radius * sin(t) + circles[i][1];

            if (dt.at<float>(cY, cX) < maxInlierDist)
            {
                inlier++;
                cv::circle(color, cv::Point2i(cX, cY), 3, cv::Scalar(0, 255, 0));
            }
            else
                cv::circle(color, cv::Point2i(cX, cY), 3, cv::Scalar(255, 0, 0));
        }
        std::cout << 100.0f * (float)inlier / (float)counter << " % of a circle with radius " << radius << " detected" << std::endl;
    }

    cv::namedWindow("output"); cv::imshow("output", color);
    cv::imwrite("houghLinesComputed.png", color);
    cv::waitKey(-1);
    return 0;
}
For this input:
It gives this output:
The red circles are Hough results.
The green sampled dots on the circle are inliers.
The blue dots are outliers.
Console output:
100 % of a circle with radius 27.5045 detected
100 % of a circle with radius 25.3476 detected
58.7302 % of a circle with radius 194.639 detected
50.7937 % of a circle with radius 23.1625 detected
79.3651 % of a circle with radius 7.64853 detected
If you want to test RANSAC instead of Hough, have a look at this.
Here is another way to do it: a simple RANSAC version (much optimization to be done to improve speed) that works on the edge image.
The method loops over these steps until it is cancelled:
choose 3 edge pixels at random
estimate a circle from them (3 points are enough to identify a circle)
verify or falsify that it's really a circle: count what percentage of the circle is represented by the given edges
if a circle is verified, remove the circle from the input/edges
int main()
{
    //RANSAC

    //load edge image
    cv::Mat color = cv::imread("../circleDetectionEdges.png");

    // convert to grayscale
    cv::Mat gray;
    cv::cvtColor(color, gray, CV_RGB2GRAY);

    // get binary image
    cv::Mat mask = gray > 0;

    //erode the edges to obtain sharp/thin edges (undo the blur?)
    cv::erode(mask, mask, cv::Mat());

    std::vector<cv::Point2f> edgePositions;
    edgePositions = getPointPositions(mask);

    // create distance transform to efficiently evaluate distance to nearest edge
    cv::Mat dt;
    cv::distanceTransform(255 - mask, dt, CV_DIST_L1, 3);

    //TODO: maybe seed random variable for real random numbers.

    unsigned int nIterations = 0;

    char quitKey = 'q';
    std::cout << "press " << quitKey << " to stop" << std::endl;

    while (cv::waitKey(-1) != quitKey)
    {
        //RANSAC: randomly choose 3 points and create a circle:
        //TODO: choose randomly but more intelligently,
        //so that it is more likely to choose three points of a circle.
        //For example if there are many small circles, it is unlikely to randomly choose 3 points of the same circle.
        unsigned int idx1 = rand() % edgePositions.size();
        unsigned int idx2 = rand() % edgePositions.size();
        unsigned int idx3 = rand() % edgePositions.size();

        // we need 3 different samples:
        if (idx1 == idx2) continue;
        if (idx1 == idx3) continue;
        if (idx3 == idx2) continue;

        // create circle from 3 points:
        cv::Point2f center; float radius;
        getCircle(edgePositions[idx1], edgePositions[idx2], edgePositions[idx3], center, radius);

        float minCirclePercentage = 0.4f;

        // inlier set unused at the moment but could be used to approximate a (more robust) circle from all inliers
        std::vector<cv::Point2f> inlierSet;

        //verify or falsify the circle by inlier counting:
        float cPerc = verifyCircle(dt, center, radius, inlierSet);

        if (cPerc >= minCirclePercentage)
        {
            std::cout << "accepted circle with " << cPerc * 100.0f << " % inlier" << std::endl;
            // first step would be to approximate the circle iteratively from ALL INLIERS to obtain a better circle center
            // but that's a TODO
            std::cout << "circle: " << "center: " << center << " radius: " << radius << std::endl;
            cv::circle(color, center, radius, cv::Scalar(255, 255, 0), 1);

            // accept circle => remove it from the edge list
            cv::circle(mask, center, radius, cv::Scalar(0), 10);

            //update edge positions and distance transform
            edgePositions = getPointPositions(mask);
            cv::distanceTransform(255 - mask, dt, CV_DIST_L1, 3);
        }

        cv::Mat tmp;
        mask.copyTo(tmp);

        // prevent cases where no circle could be extracted (because three points collinear or sth.)
        // filter NaN values
        if ((center.x == center.x) && (center.y == center.y) && (radius == radius))
        {
            cv::circle(tmp, center, radius, cv::Scalar(255));
        }
        else
        {
            std::cout << "circle illegal" << std::endl;
        }

        ++nIterations;
        cv::namedWindow("RANSAC"); cv::imshow("RANSAC", tmp);
    }

    std::cout << nIterations << " iterations performed" << std::endl;

    cv::namedWindow("edges"); cv::imshow("edges", mask);
    cv::namedWindow("color"); cv::imshow("color", color);

    cv::imwrite("detectedCircles.png", color);
    cv::waitKey(-1);
    return 0;
}
float verifyCircle(cv::Mat dt, cv::Point2f center, float radius, std::vector<cv::Point2f> & inlierSet)
{
    unsigned int counter = 0;
    unsigned int inlier = 0;
    float minInlierDist = 2.0f;
    float maxInlierDistMax = 100.0f;
    float maxInlierDist = radius / 25.0f;
    if (maxInlierDist < minInlierDist) maxInlierDist = minInlierDist;
    if (maxInlierDist > maxInlierDistMax) maxInlierDist = maxInlierDistMax;

    // choose samples along the circle and count inlier percentage
    for (float t = 0; t < 2 * 3.14159265359f; t += 0.05f)
    {
        counter++;
        float cX = radius * cos(t) + center.x;
        float cY = radius * sin(t) + center.y;

        if (cX < dt.cols)
        if (cX >= 0)
        if (cY < dt.rows)
        if (cY >= 0)
        if (dt.at<float>(cY, cX) < maxInlierDist)
        {
            inlier++;
            inlierSet.push_back(cv::Point2f(cX, cY));
        }
    }

    return (float)inlier / float(counter);
}

inline void getCircle(cv::Point2f& p1, cv::Point2f& p2, cv::Point2f& p3, cv::Point2f& center, float& radius)
{
    float x1 = p1.x;
    float x2 = p2.x;
    float x3 = p3.x;

    float y1 = p1.y;
    float y2 = p2.y;
    float y3 = p3.y;

    // PLEASE CHECK FOR TYPOS IN THE FORMULA :)
    center.x = (x1*x1 + y1*y1)*(y2-y3) + (x2*x2 + y2*y2)*(y3-y1) + (x3*x3 + y3*y3)*(y1-y2);
    center.x /= ( 2*(x1*(y2-y3) - y1*(x2-x3) + x2*y3 - x3*y2) );

    center.y = (x1*x1 + y1*y1)*(x3-x2) + (x2*x2 + y2*y2)*(x1-x3) + (x3*x3 + y3*y3)*(x2-x1);
    center.y /= ( 2*(x1*(y2-y3) - y1*(x2-x3) + x2*y3 - x3*y2) );

    radius = sqrt((center.x-x1)*(center.x-x1) + (center.y-y1)*(center.y-y1));
}

std::vector<cv::Point2f> getPointPositions(cv::Mat binaryImage)
{
    std::vector<cv::Point2f> pointPositions;

    for (unsigned int y = 0; y < binaryImage.rows; ++y)
    {
        //unsigned char* rowPtr = binaryImage.ptr<unsigned char>(y);
        for (unsigned int x = 0; x < binaryImage.cols; ++x)
        {
            //if(rowPtr[x] > 0) pointPositions.push_back(cv::Point2i(x,y));
            if (binaryImage.at<unsigned char>(y, x) > 0) pointPositions.push_back(cv::Point2f(x, y));
        }
    }

    return pointPositions;
}
input:
output:
console output:
press q to stop
accepted circle with 50 % inlier
circle: center: [358.511, 211.163] radius: 193.849
accepted circle with 85.7143 % inlier
circle: center: [45.2273, 171.591] radius: 24.6215
accepted circle with 100 % inlier
circle: center: [257.066, 197.066] radius: 27.819
circle illegal
30 iterations performed
Optimization should include:
use all inliers to fit a better circle (a rough sketch of this follows below)
don't compute the distance transform after each detected circle (it's quite expensive); compute inliers from the point/edge set directly and remove the inlier edges from that list
if there are many small circles in the image (and/or a lot of noise), it's unlikely to randomly hit 3 edge pixels of the same circle => try contour detection first and detect circles for each contour; after that, try to detect all "other" circles left in the image
a lot of other stuff
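As a sketch of the first item (refitting from all inliers), a standard algebraic least-squares circle fit could look like this in Python; the point list would be the inlierSet collected in verifyCircle:
import numpy as np

def fit_circle_least_squares(points):
    # algebraic (Kasa) fit: solve x^2 + y^2 + D*x + E*y + F = 0 for D, E, F
    pts = np.asarray(points, dtype=np.float64)
    A = np.column_stack([pts[:, 0], pts[:, 1], np.ones(len(pts))])
    b = -(pts[:, 0] ** 2 + pts[:, 1] ** 2)
    D, E, F = np.linalg.lstsq(A, b, rcond=None)[0]
    cx, cy = -D / 2.0, -E / 2.0
    r = np.sqrt(cx * cx + cy * cy - F)
    return (cx, cy), r

# e.g.: (cx, cy), r = fit_circle_least_squares(inlier_points)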
@Micka's answer is great; here it is roughly translated into Python.
The method takes a thresholded image mask as an argument, leaving that part as an exercise for the reader.
def get_circle_percentages(image):
    # compute distance transform of image
    dist = cv2.distanceTransform(image, cv2.DIST_L2, 0)
    rows = image.shape[0]
    circles = cv2.HoughCircles(image, cv2.HOUGH_GRADIENT, 1, rows / 8, param1=50, param2=10, minRadius=40, maxRadius=90)

    minInlierDist = 2.0

    for c in circles[0, :]:
        counter = 0
        inlier = 0
        center = (c[0], c[1])
        radius = c[2]
        maxInlierDist = radius / 25.0

        if maxInlierDist < minInlierDist:
            maxInlierDist = minInlierDist

        for i in np.arange(0, 2 * np.pi, 0.1):
            counter += 1
            x = center[0] + radius * np.cos(i)
            y = center[1] + radius * np.sin(i)

            if dist.item(int(y), int(x)) < maxInlierDist:
                inlier += 1

        print(str(100.0 * inlier / counter) + ' percent of a circle with radius ' + str(radius) + " detected")
I know that it's a little bit late, but I used a different approach which is much easier.
From cv2.HoughCircles(...) you get the centre of each circle and its radius (x, y, r). So I simply go through all centre points of the circles and check whether each is further away from every edge of the image than its radius.
Here is my code:
height, width = img.shape[:2]
#test top edge
up = (circles[0, :, 1] - circles[0, :, 2]) >= 0
#test left edge
left = (circles[0, :, 0] - circles[0, :, 2]) >= 0
#test right edge
right = (circles[0, :, 0] + circles[0, :, 2]) <= width
#test bottom edge
down = (circles[0, :, 1] + circles[0, :, 2]) <= height
circles = circles[:, (up & down & right & left), :]
The semicircle detected by the Hough algorithm is most probably correct. The issue here might be that unless you strictly control the geometry of the scene, i.e. the exact position of the camera relative to the target so that the image axis is normal to the target plane, you will get ellipses rather than circles projected onto the image plane. Not to mention the distortions caused by the optical system, which further degrade the geometric figure. If you rely on precision here, I would recommend camera calibration.
You had better try a different kernel for the Gaussian blur; that will help you:
GaussianBlur( src_gray, src_gray, Size(11, 11), 5, 5);
So vary Size(i, i) and the sigma values.
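If it helps, here is a small Python sketch for sweeping kernel sizes and sigmas and inspecting the results (the file name and the value ranges are purely illustrative):
import cv2

gray = cv2.imread("input.png", cv2.IMREAD_GRAYSCALE)   # placeholder file name
for k in (3, 5, 7, 9, 11):                              # kernel size must be odd
    for sigma in (1, 3, 5):
        blurred = cv2.GaussianBlur(gray, (k, k), sigma)
        cv2.imwrite("blur_k{}_s{}.png".format(k, sigma), blurred)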

Extracting text from an image with tesseract [duplicate]

I asked a similar question here but that is focused more on tesseract.
I have a sample image as below. I would like to make the white square my Region of Interest and then crop out that part (square) and create a new image with it. I will be working with different images so the square won't always be at the same location in all images. So I will need to somehow detect the edges of the square.
What are some pre-processing methods I can perform to achieve the result?
Using your test image I was able to remove all the noises with a simple erosion operation.
After that, a simple iteration on the Mat to find the corner pixels is trivial, and I talked about that in this answer. For testing purposes we can draw green lines between those points to display the area we are interested in on the original image:
At the end, I set the ROI in the original image and crop out that part.
The final result is displayed on the image below:
I wrote a sample code that performs this task using the C++ interface of OpenCV. I'm confident in your skills to translate this code to Python. If you can't do it, forget the code and stick with the roadmap I shared on this answer.
#include <cv.h>
#include <highgui.h>

int main(int argc, char* argv[])
{
    cv::Mat img = cv::imread(argv[1]);
    std::cout << "Original image size: " << img.size() << std::endl;

    // Convert RGB Mat to GRAY
    cv::Mat gray;
    cv::cvtColor(img, gray, CV_BGR2GRAY);
    std::cout << "Gray image size: " << gray.size() << std::endl;

    // Erode image to remove unwanted noises
    int erosion_size = 5;
    cv::Mat element = cv::getStructuringElement(cv::MORPH_CROSS,
                          cv::Size(2 * erosion_size + 1, 2 * erosion_size + 1),
                          cv::Point(erosion_size, erosion_size) );
    cv::erode(gray, gray, element);

    // Scan the image searching for points and store them in a vector
    std::vector<cv::Point> points;
    cv::Mat_<uchar>::iterator it = gray.begin<uchar>();
    cv::Mat_<uchar>::iterator end = gray.end<uchar>();
    for (; it != end; it++)
    {
        if (*it)
            points.push_back(it.pos());
    }

    // From the points, figure out the size of the ROI
    int left, right, top, bottom;
    for (int i = 0; i < points.size(); i++)
    {
        if (i == 0) // initialize corner values
        {
            left = right = points[i].x;
            top = bottom = points[i].y;
        }

        if (points[i].x < left)
            left = points[i].x;

        if (points[i].x > right)
            right = points[i].x;

        if (points[i].y < top)
            top = points[i].y;

        if (points[i].y > bottom)
            bottom = points[i].y;
    }

    std::vector<cv::Point> box_points;
    box_points.push_back(cv::Point(left, top));
    box_points.push_back(cv::Point(left, bottom));
    box_points.push_back(cv::Point(right, bottom));
    box_points.push_back(cv::Point(right, top));

    // Compute minimal bounding box for the ROI
    // Note: for some unknown reason, width/height of the box are switched.
    cv::RotatedRect box = cv::minAreaRect(cv::Mat(box_points));
    std::cout << "box w:" << box.size.width << " h:" << box.size.height << std::endl;

    // Draw bounding box in the original image (debugging purposes)
    //cv::Point2f vertices[4];
    //box.points(vertices);
    //for (int i = 0; i < 4; ++i)
    //{
    //    cv::line(img, vertices[i], vertices[(i + 1) % 4], cv::Scalar(0, 255, 0), 1, CV_AA);
    //}
    //cv::imshow("Original", img);
    //cv::waitKey(0);

    // Set the ROI to the area defined by the box
    // Note: because the width/height of the box are switched,
    // they were switched manually in the code below:
    cv::Rect roi;
    roi.x = box.center.x - (box.size.height / 2);
    roi.y = box.center.y - (box.size.width / 2);
    roi.width = box.size.height;
    roi.height = box.size.width;
    std::cout << "roi # " << roi.x << "," << roi.y << " " << roi.width << "x" << roi.height << std::endl;

    // Crop the original image to the defined ROI
    cv::Mat crop = img(roi);

    // Display cropped ROI
    cv::imshow("Cropped ROI", crop);
    cv::waitKey(0);

    return 0;
}
Seeing that the text is the only large blob, and everything else is barely larger than a pixel, a simple morphological opening should suffice.
You can do this in OpenCV
or with ImageMagick.
Afterwards the white rectangle should be the only thing left in the image. You can find it with OpenCV's findContours, with the CvBlobs library for OpenCV, or with the ImageMagick -crop function.
Here is your image with 2 steps of erosion followed by 2 steps of dilation applied:
You can simply plug this image into the opencv findContours function as in the Squares tutorial example to get the position
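A rough Python version of that pipeline (the kernel size and iteration counts are guesses you would need to tune; it assumes the rectangle is the largest bright blob left after the opening, and "input.png" is a placeholder file name):
import cv2

img = cv2.imread("input.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, bw = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# opening (erosion followed by dilation) removes the thin strokes and speckle
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
opened = cv2.morphologyEx(bw, cv2.MORPH_OPEN, kernel, iterations=2)
# the white rectangle should be the largest remaining blob
cnts = cv2.findContours(opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
cnt = max(cnts, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(cnt)
crop = img[y:y + h, x:x + w]                          # the cropped ROI
cv2.imwrite("roi.png", crop)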
input
#objective:
#1)compress large images to less than 1000x1000
#2)identify region of interests
#3)save rois in top to bottom order
import cv2
import os

def get_contour_precedence(contour, cols):
    tolerance_factor = 10
    origin = cv2.boundingRect(contour)
    return ((origin[1] // tolerance_factor) * tolerance_factor) * cols + origin[0]

# Load image, grayscale, Gaussian blur, adaptive threshold
image = cv2.imread('./images/sample_0.jpg')

# compress the image if image size is > than 1000x1000
height, width, color = image.shape  # unpacking tuple (height, width, colour) returned by image.shape
while width > 1000:
    height = height/2
    width = width/2
    print(int(height), int(width))
height = int(height)
width = int(width)
image = cv2.resize(image, (width, height))

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9,9), 0)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30)

# Dilate to combine adjacent text contours
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
ret, thresh3 = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY_INV)
dilate = cv2.dilate(thresh, kernel, iterations=4)

# Find contours, highlight text areas, and extract ROIs
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#cnts = cv2.findContours(thresh3, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

# ORDER CONTOURS top to bottom
cnts.sort(key=lambda x: get_contour_precedence(x, image.shape[1]))

# delete previous roi images in folder roi to avoid
dir = './roi/'
for f in os.listdir(dir):
    os.remove(os.path.join(dir, f))

ROI_number = 0
for c in cnts:
    area = cv2.contourArea(c)
    if area > 10000:
        x, y, w, h = cv2.boundingRect(c)
        #cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 3)
        cv2.rectangle(image, (x, y), (x + w, y + h), (100,100,100), 1)
        # use below code to write roi when results are good
        ROI = image[y:y+h, x:x+w]
        cv2.imwrite('roi/ROI_{}.jpg'.format(ROI_number), ROI)
        ROI_number += 1

cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.imshow('image', image)
cv2.waitKey()
roi detection
output
