OpenCV - Computing distance between two edges in an image - image-processing

I am trying to compute distance (in # of pixels) between two edges in an image. I have corrected for image perspective using cv2.warpPerspective method and have converted the resulting image into grayscale followed by filtering using gaussian blur. I have tried various thresholding methods and found out that cv2.ADAPTIVE_THRESH_GAUSSIAN works best. Other methods are too noisy or miss the second edge in the left side of the object as seen in result of adaptive gaussian thresholding.
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Load the image
imgRoadvR10 = cv2.imread('sampleimage.jpg') # image is already corrected for perspective warp using cv2.warpPerspective
# convert to grayscale
imgRoadvR10_GrayPersp = cv2.cvtColor(imgRoadvR10, cv2.COLOR_BGR2GRAY)
# gaussian blur
a10lvR10_gblur = cv2.GaussianBlur(imgRoadvR10_GrayPersp,(5,5),0)
# Try different thresholding methods
ret,a10lvR10_th1 = cv2.threshold(a10lvR10_gblur,127,255,cv2.THRESH_BINARY)
a10lvR10_th2 = cv2.adaptiveThreshold(a10lvR10_gblur,255,cv2.ADAPTIVE_THRESH_MEAN_C,\
a10lvR10_th3 = cv2.adaptiveThreshold(a10lvR10_gblur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
# Otsu's thresholding
ret2,a10lvR10_th4 = cv2.threshold(a10lvR10_gblur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
# Plot results
titles = ['Original Image', 'Global Thresholding (v = 127)',
'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding','OTSU Thresholding']
images = [a10lvR10_gblur, a10lvR10_th1, a10lvR10_th2, a10lvR10_th3, a10lvR10_th4]
for i in range(5):
Closer look at result of adaptive gaussian thresholding:
I want to find the width of this rectangular object. The width is measured from the second edge on the left side to the edge on the right side (see image below):
How can I measure the width? I have been reading up on morphological operations and edge detection, but I am not sure how to proceed. Any suggestions will be appreciated.

This is not the best idea and I think a more logical and simple solution can be obtained. However, this idea may help you.
import cv2
import numpy as np
# Load the input image (the question's thresholded output saved as a file).
im = cv2.imread("test3.jpg", 1)
# Convert to grayscale so a single-channel threshold can be applied.
mask = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
# Convert to pure black and white with a fixed global threshold of 127.
mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)[1]
# Try to remove noise: erode, then dilate, then median-blur.
# A median blur alone (or any other denoising method) may be enough.
# NOTE(review): both kernels below have a second dimension of 0, i.e. they are
# empty arrays -- presumably something like (8, 1) / (32, 1) was intended; confirm.
mask = cv2.erode(mask, np.ones((8, 0), "uint8"))
mask = cv2.dilate(mask, np.ones((32, 0), "uint8"))
mask = cv2.medianBlur(mask, 9)
# Save the cleaned image.
cv2.imwrite("out1.jpg", mask)
A cleaner version of your output image:
Next we can get the coordinates of the lines. I got the coordinates of the first line from the left. I think you have to change the code a bit to get the coordinates of the sidebar.
h = len(mask) - 1
def count(row):
    """Return the number of elements in ``row`` that are exactly 255.

    ``row`` is a one-dimensional sequence of pixel intensities (for example a
    single row of a binary mask).  An empty row yields 0.
    """
    # Vectorised comparison instead of a Python-level index loop.
    return int(np.count_nonzero(np.asarray(row) == 255))
def line(im, pt1, pt2, color, thickness):
im = cv2.line(
return im
def center(x1, y1, x2, y2):
    """Return the midpoint of the segment (x1, y1)-(x2, y2) as an (x, y) tuple.

    Each coordinate is averaged with true division and then truncated toward
    zero by ``int``, so the result is always a pair of Python ints.
    """
    mid_x = int((x1 + x2) / 2)
    mid_y = int((y1 + y2) / 2)
    return (mid_x, mid_y)
topLeft = count(mask[0])
bottomLeft = count(mask[h])
# to shadow and hide the old left line
mask = line(mask, (topLeft, 0), (bottomLeft, h), (0, 0, 0), 80)
topRight = count(mask[0])
bottomRight = count(mask[h])
# to shadow and hide the old right line
mask = line(mask, (topRight, 0), (bottomRight, h), (0, 0, 0), 80)
mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
# to draw new clean left line
mask = line(mask, (topLeft, 0), (bottomLeft, h), (128, 0, 255), 25)
# to draw new clean right line
mask = line(mask, (topRight, 0), (bottomRight, h), (128, 0, 255), 25)
a = center(topLeft, 0, bottomLeft, h)
b = center(topRight, 0, bottomRight, h)
mask = line(mask, a, b, (128, 0, 255), 25)
cv2.imwrite("out2.jpg", mask)
Now you can calculate the distance between "a" and "b".


Remove Yellow rectangle from image

I am using this code to remove this yellow stamp from an image :
import cv2
import numpy as np
# read image
img = cv2.imread('input.jpg')
# threshold on yellow
lower = (0, 200, 200)
upper = (100, 255, 255)
thresh = cv2.inRange(img, lower, upper)
# apply dilate morphology
kernel = np.ones((9, 9), np.uint8)
mask = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, kernel)
# get largest contour
contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
big_contour = max(contours, key=cv2.contourArea)
x, y, w, h = cv2.boundingRect(big_contour)
# draw filled white contour on input
result = img.copy()
cv2.drawContours(result, [big_contour], 0, (255, 255, 255), -1)
cv2.imwrite('yellow_removed.png', result)
# show the images
cv2.imshow("RESULT", result)
I get the following error:
big_contour = max(contours, key=cv2.contourArea) ValueError: max() arg
is an empty sequence
Obviously, it is not detecting any contours, and the contours array is empty, but I could not figure out why that is or how to fix it.
Help is appreciated!
Check your lower thresholds. It worked for me for both images when I changed the lower threshold to lower = (0, 120, 120).
The thresholds are the cause: the second image is darker, so its yellow pixels fall below the original lower bound. Lowering these thresholds captures more of the yellow area, but will still leave some holes when drawing the contour.
lower = (0, 130, 130)
You can fix this by drawing the bounding rectangle instead.
Using HSV color space is great for figuring out a particular shade/tone of color. When you have dominant colors to isolate, you can opt for the LAB color space. I have explained as to why this is better in this answer.
img = cv2.imread('bill.jpg')
# create another copy for the result
img2 = img.copy()
# convert to LAB space and store b-channel
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
b_channel = lab[:,:,-1]
Notice how bright the yellow region is above.
# Perform Otsu threshold
th = cv2.threshold(b_channel, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# Find the contour with largest area
contours, hierarchy = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
c = max(contours, key = cv2.contourArea)
# draw the contour on plain black image of same shape as original
mask = np.zeros((img.shape[0], img.shape[1]), np.uint8)
mask = cv2.drawContours(mask,[c],0,255, -1)
# dilation to avoid border effects
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
dilate = cv2.dilate(mask, kernel, iterations=1)
img2[dilate == 255] = (255, 255, 255)
Another example:

How to connect disjointed lines or edges in images?

I am currently working on lines extraction from a binary image. I initially performed a few image processing steps including threshold segmentation and obtained the following binary image.
As can be seen in the binary image, the lines are split or broken. I want to join the broken lines as shown in the image below, marked in red. I drew the red line manually for demonstration.
FYI, I used the following code to perform the preprocessing.
img = cv2.imread('original_image.jpg') # loading image
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # coverting to gray scale
median_filter = cv2.medianBlur (gray_image, ksize = 5) # median filtering
th, thresh = cv2.threshold (median_filter, median_filter.mean(), 255, cv2.THRESH_BINARY) # theshold segmentation
# small dots and noise removing
nlabels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, None, None, None, 8, cv2.CV_32S)
areas = stats[1:,cv2.CC_STAT_AREA]
result = np.zeros((labels.shape), np.uint8)
min_size = 150
for i in range(0, nlabels - 1):
if areas[i] >= min_size: #keep
result[labels == i + 1] = 255
fig, ax = plt.subplots(2,1, figsize=(30,20))
ax[0].set_title('Original image')
ax[1].imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
ax[1].set_title('preprocessed image')
I would really appreciate it if you have any suggestions or steps on how to connect the lines? Thank you
Using the following sequence of methods I was able to get a rough approximation. It is a very simple solution and might not work for all cases.
1. Morphological operations
To merge neighboring lines perform morphological (dilation) operations on the binary image.
img = cv2.imread('image_path', 0) # grayscale image
img1 = cv2.imread('image_path', 1) # color image
th = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (19, 19))
morph = cv2.morphologyEx(th, cv2.MORPH_DILATE, kernel)
2. Finding contours and extreme points
My idea now is to find contours.
Then find the extreme points of each contour.
Finally find the closest distance among these extreme points between neighboring contours. And draw a line between them.
cnts1 = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts1[0] # storing contours in a variable
Lets take a quick detour to visualize where these extreme points are present:
# visualize extreme points for each contour
for c in cnts:
    # Pick the contour point with the smallest/largest x and y coordinate.
    # Assumes OpenCV's usual (N, 1, 2) contour layout, so the flat index from
    # argmin/argmax is the index of the point -- TODO confirm.
    left = tuple(c[c[:, :, 0].argmin()][0])
    right = tuple(c[c[:, :, 0].argmax()][0])
    top = tuple(c[c[:, :, 1].argmin()][0])
    bottom = tuple(c[c[:, :, 1].argmax()][0])
    # Draw dots onto image (one colour per extreme point)
    cv2.circle(img1, left, 8, (0, 50, 255), -1)
    cv2.circle(img1, right, 8, (0, 255, 255), -1)
    cv2.circle(img1, top, 8, (255, 50, 0), -1)
    cv2.circle(img1, bottom, 8, (255, 255, 0), -1)
(Note: The extreme points are based on the contours obtained from the morphological operations, but are drawn on the original image.)
3. Finding closest distances between neighboring contours
Sorry for the many loops.
First, iterate through every contour (split line) in the image.
Find the extreme points for them. Extreme points mean top-most, bottom-most, right-most and left-most points based on its respective bounding box.
Compare the distance between every extreme point of a contour with those of every other contour. And draw a line between points with the least distance.
for i in range(len(cnts)):
    # Upper bound for the search: the larger of the two image dimensions.
    min_dist = max(img.shape[0], img.shape[1])
    # Holds at most one entry, [pt_on_ci, pt_on_cj, distance], describing the
    # closest pair of extreme points found so far for contour i.
    cl = []

    ci = cnts[i]
    ci_left = tuple(ci[ci[:, :, 0].argmin()][0])
    ci_right = tuple(ci[ci[:, :, 0].argmax()][0])
    ci_top = tuple(ci[ci[:, :, 1].argmin()][0])
    ci_bottom = tuple(ci[ci[:, :, 1].argmax()][0])
    ci_list = [ci_bottom, ci_left, ci_right, ci_top]

    # Compare only against later contours so each pair (i, j) is visited once.
    for j in range(i + 1, len(cnts)):
        cj = cnts[j]
        cj_left = tuple(cj[cj[:, :, 0].argmin()][0])
        cj_right = tuple(cj[cj[:, :, 0].argmax()][0])
        cj_top = tuple(cj[cj[:, :, 1].argmin()][0])
        cj_bottom = tuple(cj[cj[:, :, 1].argmax()][0])
        cj_list = [cj_bottom, cj_left, cj_right, cj_top]

        for pt1 in ci_list:
            for pt2 in cj_list:
                # dist = sqrt( (x2 - x1)**2 + (y2 - y1)**2 ), truncated to int
                dist = int(np.linalg.norm(np.array(pt1) - np.array(pt2)))
                if dist < min_dist:
                    min_dist = dist
                    cl = [[pt1, pt2, min_dist]]

    # Join contour i to its nearest neighbour, if one was found.
    if len(cl) > 0:
        cv2.line(img1, cl[0][0], cl[0][1], (255, 255, 255), thickness=5)
4. Post-processing
Since the final output is not perfect, you can perform additional morphology operations and then skeletonize it.

Adjusting pytesseract parameters

Note: I am migrating this question from Data Science Stack Exchange, where it received little exposure.
I am trying to implement an OCR solution to identify the numbers read from the picture of a screen.
I am adapting this pyimagesearch tutorial to my problem.
Because I am dealing with a dark background, I first invert the image, before converting it to grayscale and thresholding it:
inverted_cropped_image = cv2.bitwise_not(cropped_image)
gray = get_grayscale(inverted_cropped_image)
thresholded_image = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)[1]
Then I call pytesseract's image_to_data function to output a dictionary containing the different text regions and their confidence intervals:
from pytesseract import Output
results = pytesseract.image_to_data(thresholded_image, output_type=Output.DICT)
Finally I iterate over results and plot them when their confidence exceeds a user defined threshold (70%). What bothers me, is that my script identifies everything in the image except the number that I would like to recognize (1227.938).
My first guess is that the image_to_data parameters are not set properly.
Checking this website, I selected a page segmentation mode (psm) of 11 (sparse text) and tried whitelisting numbers only (tessedit_char_whitelist=0123456789m.'):
results = pytesseract.image_to_data(thresholded_image, config='--psm 11 --oem 3 -c tessedit_char_whitelist=0123456789m.', output_type=Output.DICT)
Alas, this is even worse, and the script now identifies nothing at all!
Do you have any suggestion? Am I missing something obvious here?
EDIT #1:
At Ann Zen's request, here's the code used to obtain the first image:
import imutils
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pytesseract
from pytesseract import Output
def get_grayscale(image):
    """Return ``image`` converted from BGR to single-channel grayscale."""
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
filename = "IMAGE.JPG"
cropped_image = cv2.imread(filename)
inverted_cropped_image = cv2.bitwise_not(cropped_image)
gray = get_grayscale(inverted_cropped_image)
thresholded_image = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)[1]
results = pytesseract.image_to_data(thresholded_image, config='--psm 11 --oem 3 -c tessedit_char_whitelist=0123456789m.', output_type=Output.DICT)
color = (255, 255, 255)
for i in range(0, len(results["text"])):
x = results["left"][i]
y = results["top"][i]
w = results["width"][i]
h = results["height"][i]
text = results["text"][i]
conf = int(results["conf"][i])
print("Confidence: {}".format(conf))
if conf > 70:
print("Confidence: {}".format(conf))
print("Text: {}".format(text))
text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
cv2.rectangle(cropped_image, (x, y), (x + w, y + h), color, 2)
cv2.putText(cropped_image, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,1.2, color, 3)
cv2.imshow('Image', cropped_image)
EDIT #2:
Rarely have I spent reputation points so well! All three replies posted so far helped me refine my algorithm.
First, I wrote a Tkinter program allowing me to manually crop the image around the number of interest (modifying the one found in this SO post)
Then I used Ann Zen's idea of narrowing down the search area around the fractional part. I am using her nifty process function to prepare my grayscale image for contour extraction: contours, _ = cv2.findContours(process(img_gray), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE). I am using RETR_EXTERNAL to avoid dealing with overlapping bounding rectangles.
I then sorted my contours from left to right. Bounding rectangles exceeding a user-defined threshold are associated with the integral part (white rectangles); otherwise they are associated with the fractional part (black rectangles).
I then extracted the characters using Esraa's approach i.e. applying a Gaussian blur prior to calling Tesseract. I used a much larger kernel (15x15 vs 3x3) to achieve this.
I am not out of the woods yet, but hopefully I will get better results by using Ahx's adaptive thresholding.
The Concept
As you have probably heard, pytesseract is not good at detecting text of different sizes on the same line as one piece of text. In your case, you want to detect the 1227.938, where the 1227 is much larger than the .938.
One way to go about solving this is to have the program estimate where the .938 is, and enlarge that part of the image. After that, pytesseract will have no problem in returning the text.
The Code
import cv2
import numpy as np
import pytesseract
def process(img):
    """Return a binary edge image of ``img`` prepared for contour detection.

    The BGR input is converted to grayscale, thresholded at 200, passed through
    Canny edge detection, then dilated and eroded (two iterations each, 3x3
    kernel) so that broken edge fragments are joined.
    """
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(img_gray, 200, 255, cv2.THRESH_BINARY)
    img_canny = cv2.Canny(thresh, 100, 100)
    kernel = np.ones((3, 3))
    img_dilate = cv2.dilate(img_canny, kernel, iterations=2)
    return cv2.erode(img_dilate, kernel, iterations=2)
img = cv2.imread("image.png")
img_copy = img.copy()
hh = 50
contours, _ = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
if 20 * hh < cv2.contourArea(cnt) < 30 * hh:
x, y, w, h = cv2.boundingRect(cnt)
ww = int(hh / h * w)
src_seg = img[y: y + h, x: x + w]
dst_seg = img_copy[y: y + hh, x: x + ww]
h_seg, w_seg = dst_seg.shape[:2]
dst_seg[:] = cv2.resize(src_seg, (ww, hh))[:h_seg, :w_seg]
gray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)
results = pytesseract.image_to_data(thresh)
for b in map(str.split, results.splitlines()[1:]):
if len(b) == 12:
x, y, w, h = map(int, b[6: 10])
cv2.putText(img, b[11], (x, y + h + 15), cv2.FONT_HERSHEY_COMPLEX, 0.6, 0)
cv2.imshow("Result", img)
The Output
Here is the input image:
And here is the output image:
As you have said in your post, the only part you need is the decimal 1227.938. If you want to filter out the rest of the detected text, you can try tweaking some parameters. For example, replacing the 180 from _, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY) with 230 will result in the output image:
The Explanation
Import the necessary libraries:
import cv2
import numpy as np
import pytesseract
Define a function, process(), that will take in an image array, and return a binary image array that is the processed version of the image that will allow proper contour detection:
def process(img):
    """Return a binary edge image of ``img`` prepared for contour detection.

    The BGR input is converted to grayscale, thresholded at 200, passed through
    Canny edge detection, then dilated and eroded (two iterations each, 3x3
    kernel) so that broken edge fragments are joined.
    """
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(img_gray, 200, 255, cv2.THRESH_BINARY)
    img_canny = cv2.Canny(thresh, 100, 100)
    kernel = np.ones((3, 3))
    img_dilate = cv2.dilate(img_canny, kernel, iterations=2)
    return cv2.erode(img_dilate, kernel, iterations=2)
I'm sure that you don't have to do this, but due to a problem in my environment, I have to add pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' before I can call the pytesseract.image_to_data() method, or it throws an error:
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
Read in the original image, make a copy of it, and define the rough height of the large part of the decimal:
img = cv2.imread("image.png")
img_copy = img.copy()
hh = 50
Detect the contours of the processed version of the image, and add a filter that roughly filters out the contours so that the small text remains:
contours, _ = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
if 20 * hh < cv2.contourArea(cnt) < 30 * hh:
Define the bounding box of each contour that didn't get filtered out, and use the properties to enlarge those parts of the image to the height defined for the large text (making sure to also scale the width accordingly):
x, y, w, h = cv2.boundingRect(cnt)
ww = int(hh / h * w)
src_seg = img[y: y + h, x: x + w]
dst_seg = img_copy[y: y + hh, x: x + ww]
h_seg, w_seg = dst_seg.shape[:2]
dst_seg[:] = cv2.resize(src_seg, (ww, hh))[:h_seg, :w_seg]
Finally, we can use the pytesseract.image_to_data() method to detect the text. Of course, we'll need to threshold the image again:
gray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)
results = pytesseract.image_to_data(thresh)
for b in map(str.split, results.splitlines()[1:]):
if len(b) == 12:
x, y, w, h = map(int, b[6: 10])
cv2.putText(img, b[11], (x, y + h + 15), cv2.FONT_HERSHEY_COMPLEX, 0.6, 0)
cv2.imshow("Result", img)
I have been working with Tesseract for quite some time, so let me clarify something for you. Tesseract is extremely helpful if you're trying to recognize text in documents more than any other computer vision projects. It usually needs a binarized image to get a good output. Therefore, you will always need some image pre-processing.
However, after several trials in the past with all page segmentation modes, I realized that it fails when font size differs on the same line without having a space. Sometimes PSM 6 is helpful if the difference is low, but in your condition, you may try an alternative. If you don't care about the decimals, you may try the following solution:
img = cv2.imread(r'E:\Downloads\Iwzrg.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_blur = cv2.GaussianBlur(gray, (3,3),0)
_,thresh = cv2.threshold(img_blur,200,255,cv2.THRESH_BINARY_INV)
# If using a fixed camera
new_img = thresh[0:100, 80:320]
text = pytesseract.image_to_string(new_img, lang='eng', config='--psm 6 --oem 3 -c tessedit_char_whitelist=0123456789')
OUTPUT: 1227
I would like to recommend applying another image processing method.
Because I am dealing with a dark background, I first invert the image, before converting it to grayscale and thresholding it:
You applied global thresholding and couldn't achieve the desired result.
Then you can apply either adaptive-thresholding or inRange
For the given image, if we apply the inRange threshold:
To be able to recognize the image as accurately as possible we can add a border to the top of the image and resize the image (Optional)
In the OCR section, check if the detected region contains a digit
if text.isdigit():
Then display on the image:
The result is nearly the desired value. Now you can try with the other suggested methods to find the exact value.
The problem is .938 recognized as 235, maybe resizing using different values might improve the result.
from cv2 import imread, cvtColor, COLOR_BGR2HSV as HSV, inRange, getStructuringElement, resize
from cv2 import imshow, waitKey, MORPH_RECT, dilate, bitwise_and, rectangle, putText
from cv2 import copyMakeBorder as addBorder, BORDER_CONSTANT as CONSTANT, FONT_HERSHEY_SIMPLEX
from numpy import array
from pytesseract import image_to_data, Output
bgr = imread("Iwzrg.png")
resized = resize(bgr, (800, 600), fx=0.75, fy=0.75)
bordered = addBorder(resized, 200, 0, 0, 0, CONSTANT, value=0)
hsv = cvtColor(bordered, HSV)
mask = inRange(hsv, array([0, 0, 250]), array([179, 255, 255]))
kernel = getStructuringElement(MORPH_RECT, (50, 30))
dilated = dilate(mask, kernel, iterations=1)
thresh = 255 - bitwise_and(dilated, mask)
data = image_to_data(thresh, output_type=Output.DICT)
for i in range(0, len(data["text"])):
x = data["left"][i]
y = data["top"][i]
w = data["width"][i]
h = data["height"][i]
text = data["text"][i]
if text.isdigit():
print("Text: {}".format(text))
text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
rectangle(thresh, (x, y), (x + w, y + h), (0, 255, 0), 2)
putText(thresh, text, (x, y - 10), FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
imshow("", thresh)

I want to detect all the underlined words in a paragraph

Original Image
Click here for the image
For this, I am trying to detect the underlines first. But as the underlines might be tilted, this code:
import time
from google.colab.patches import cv2_imshow
from collections import OrderedDict
# Let's load a simple image with 3 black squares
image = cv2.imread("line_detected.png")
# Grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Find Canny edges
edged = cv2.Canny(gray, 30, 200)
# Finding Contours
# Use a copy of the image e.g. edged.copy()
# since findContours alters the image
contours, hierarchy = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
print("Number of Contours found = " + str(len(contours)))
# Draw all contours
# -1 signifies drawing all contours
# cv2.drawContours(image, contours, -1, (0, 255, 0), 3)
mask = np.ones(image.shape[:2], dtype="uint8") * 255
nuclei = []
contours = contours[::-1]
for cnt in contours:
    peri = cv2.arcLength(cnt, True)
    # Simplify the contour; the tolerance is 4% of its perimeter.
    approx = cv2.approxPolyDP(cnt, 0.04 * peri, True)
    # Keep only contours whose approximation has exactly two vertices
    # (presumably the line-like strokes such as underlines).
    if len(approx) == 2:
        x, y, w, h = cv2.boundingRect(cnt)
        # Outline the candidate on the loaded image.
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
is not able to detect the slanting underlines properly. Also, I want to extend this code to detect only the gray underlines. "minor differences" has a single underline, but because it is slanted/tilted, the code reads it as two straight lines. It is also picking up the images on the left, which it should not read (Tesseract gives weird output for them).
For the gray shade only I found this mask thing online:
lower_range = np.array([110,50,50])
upper_range = np.array([130,255,255])
mask = cv2.inRange(hsv, lower_range, upper_range)
But Don't know how to incorporate in code... I'm a beginner, any help is much appreciated!

How to find corners on a Image using OpenCv

I'm trying to find the corners of an image. I don't need the contours, only the 4 corners. I will change the perspective using the 4 corners.
I´m using Opencv, but I need to know the steps to find the corners and what function I will use.
My images will be like this:(without red points, I will paint the points after)
After following the suggested steps, I wrote this code (note: I'm not using pure OpenCV, I'm using JavaCV, but the logic is the same):
// Load two images and allocate other structures (I´m using other image)
IplImage colored = cvLoadImage(
IplImage gray = cvCreateImage(cvGetSize(colored), IPL_DEPTH_8U, 1);
IplImage smooth = cvCreateImage(cvGetSize(colored), IPL_DEPTH_8U, 1);
//Step 1 - Convert from RGB to grayscale (cvCvtColor)
cvCvtColor(colored, gray, CV_RGB2GRAY);
//2 Smooth (cvSmooth)
cvSmooth( gray, smooth, CV_BLUR, 9, 9, 2, 2);
//3 - cvThreshold - What values?
cvThreshold(gray,gray, 155, 255, CV_THRESH_BINARY);
//4 - Detect edges (cvCanny) -What values?
int N = 7;
int aperature_size = N;
double lowThresh = 20;
double highThresh = 40;
cvCanny( gray, gray, lowThresh*N*N, highThresh*N*N, aperature_size );
//5 - Find contours (cvFindContours)
int total = 0;
CvSeq contour2 = new CvSeq(null);
CvMemStorage storage2 = cvCreateMemStorage(0);
CvMemStorage storageHull = cvCreateMemStorage(0);
total = cvFindContours(gray, storage2, contour2, Loader.sizeof(CvContour.class), CV_RETR_CCOMP, CV_CHAIN_APPROX_NONE);
if(total > 1){
while (contour2 != null && !contour2.isNull()) {
if (contour2.elem_size() > 0) {
//6 - Approximate contours with linear features (cvApproxPoly)
CvSeq points = cvApproxPoly(contour2,Loader.sizeof(CvContour.class), storage2, CV_POLY_APPROX_DP,cvContourPerimeter(contour2)*0.005, 0);
cvDrawContours(gray, points,CvScalar.BLUE, CvScalar.BLUE, -1, 1, CV_AA);
contour2 = contour2.h_next();
So, I want to find the corners, but I don't know how to use corner functions like cvCornerHarris and others.
First, check out /samples/c/squares.c in your OpenCV distribution. This example provides a square detector, and it should be a pretty good start on how to detect corner-like features. Then, take a look at OpenCV's feature-oriented functions like cvCornerHarris() and cvGoodFeaturesToTrack().
The above methods can return many corner-like features - most will not be the "true corners" you are looking for. In my application, I had to detect squares that had been rotated or skewed (due to perspective). My detection pipeline consisted of:
Convert from RGB to grayscale (cvCvtColor)
Smooth (cvSmooth)
Threshold (cvThreshold)
Detect edges (cvCanny)
Find contours (cvFindContours)
Approximate contours with linear features (cvApproxPoly)
Find "rectangles" which were structures that: had polygonalized contours possessing 4 points, were of sufficient area, had adjacent edges were ~90 degrees, had distance between "opposite" vertices was of sufficient size, etc.
Step 7 was necessary because a slightly noisy image can yield many structures that appear rectangular after polygonalization. In my application, I also had to deal with square-like structures that appeared within, or overlapped the desired square. I found the contour's area property and center of gravity to be helpful in discerning the proper rectangle.
At a first glance, for a human eye there are 4 corners. But in computer vision, a corner is considered to be a point that has large gradient change in intensity across its neighborhood. The neighborhood can be a 4 pixel neighborhood or an 8 pixel neighborhood.
In the equation provided to find the gradient of intensity, it has been considered for 4-pixel neighborhood SEE DOCUMENTATION.
Here is my approach for the image in question. I have the code in python as well:
path = r'C:\Users\selwyn77\Desktop\Stack\corner'
filename = 'env.jpg'
img = cv2.imread(os.path.join(path, filename))
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY) #--- convert to grayscale
It is a good choice to always blur the image to remove less possible gradient changes and preserve the more intense ones. I opted to choose the bilateral filter which unlike the Gaussian filter doesn't blur all the pixels in the neighborhood. It rather blurs pixels which has similar pixel intensity to that of the central pixel. In short it preserves edges/corners of high gradient change but blurs regions that have minimal gradient changes.
bi = cv2.bilateralFilter(gray, 5, 75, 75)
To a human it is not so much of a difference compared to the original image. But it does matter. Now finding possible corners:
dst = cv2.cornerHarris(bi, 2, 3, 0.04)
dst returns an array (the same 2D shape of the image) with eigen values obtained from the final equation mentioned HERE.
Now a threshold has to be applied to select those corners beyond a certain value. I will use the one in the documentation:
#--- create a black image to see where those corners occur ---
mask = np.zeros_like(gray)
#--- applying a threshold and turning those pixels above the threshold to white ---
mask[dst>0.01*dst.max()] = 255
cv2.imshow('mask', mask)
The white pixels are regions of possible corners. You can find many corners neighboring each other.
To draw the selected corners on the image:
img[dst > 0.01 * dst.max()] = [0, 0, 255] #--- [0, 0, 255] --> Red ---
cv2.imshow('dst', img)
(Red colored pixels are the corners, not so visible)
In order to get an array of all pixels with corners:
# Index pairs of every non-zero pixel in mask, i.e. every corner candidate.
coor = np.argwhere(mask)
Variable coor is an array of arrays. Converting it to list of lists
coor_list = [l.tolist() for l in list(coor)]
Converting the above to list of tuples
coor_tuples = [tuple(l) for l in coor_list]
I have an easy and rather naive way to find the 4 corners. I simply calculated the distance of each corner to every other corner. I preserved those corners whose distance exceeded a certain threshold.
Here is the code:
thresh = 50
def distance(pt1, pt2):
    """Return the Euclidean distance between two 2-D points.

    ``pt1`` and ``pt2`` are each a pair of coordinates; anything that does not
    unpack into exactly two values raises ``ValueError``/``TypeError``.
    """
    (x1, y1), (x2, y2) = pt1, pt2
    return math.hypot(x2 - x1, y2 - y1)
coor_tuples_copy = coor_tuples
i = 1
for pt1 in coor_tuples:
print(' I :', i)
for pt2 in coor_tuples[i::1]:
print(pt1, pt2)
print('Distance :', distance(pt1, pt2))
if(distance(pt1, pt2) < thresh):
Prior to running the snippet above coor_tuples had all corner points:
[(4, 42),
(4, 43),
(5, 43),
(5, 44),
(6, 44),
(7, 219),
(133, 36),
(133, 37),
(133, 38),
(134, 37),
(135, 224),
(135, 225),
(136, 225),
(136, 226),
(137, 225),
(137, 226),
(137, 227),
(138, 226)]
After running the snippet I was left with 4 corners:
[(4, 42), (7, 219), (133, 36), (135, 224)]
Now all you have to do is just mark these 4 points on a copy of the original image.
img2 = img.copy()
for pt in coor_tuples:
    # The points come from np.argwhere, so each is reversed before being
    # passed to cv2.circle as the centre.
    cv2.circle(img2, tuple(reversed(pt)), 3, (0, 0, 255), -1)
cv2.imshow('Image with 4 corners', img2)
Here's an implementation using cv2.goodFeaturesToTrack() to detect corners. The approach is
Convert image to grayscale
Perform canny edge detection
Detect corners
Optionally perform 4-point perspective transform to get top-down view of image
Using this starting image,
After converting to grayscale, we perform canny edge detection
Now that we have a decent binary image, we can use cv2.goodFeaturesToTrack()
corners = cv2.goodFeaturesToTrack(canny, 4, 0.5, 50)
For the parameters, we give it the canny image, set the maximum number of corners to 4 (maxCorners), use a minimum accepted quality of 0.5 (qualityLevel), and set the minimum possible Euclidean distance between the returned corners to 50 (minDistance). Here's the result
Now that we have identified the corners, we can perform a 4-point perspective transform to obtain a top-down view of the object. We first order the points clockwise then draw the result onto a mask.
Note: We could have just found contours on the Canny image instead of doing this step to create the mask, but pretend we only had the 4 corner points to work with
Next we find contours on this mask and filter using cv2.arcLength() and cv2.approxPolyDP(). The idea is that if the contour has 4 points, then it must be our object. Once we have this contour, we perform a perspective transform
Finally we rotate the image depending on the desired orientation. Here's the result
Code for only detecting corners
import cv2

# Load the image and build a binary edge map for the corner detector.
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
canny = cv2.Canny(gray, 120, 255, 1)

# At most 4 corners, quality level 0.5, at least 50 px apart.
corners = cv2.goodFeaturesToTrack(canny, 4, 0.5, 50)

# goodFeaturesToTrack may return None when nothing is detected.
if corners is not None:
    for corner in corners:
        # Corner coordinates come back as floats; drawing calls need ints.
        x, y = (int(v) for v in corner.ravel())
        cv2.circle(image, (x, y), 5, (36, 255, 12), -1)

cv2.imshow('canny', canny)
cv2.imshow('image', image)
# imshow only renders once the GUI event loop runs; block until a key press.
cv2.waitKey(0)
Code for detecting corners and performing perspective transform
import cv2
import numpy as np
def rotate_image(image, angle):
    """Rotate ``image`` by ``angle`` degrees without cropping the result.

    The angle is negated before building the rotation matrix, and the output
    canvas is enlarged to the bounding box of the rotated image so that no
    part of the picture is cut off.
    """
    height, width = image.shape[:2]
    center_x, center_y = width / 2, height / 2

    # Build the 2x3 affine matrix around the image center; its first row
    # carries the cosine and sine of the rotation.
    rotation = cv2.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    abs_cos = np.abs(rotation[0, 0])
    abs_sin = np.abs(rotation[0, 1])

    # Size of the axis-aligned box that encloses the rotated image.
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # Shift the translation terms so the old center maps to the new center.
    rotation[0, 2] += (new_width / 2) - center_x
    rotation[1, 2] += (new_height / 2) - center_y

    return cv2.warpAffine(image, rotation, (new_width, new_height))
def order_points_clockwise(pts):
    """Return four 2-D points ordered top-left, top-right, bottom-right, bottom-left.

    The points are split into a left pair and a right pair by x-coordinate,
    and each pair is then split into top and bottom by y-coordinate.

    Args:
        pts: Array-like of shape (4, 2) (extra columns are carried along),
            one point per row with x in column 0 and y in column 1.

    Returns:
        An ``int32`` NumPy array with the rows in top-left, top-right,
        bottom-right, bottom-left order.

    Raises:
        ValueError: If ``pts`` is not a 2-D array of exactly four points.
    """
    pts = np.asarray(pts)
    if pts.ndim != 2 or pts.shape[0] != 4 or pts.shape[1] < 2:
        raise ValueError(
            f"expected 4 points of shape (4, 2), got array of shape {pts.shape}"
        )

    # Sort by x-coordinate: the first two rows are the left-most points,
    # the last two rows are the right-most points.
    by_x = pts[np.argsort(pts[:, 0])]
    left_pair, right_pair = by_x[:2], by_x[2:]

    # Within each pair the smaller y is the top point (image y grows downward).
    tl, bl = left_pair[np.argsort(left_pair[:, 1])]
    tr, br = right_pair[np.argsort(right_pair[:, 1])]

    return np.array([tl, tr, br, bl], dtype="int32")
def perspective_transform(image, corners):
    """Warp the quadrilateral given by ``corners`` into a top-down rectangle.

    ``corners`` is read as ``corners[i][0][0]`` / ``corners[i][0][1]`` and is
    assumed to arrive in top-right, top-left, bottom-left, bottom-right order;
    no geometric re-sorting is done here. The output size is the longer of
    each pair of opposite sides, truncated to whole pixels.
    """

    def order_corner_points(corners):
        # Assumed incoming index order:
        #   0 - top-right, 1 - top-left, 2 - bottom-left, 3 - bottom-right
        points = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = points[0], points[1], points[2], points[3]
        return (top_l, top_r, bottom_r, bottom_l)

    def side_length(p, q):
        # Euclidean distance between two (x, y) points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Re-label the corners as top-left, top-right, bottom-right, bottom-left.
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Output width: the longer of the bottom and top edges.
    width = max(int(side_length(bottom_r, bottom_l)), int(side_length(top_r, top_l)))

    # Output height: the longer of the right and left edges.
    height = max(int(side_length(top_r, bottom_r)), int(side_length(top_l, bottom_l)))

    # Destination rectangle, listed in the same top-left, top-right,
    # bottom-right, bottom-left order as the source corners.
    dimensions = np.array(
        [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
        dtype="float32",
    )

    # getPerspectiveTransform needs float32 source points.
    source = np.array(ordered_corners, dtype="float32")
    matrix = cv2.getPerspectiveTransform(source, dimensions)

    return cv2.warpPerspective(image, matrix, (width, height))
# Load the image and keep an unmarked copy for the final warp.
image = cv2.imread('1.png')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
canny = cv2.Canny(gray, 120, 255, 1)

# At most 4 corners, quality level 0.5, at least 50 px apart.
corners = cv2.goodFeaturesToTrack(canny, 4, 0.5, 50)
if corners is None or len(corners) < 4:
    raise SystemExit('Could not detect 4 corners in the Canny image')

c_list = []
for corner in corners:
    # Corner coordinates come back as floats; drawing calls need ints.
    x, y = (int(v) for v in corner.ravel())
    c_list.append([x, y])
    cv2.circle(image, (x, y), 5, (36, 255, 12), -1)

# Order the 4 corners and fill the quadrilateral they span onto a blank mask.
corner_points = np.array([c_list[0], c_list[1], c_list[2], c_list[3]])
ordered_corner_points = order_points_clockwise(corner_points)
mask = np.zeros(image.shape, dtype=np.uint8)
cv2.fillPoly(mask, [ordered_corner_points], (255, 255, 255))

# findContours needs a single-channel image.
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
cnts = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# The return tuple has 2 or 3 elements depending on the OpenCV version.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

# Stays None when no contour approximates to exactly 4 points.
transformed = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        transformed = perspective_transform(original, approx)

cv2.imshow('canny', canny)
cv2.imshow('image', image)
cv2.imshow('mask', mask)
if transformed is None:
    print('No 4-point contour found; skipping perspective transform')
else:
    # Rotate the warped image to the desired orientation.
    result = rotate_image(transformed, -90)
    cv2.imshow('transformed', transformed)
    cv2.imshow('result', result)
# imshow only renders once the GUI event loop runs; block until a key press.
cv2.waitKey(0)
Find contours with the RETR_EXTERNAL option (gray -> Gaussian filter -> Canny edge -> find contours).
Find the largest contour -> this will be the edge of the rectangle.
Find the corners with a little calculation:
Mat m;//image file
findContours(m, contours_, hierachy_, RETR_EXTERNAL);
auto it = max_element(contours_.begin(), contours_.end(),
[](const vector<Point> &a, const vector<Point> &b) {
return a.size() < b.size(); });
Point2f xy[4] = {{9000,9000}, {0, 1000}, {1000, 0}, {0,0}};
for(auto &[x, y] : *it) {
if(x + y < xy[0].x + xy[0].y) xy[0] = {x, y};
if(x - y > xy[1].x - xy[1].y) xy[1] = {x, y};
if(y - x > xy[2].y - xy[2].x) xy[2] = {x, y};
if(x + y > xy[3].x + xy[3].y) xy[3] = {x, y};
The array xy (elements xy[0] to xy[3]) will then hold the four corners.
I was able to extract four corners this way.
Apply Hough lines to the Canny image - you will get a list of points.
Apply a convex hull to this set of points.
