Though I realize that there is no "one size fits all" setting for OpenCV's HoughCircles, I'm having quite a bit of trouble finding even one reasonable set of parameters.
My input image is the following photo, which contains some pretty obvious big black circles, as well as some noise around it:
I tried playing with the p1 and p2 arguments, to try and get precisely the four black circles detected (and optionally the tape roll at the top -- that's not required but I wouldn't mind if it matched either).
import numpy as np
import cv2
gray = frame = cv2.imread('testframe2.png')
gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray,(5,5),0)
# gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 5, 2)
p1 = 200
p2 = 55
while True:
out = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, 10, param1=p1, param2=p2, minRadius=10, maxRadius=0)
if circles is not None:
for (x, y, r) in circles[0]:
cv2.rectangle(out, (int(x - r), int(y - r)), (int(x + r), int(y + r)), (255, 0, 0))
cv2.putText(out, "r = %d" % int(r), (int(x + r), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 0, 0))
cv2.putText(out, "p: (%d, %d)" % (p1, p2), (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 4)
cv2.imshow('debug', out)
if cv2.waitKey(0) & 0xFF == ord('x'):
break
elif cv2.waitKey(0) & 0xFF == ord('q'):
p1 += 5
elif cv2.waitKey(0) & 0xFF == ord('a'):
p1 -= 5
elif cv2.waitKey(0) & 0xFF == ord('w'):
p2 += 5
elif cv2.waitKey(0) & 0xFF == ord('s'):
p2 -= 5
cv2.destroyAllWindows()
It seems the best I can do is detect the big circle several times but not the small one at all, or get a lot of false positives:
I've Read The F** Manual but it does not help me further: how do I somewhat reliably detect the circles and nothing but the circles in this image?
There was a bit of manual tweaking with the HoughCircles params, but this gives the result you're looking for. I've used the OpenCV Wrapper library which just simplifies some things.
import cv2
import opencv_wrapper as cvw
import numpy as np
frame = cv2.imread("tape.png")
gray = cvw.bgr2gray(frame)
thresh = cvw.threshold_otsu(gray, inverse=True)
opened = cvw.morph_open(thresh, 9)
circles = cv2.HoughCircles(
opened, cv2.HOUGH_GRADIENT, 1, 10, param1=100, param2=17, minRadius=5, maxRadius=-1
)
if circles is not None:
circles = np.around(circles).astype(int)
for circle in circles[0]:
cv2.floodFill(thresh, None, (circle[0], circle[1]), 155)
only_circles = thresh.copy()
only_circles[only_circles != 155] = 0
contours = cvw.find_external_contours(only_circles)
cvw.draw_contours(frame, contours, (255, 0, 255), thickness=2)
cv2.imwrite("tape_result.png", frame)
I used HoughCircles to find just the centers, as suggested in the documentation note.
I then used floodFill to fill the circles. Note that the left-most circle is very close to the edge. If the image was blurred, the flood filling would go into the background.
Disclosure: I'm the author of OpenCV Wrapper. Haven't added Hough Circles and flood filling yet.
Related
I am currently working on lines extraction from a binary image. I initially performed a few image processing steps including threshold segmentation and obtained the following binary image.
As can be seen in the binary image the lines are splitted or broken. And I wanted to join the broken line as shown in the image below marked in red. I marked the red line manually for a demonstration.
FYI, I used the following code to perform the preprocessing.
img = cv2.imread('original_image.jpg') # loading image
gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # coverting to gray scale
median_filter = cv2.medianBlur (gray_image, ksize = 5) # median filtering
th, thresh = cv2.threshold (median_filter, median_filter.mean(), 255, cv2.THRESH_BINARY) # theshold segmentation
# small dots and noise removing
nlabels, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, None, None, None, 8, cv2.CV_32S)
areas = stats[1:,cv2.CC_STAT_AREA]
result = np.zeros((labels.shape), np.uint8)
min_size = 150
for i in range(0, nlabels - 1):
if areas[i] >= min_size: #keep
result[labels == i + 1] = 255
fig, ax = plt.subplots(2,1, figsize=(30,20))
ax[0].imshow(img)
ax[0].set_title('Original image')
ax[1].imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
ax[1].set_title('preprocessed image')
I would really appreciate it if you have any suggestions or steps on how to connect the lines? Thank you
Using the following sequence of methods I was able to get a rough approximation. It is a very simple solution and might not work for all cases.
1. Morphological operations
To merge neighboring lines perform morphological (dilation) operations on the binary image.
img = cv2.imread('image_path', 0) # grayscale image
img1 = cv2.imread('image_path', 1) # color image
th = cv2.threshold(img, 150, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (19, 19))
morph = cv2.morphologyEx(th, cv2.MORPH_DILATE, kernel)
2. Finding contours and extreme points
My idea now is to find contours.
Then find the extreme points of each contour.
Finally find the closest distance among these extreme points between neighboring contours. And draw a line between them.
cnts1 = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts1[0] # storing contours in a variable
Lets take a quick detour to visualize where these extreme points are present:
# visualize extreme points for each contour
for c in cnts:
left = tuple(c[c[:, :, 0].argmin()][0])
right = tuple(c[c[:, :, 0].argmax()][0])
top = tuple(c[c[:, :, 1].argmin()][0])
bottom = tuple(c[c[:, :, 1].argmax()][0])
# Draw dots onto image
cv2.circle(img1, left, 8, (0, 50, 255), -1)
cv2.circle(img1, right, 8, (0, 255, 255), -1)
cv2.circle(img1, top, 8, (255, 50, 0), -1)
cv2.circle(img1, bottom, 8, (255, 255, 0), -1)
(Note: The extreme points points are based of contours from morphological operations, but drawn on the original image)
3. Finding closest distances between neighboring contours
Sorry for the many loops.
First, iterate through every contour (split line) in the image.
Find the extreme points for them. Extreme points mean top-most, bottom-most, right-most and left-most points based on its respective bounding box.
Compare the distance between every extreme point of a contour with those of every other contour. And draw a line between points with the least distance.
for i in range(len(cnts)):
min_dist = max(img.shape[0], img.shape[1])
cl = []
ci = cnts[i]
ci_left = tuple(ci[ci[:, :, 0].argmin()][0])
ci_right = tuple(ci[ci[:, :, 0].argmax()][0])
ci_top = tuple(ci[ci[:, :, 1].argmin()][0])
ci_bottom = tuple(ci[ci[:, :, 1].argmax()][0])
ci_list = [ci_bottom, ci_left, ci_right, ci_top]
for j in range(i + 1, len(cnts)):
cj = cnts[j]
cj_left = tuple(cj[cj[:, :, 0].argmin()][0])
cj_right = tuple(cj[cj[:, :, 0].argmax()][0])
cj_top = tuple(cj[cj[:, :, 1].argmin()][0])
cj_bottom = tuple(cj[cj[:, :, 1].argmax()][0])
cj_list = [cj_bottom, cj_left, cj_right, cj_top]
for pt1 in ci_list:
for pt2 in cj_list:
dist = int(np.linalg.norm(np.array(pt1) - np.array(pt2))) #dist = sqrt( (x2 - x1)**2 + (y2 - y1)**2 )
if dist < min_dist:
min_dist = dist
cl = []
cl.append([pt1, pt2, min_dist])
if len(cl) > 0:
cv2.line(img1, cl[0][0], cl[0][1], (255, 255, 255), thickness = 5)
4. Post-processing
Since the final output is not perfect, you can perform additional morphology operations and then skeletonize it.
Note: I am migrating this question from Data Science Stack Exchange, where it received little exposure.
I am trying to implement an OCR solution to identify the numbers read from the picture of a screen.
I am adapting this pyimagesearch tutorial to my problem.
Because I am dealing with a dark background, I first invert the image, before converting it to grayscale and thresholding it:
inverted_cropped_image = cv2.bitwise_not(cropped_image)
gray = get_grayscale(inverted_cropped_image)
thresholded_image = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)[1]
Then I call pytesseract's image_to_data function to output a dictionary containing the different text regions and their confidence intervals:
from pytesseract import Output
results = pytesseract.image_to_data(thresholded_image, output_type=Output.DICT)
Finally I iterate over results and plot them when their confidence exceeds a user defined threshold (70%). What bothers me, is that my script identifies everything in the image except the number that I would like to recognize (1227.938).
My first guess is that the image_to_data parameters are not set properly.
Checking this website, I selected a page segmentation mode (psm) of 11 (sparse text) and tried whitelisting numbers only (tessedit_char_whitelist=0123456789m.'):
results = pytesseract.image_to_data(thresholded_image, config='--psm 11 --oem 3 -c tessedit_char_whitelist=0123456789m.', output_type=Output.DICT)
Alas, this is even worse, and the script now identifies nothing at all!
Do you have any suggestion? Am I missing something obvious here?
EDIT #1:
At Ann Zen's request, here's the code used to obtain the first image:
import imutils
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pytesseract
from pytesseract import Output
def get_grayscale(image):
return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
filename = "IMAGE.JPG"
cropped_image = cv2.imread(filename)
inverted_cropped_image = cv2.bitwise_not(cropped_image)
gray = get_grayscale(inverted_cropped_image)
thresholded_image = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)[1]
results = pytesseract.image_to_data(thresholded_image, config='--psm 11 --oem 3 -c tessedit_char_whitelist=0123456789m.', output_type=Output.DICT)
color = (255, 255, 255)
for i in range(0, len(results["text"])):
x = results["left"][i]
y = results["top"][i]
w = results["width"][i]
h = results["height"][i]
text = results["text"][i]
conf = int(results["conf"][i])
print("Confidence: {}".format(conf))
if conf > 70:
print("Confidence: {}".format(conf))
print("Text: {}".format(text))
print("")
text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
cv2.rectangle(cropped_image, (x, y), (x + w, y + h), color, 2)
cv2.putText(cropped_image, text, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,1.2, color, 3)
cv2.imshow('Image', cropped_image)
cv2.waitKey(0)
EDIT #2:
Rarely have I spent reputation points so well! All three replies posted so far helped me refine my algorithm.
First, I wrote a Tkinter program allowing me to manually crop the image around the number of interest (modifying the one found in this SO post)
Then I used Ann Zen's idea of narrowing down the search area around the fractional part. I am using her nifty process function to prepare my grayscale image for contour extraction: contours, _ = cv2.findContours(process(img_gray), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE). I am using RETR_EXTERNAL to avoid dealing with overlapping bounding rectangles.
I then sorted my contours from left to right. Bounding rectangles exceeding a user-defined threshold are associated with the integral part (white rectangles); otherwise they are associated with the fractional part (black rectangles).
I then extracted the characters using Esraa's approach i.e. applying a Gaussian blur prior to calling Tesseract. I used a much larger kernel (15x15 vs 3x3) to achieve this.
I am not out of the woods yet, but hopefully I will get better results by using Ahx's adaptive thresholding.
The Concept
As you have probably heard, pytesseract is not good at detecting text of different sizes on the same line as one piece of text. In your case, you want to detect the 1227.938, where the 1227 is much larger than the .938.
One way to go about solving this is to have the program estimate where the .938 is, and enlarge that part of the image. After that, pytesseract will have no problem in returning the text.
The Code
import cv2
import numpy as np
import pytesseract
def process(img):
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(img_gray, 200, 255, cv2.THRESH_BINARY)
img_canny = cv2.Canny(thresh, 100, 100)
kernel = np.ones((3, 3))
img_dilate = cv2.dilate(img_canny, kernel, iterations=2)
return cv2.erode(img_dilate, kernel, iterations=2)
img = cv2.imread("image.png")
img_copy = img.copy()
hh = 50
contours, _ = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
if 20 * hh < cv2.contourArea(cnt) < 30 * hh:
x, y, w, h = cv2.boundingRect(cnt)
ww = int(hh / h * w)
src_seg = img[y: y + h, x: x + w]
dst_seg = img_copy[y: y + hh, x: x + ww]
h_seg, w_seg = dst_seg.shape[:2]
dst_seg[:] = cv2.resize(src_seg, (ww, hh))[:h_seg, :w_seg]
gray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)
results = pytesseract.image_to_data(thresh)
for b in map(str.split, results.splitlines()[1:]):
if len(b) == 12:
x, y, w, h = map(int, b[6: 10])
cv2.putText(img, b[11], (x, y + h + 15), cv2.FONT_HERSHEY_COMPLEX, 0.6, 0)
cv2.imshow("Result", img)
cv2.waitKey(0)
The Output
Here is the input image:
And here is the output image:
As you have said in your post, the only part you need the the decimal 1227.938. If you want to filter out the rest of the detected text, you can try tweaking some parameters. For example, replacing the 180 from _, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY) with 230 will result in the output image:
The Explanation
Import the necessary libraries:
import cv2
import numpy as np
import pytesseract
Define a function, process(), that will take in an image array, and return a binary image array that is the processed version of the image that will allow proper contour detection:
def process(img):
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(img_gray, 200, 255, cv2.THRESH_BINARY)
img_canny = cv2.Canny(thresh, 100, 100)
kernel = np.ones((3, 3))
img_dilate = cv2.dilate(img_canny, kernel, iterations=2)
return cv2.erode(img_dilate, kernel, iterations=2)
I'm sure that you don't have to do this, but due to a problem in my environment, I have to add pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' before I can call the pytesseract.image_to_data() method, or it throws an error:
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
Read in the original image, make a copy of it, and define the rough height of the large part of the decimal:
img = cv2.imread("image.png")
img_copy = img.copy()
hh = 50
Detect the contours of the processed version of the image, and add a filter that roughly filters out the contours so that the small text remains:
contours, _ = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
if 20 * hh < cv2.contourArea(cnt) < 30 * hh:
Define the bounding box of each contour that didn't get filtered out, and use the properties to enlarge those parts of the image to the height defined for the large text (making sure to also scale the width accordingly):
x, y, w, h = cv2.boundingRect(cnt)
ww = int(hh / h * w)
src_seg = img[y: y + h, x: x + w]
dst_seg = img_copy[y: y + hh, x: x + ww]
h_seg, w_seg = dst_seg.shape[:2]
dst_seg[:] = cv2.resize(src_seg, (ww, hh))[:h_seg, :w_seg]
Finally, we can use the pytesseract.image_to_data() method to detect the text. Of course, we'll need to threshold the image again:
gray = cv2.cvtColor(img_copy, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY)
results = pytesseract.image_to_data(thresh)
for b in map(str.split, results.splitlines()[1:]):
if len(b) == 12:
x, y, w, h = map(int, b[6: 10])
cv2.putText(img, b[11], (x, y + h + 15), cv2.FONT_HERSHEY_COMPLEX, 0.6, 0)
cv2.imshow("Result", img)
cv2.waitKey(0)
I have been working with Tesseract for quite some time, so let me clarify something for you. Tesseract is extremely helpful if you're trying to recognize text in documents more than any other computer vision projects. It usually needs a binarized image to get a good output. Therefore, you will always need some image pre-processing.
However, after several trials in the past with all page segmentation modes, I realized that it fails when font size differs on the same line without having a space. Sometimes PSM 6 is helpful if the difference is low, but in your condition, you may try an alternative. If you don't care about the decimals, you may try the following solution:
img = cv2.imread(r'E:\Downloads\Iwzrg.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_blur = cv2.GaussianBlur(gray, (3,3),0)
_,thresh = cv2.threshold(img_blur,200,255,cv2.THRESH_BINARY_INV)
# If using a fixed camera
new_img = thresh[0:100, 80:320]
text = pytesseract.image_to_string(new_img, lang='eng', config='--psm 6 --oem 3 -c tessedit_char_whitelist=0123456789')
OUTPUT: 1227
I would like to recommend applying another image processing method.
Because I am dealing with a dark background, I first invert the image, before converting it to grayscale and thresholding it:
You applied global thresholding and couldn't achieve the desired result.
Then you can apply either adaptive-thresholding or inRange
For the given image, if we apply the inRange threshold:
To be able to recognize the image as accurately as possible we can add a border to the top of the image and resize the image (Optional)
In the OCR section, check if the detected region contains a digit
if text.isdigit():
Then display on the image:
The result is nearly the desired value. Now you can try with the other suggested methods to find the exact value.
The problem is .938 recognized as 235, maybe resizing using different values might improve the result.
Code:
from cv2 import imread, cvtColor, COLOR_BGR2HSV as HSV, inRange, getStructuringElement, resize
from cv2 import imshow, waitKey, MORPH_RECT, dilate, bitwise_and, rectangle, putText
from cv2 import copyMakeBorder as addBorder, BORDER_CONSTANT as CONSTANT, FONT_HERSHEY_SIMPLEX
from numpy import array
from pytesseract import image_to_data, Output
bgr = imread("Iwzrg.png")
resized = resize(bgr, (800, 600), fx=0.75, fy=0.75)
bordered = addBorder(resized, 200, 0, 0, 0, CONSTANT, value=0)
hsv = cvtColor(bordered, HSV)
mask = inRange(hsv, array([0, 0, 250]), array([179, 255, 255]))
kernel = getStructuringElement(MORPH_RECT, (50, 30))
dilated = dilate(mask, kernel, iterations=1)
thresh = 255 - bitwise_and(dilated, mask)
data = image_to_data(thresh, output_type=Output.DICT)
for i in range(0, len(data["text"])):
x = data["left"][i]
y = data["top"][i]
w = data["width"][i]
h = data["height"][i]
text = data["text"][i]
if text.isdigit():
print("Text: {}".format(text))
print("")
text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
rectangle(thresh, (x, y), (x + w, y + h), (0, 255, 0), 2)
putText(thresh, text, (x, y - 10), FONT_HERSHEY_SIMPLEX, 1.2, (0, 0, 255), 3)
imshow("", thresh)
waitKey(0)
I am trying to compute distance (in # of pixels) between two edges in an image. I have corrected for image perspective using cv2.warpPerspective method and have converted the resulting image into grayscale followed by filtering using gaussian blur. I have tried various thresholding methods and found out that cv2.ADAPTIVE_THRESH_GAUSSIAN works best. Other methods are too noisy or miss the second edge in the left side of the object as seen in result of adaptive gaussian thresholding.
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Load the image
imgRoadvR10 = cv2.imread('sampleimage.jpg') # image is already corrected for perspective warp using cv2.warpPerspective
# convert to grayscale
imgRoadvR10_GrayPersp = cv2.cvtColor(imgRoadvR10, cv2.COLOR_BGR2GRAY)
# gaussian blur
a10lvR10_gblur = cv2.GaussianBlur(imgRoadvR10_GrayPersp,(5,5),0)
# Try different thresholding methods
ret,a10lvR10_th1 = cv2.threshold(a10lvR10_gblur,127,255,cv2.THRESH_BINARY)
a10lvR10_th2 = cv2.adaptiveThreshold(a10lvR10_gblur,255,cv2.ADAPTIVE_THRESH_MEAN_C,\
cv2.THRESH_BINARY,11,2)
a10lvR10_th3 = cv2.adaptiveThreshold(a10lvR10_gblur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv2.THRESH_BINARY_INV,11,2)
# Otsu's thresholding
ret2,a10lvR10_th4 = cv2.threshold(a10lvR10_gblur,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
print(ret2)
# Plot results
plt.figure()
titles = ['Original Image', 'Global Thresholding (v = 127)',
'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding','OTSU Thresholding']
images = [a10lvR10_gblur, a10lvR10_th1, a10lvR10_th2, a10lvR10_th3, a10lvR10_th4]
for i in range(5):
plt.subplot(2,3,i+1),plt.imshow(images[i],'gray')
plt.title(titles[i])
plt.xticks([]),plt.yticks([])
plt.show()
Closer look at result of adaptive gaussian thresholding:
I want to find the width of this rectangular object. The width is measured from the second edge on the left side to the edge on the right side (see image below):
How can I measure the width? I have been reading upon morphological operations and edge detection, But not sure how to proceed next. Any suggestions will be appreciated
This is not the best idea and I think a more logical and simple solution can be obtained. However, this idea may help you.
import cv2
import numpy as np
#load image
im = cv2.imread("test3.jpg", 1)
#Convert to gray
mask = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
#convert to black and white
mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)[1]
#try to remove noise
#you can just use median blur or any other method
mask = cv2.erode(mask, np.ones((8, 0), "uint8"))
mask = cv2.dilate(mask, np.ones((32, 0), "uint8"))
mask = cv2.medianBlur(mask, 9)
#save cleaned image
cv2.imwrite("out1.jpg", mask)
A cleaner version of your output image:
out1:
Next we can get the coordinates of the lines. I got the coordinates of the first line from the left. I think you have to change the code a bit to get the coordinates of the sidebar.
h = len(mask) - 1
def count(row):
counter = 0
for i in range(0, len(row)):
if row[i] == 255:
break
counter += 1
return counter
def line(im, pt1, pt2, color, thickness):
im = cv2.line(
img=im,
pt1=pt1,
pt2=pt2,
color=color,
thickness=thickness,
lineType=cv2.LINE_AA,
)
return im
def center(x1, y1, x2, y2):
return (int((x1 + x2) / 2), int((y1 + y2) / 2))
topLeft = count(mask[0])
bottomLeft = count(mask[h])
# to shadow and hide the old left line
mask = line(mask, (topLeft, 0), (bottomLeft, h), (0, 0, 0), 80)
topRight = count(mask[0])
bottomRight = count(mask[h])
# to shadow and hide the old right line
mask = line(mask, (topRight, 0), (bottomRight, h), (0, 0, 0), 80)
mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
# to draw new clean left line
mask = line(mask, (topLeft, 0), (bottomLeft, h), (128, 0, 255), 25)
# to draw new clean right line
mask = line(mask, (topRight, 0), (bottomRight, h), (128, 0, 255), 25)
a = center(topLeft, 0, bottomLeft, h)
b = center(topRight, 0, bottomRight, h)
mask = line(mask, a, b, (128, 0, 255), 25)
cv2.imwrite("out2.jpg", mask)
out2:
Now you can calculate the distance between "a" and "b".
I'm working on a computer vision project that understands mathematical signs. It detects all correctly except the "Equal" mark. But the "equal" sign is perceived as two separate "minus" signs.
image = cv2.imread('./deneme.png')
grey = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(grey.copy(), 0, 255, cv2.THRESH_BINARY_INV)
contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
preprocessed_digits = []
for c in contours:
x,y,w,h = cv2.boundingRect(c)
cv2.rectangle(image, (x,y), (x+w, y+h), color=(0, 255, 0), thickness=2)
digit = thresh[y:y+h, x:x+w]
digit = make_square(digit)
preprocessed_digits.append(digit)
plt.imshow(image, cmap="gray")
plt.show()
I have no idea how to fix this problem. How can I detect the equal sign?
Is that the result you're trying to achieve?
Since it seems that your math expressions are written horizontally, as #Micka points out, you have very strong priors on the relationship between the two different components of an equal sign, so there is a straight-forward (but hacky) way to detect when to minus signs are actually an equal:
import cv2
import matplotlib.pyplot as plt
import numpy as np
class Rect:
def __init__(self,
a,
b,
c,
d):
self.a=a
self.b=b
self.c=c
self.d=d
self.center=np.array([(a+c)/2, (b+d)/2])
def merge_rectangles(r_1, r_2):
a_m= min(r_1.a, r_2.a)
b_m= min(r_1.b, r_2.b)
c_M= max(r_1.c, r_2.c)
d_M= max(r_1.d, r_2.d)
return Rect(a_m, b_m, c_M, d_M)
def they_form_an_equal_sign(rect_1,
rect_2,
tol_w=10,#in pixels
tol_h=10):
#check if the bounding boxes approximately align
b0 = (np.abs(rect_1.a - rect_2.a) < tol_w ) and (np.abs(rect_1.c - rect_2.c) < tol_w)
#check if the bounding boxes have approximately the same height
h1 = np.abs(rect_1.d - rect_1.b)
h2 = np.abs(rect_2.d - rect_2.b)
b1 = np.abs(h1-h2) < tol_h
return b0 and b1
image = cv2.imread('/tmp/m.png')
grey = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(grey.copy(), 0, 255, cv2.THRESH_BINARY_INV)
contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
candidate_rectangles=[]
for c in contours:
x,y,w,h = cv2.boundingRect(c)
candidate_rectangles.append(Rect(x,y,x+w,y+h))
kept=np.ones(len(candidate_rectangles))
new_rectangles=[]
for i in range(len(candidate_rectangles)):
for j in range(i+1,len(candidate_rectangles)):
b=they_form_an_equal_sign(candidate_rectangles[i], candidate_rectangles[j])
if b:
new_rect=merge_rectangles(candidate_rectangles[i], candidate_rectangles[j])
new_rectangles.append(new_rect)
kept[i]=0
kept[j]=0
for i in range(len(kept)):
if kept[i]:
rect=candidate_rectangles[i]
cv2.rectangle(image, (rect.a, rect.b), (rect.c, rect.d), color=(0, 255, 0), thickness=2)
for rect in new_rectangles:
cv2.rectangle(image, (rect.a, rect.b), (rect.c, rect.d), color=(0, 255, 0), thickness=2)
plt.imshow(image, cmap="gray")
plt.show()
Basically, this takes the brute-force approach of comparing every two bounding boxes that have been detected in your code, and merging them into a larger one if they meet the requirements from your prior: i.e. if they are horizontally algined (as the top and bottom parts of a minus sign should) and if their height is approximately the same.
However, this is obviously not robust: you need to adjust thresholds and the entire thing will fall apart if your expressions aren't horizontal and clearly separated. If you want a more robust/useful system, basic ML approaches for character recognition are a better place to start.
I am interested in the human following using a real robot.
I'd like to use the color of clothes as a key feature to identify the target person in front of the robot to follow him/ her but I am suffering due to it is a weak feature with a very simple illumination changing. So, I need to alter this algorithm to another or update values (RGB) online in real-time but I don't have enough experience with image processing.
this is my full code for color detection:
import cv2
import numpy as np
from imutils.video import FPS
# capturing video through webcam
import time
cap = cv2.VideoCapture(0)
width = cap.get(3) # float
height = cap.get(4) # float
print width, height
time.sleep(2.0)
fps = FPS().start()
while (1):
_, img = cap.read()
if _ is True:
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
else:
continue
# blue color
blue_lower = np.array([99, 115, 150], np.uint8)
blue_upper = np.array([110, 255, 255], np.uint8)
blue = cv2.inRange(hsv, blue_lower, blue_upper)
kernal = np.ones((5, 5), "uint8")
blue = cv2.dilate(blue, kernal)
res_blue = cv2.bitwise_and(img, img, mask=blue)
# Tracking blue
(_, contours, hierarchy) = cv2.findContours(blue, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
for pic, contour in enumerate(contours):
area = cv2.contourArea(contour)
if (area > 300):
x, y, w, h = cv2.boundingRect(contour)
img = cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
cv2.putText(img, "Blue Colour", (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0))
cv2.imshow("Color Tracking", img)
if cv2.waitKey(10) & 0xFF == ord('q'):
cap.release()
cv2.destroyAllWindows()
break
fps.update()
# stop the timer and display FPS information
fps.stop()
# print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
# print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
these are the outputs:
1- recognize a person by his clothes color
2- it is lost, the illumination changing is a very simple not severe
Any ideas or suggestions will be appreciated
It does look like you need to use a bit more advanced color similarity function to handle complex cases. Delta E will be the right starting point.
Proper threshold or several colors with associated thresholds will help to achieve pretty accurate results:
See the list of colours on the right side
Complete example.