How can I use OpenCV to reliably extract the digit text from the image below? The color of the text is dynamic.
It's simple. The following code can be optimized further (I wrote it quickly - #Silencer could do that) and also works (tested) with other images (for some you will have to tweak a few values).
import cv2
import numpy as np
# import image
image = cv2.imread('image.png')
cv2.imshow('original', image)
cv2.waitKey(0)
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
cv2.imshow('hsv', hsv[:, :, 1])
cv2.waitKey(0)
# compute the histogram of the input image
# depending on whether the last bin is above or below a certain value (which depends on the colors in the image), one threshold type is chosen over the other
hist, bins = np.histogram(hsv.ravel(), 256, [0, 256])
print(hist[-1])
if hist[-1] > 15000:
    # binary
    ret, thresh = cv2.threshold(hsv[:, :, 0], 55, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    cv2.imshow('second', thresh)
    cv2.waitKey(0)
    # dilation
    kernel = np.ones((1, 1), np.uint8)
    img_dilation = cv2.dilate(thresh, kernel, iterations=1)
    cv2.imshow('dilated', img_dilation)
    cv2.waitKey(0)
    # find contours
    im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # sort contours
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
    for i, ctr in enumerate(sorted_ctrs):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        # Getting ROI
        roi = image[y:y + h, x:x + w]
        # show ROI
        # cv2.imshow('segment no:'+str(i),roi)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # cv2.waitKey(0)
        if w > 15 and h > 15:
            cv2.imwrite('roi{}.png'.format(i), roi)
    cv2.imshow('marked areas', image)
    cv2.waitKey(0)
else:
    # binary
    ret, thresh = cv2.threshold(hsv[:, :, 0], 55, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    cv2.imshow('second', thresh)
    cv2.waitKey(0)
    # dilation
    kernel = np.ones((1, 1), np.uint8)
    img_dilation = cv2.dilate(thresh, kernel, iterations=1)
    cv2.imshow('dilated', img_dilation)
    cv2.waitKey(0)
    # find contours
    im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # sort contours
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
    for i, ctr in enumerate(sorted_ctrs):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        # Getting ROI
        roi = image[y:y + h, x:x + w]
        # show ROI
        # cv2.imshow('segment no:'+str(i),roi)
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        # cv2.waitKey(0)
        if w > 15 and h > 15:
            cv2.imwrite('roi{}.png'.format(i), roi)
    cv2.imshow('marked areas', image)
    cv2.waitKey(0)
I would look at this post from PyImageSearch, which essentially does this for you: https://www.pyimagesearch.com/2017/07/17/credit-card-ocr-with-opencv-and-python/. There are definitely classifiers that work better, but it's a good starting point.
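The core of that post is template matching against a reference set of digit glyphs. Below is a minimal sketch of the idea (the file names digits_reference.png / input.png, the 57x88 glyph size, and the OpenCV 4 findContours signature are my assumptions, not from the post):
import cv2

# load a reference strip containing the glyphs 0-9 (assumed file name) and binarize it
ref = cv2.imread('digits_reference.png', cv2.IMREAD_GRAYSCALE)
_, ref_bin = cv2.threshold(ref, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

# split the reference strip into one template per digit, left to right -> 0..9
ctrs, _ = cv2.findContours(ref_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
ctrs = sorted(ctrs, key=lambda c: cv2.boundingRect(c)[0])
templates = {}
for digit, c in enumerate(ctrs):
    x, y, w, h = cv2.boundingRect(c)
    templates[digit] = cv2.resize(ref_bin[y:y + h, x:x + w], (57, 88))

def classify(roi_bin):
    # score a binarized digit ROI against every template and keep the best match
    roi_bin = cv2.resize(roi_bin, (57, 88))
    scores = {d: cv2.matchTemplate(roi_bin, t, cv2.TM_CCOEFF_NORMED).max() for d, t in templates.items()}
    return max(scores, key=scores.get)
The ROIs extracted by the contour code above could then be binarized and passed through classify().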
I have sample images containing stones. I need to identify only the visible stones. The approach I tried is threshold-based filtering and detecting contours with cv2.findContours. I am also looking into the ENet architecture for a semantic-segmentation-based deep learning approach. The sample images are below.
Example image1:
Example image2:
The code I tried for contour-based detection is below:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt

image = cv2.imread(os.path.join(img_path, img_name2))
# threshold based customization
lower_bound = np.array([0, 0, 0])
upper_bound = np.array([250,55,100])
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
#masking the image using inRange() function
imagemask = cv2.inRange(hsv, lower_bound, upper_bound)
plt.figure(figsize=(20,10))
plt.imshow(imagemask, cmap="gray")
# erode and dilate to smooth the edges
final_mask = cv2.erode(imagemask, np.ones((3, 3), dtype=np.uint8))
final_mask = cv2.dilate(final_mask, np.ones((5, 5), dtype=np.uint8))
# find contours based on the mask
contours = cv2.findContours(final_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# draw contours
img_conts = cv2.drawContours(image.copy(), contours[0], -1, (0,255,0), 3)
plt.figure(figsize=(20,10))
plt.imshow(img_conts, cmap="gray")
The sample contours output is shown above. I know that the thresholds can be tuned for better results here.
But what I am looking for is a better approach or solution that can work in this challenging environment for detecting small particles like stones. Any ideas on how to solve this in a better way?
Here is how you can use the Canny edge detector to detect the rocks in your images:
import cv2
import numpy as np
def process(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(img_gray, 103, 255, cv2.THRESH_BINARY)
    img_blur = cv2.GaussianBlur(thresh, (23, 23), 0)
    img_canny = cv2.Canny(img_blur, 65, 0)
    img_dilate = cv2.dilate(img_canny, None, iterations=2)
    return cv2.erode(img_dilate, None, iterations=2)

imgs = [cv2.imread("image1.jpg"), cv2.imread("image2.jpg")]
for i, img in enumerate(imgs):
    contours = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]
    cv2.drawContours(img, contours, -1, (0, 255, 0), 1)
    cv2.imshow(str(i), img)
    cv2.waitKey(0)
cv2.destroyAllWindows()
Output for sample images 1 and 2:
You can also tweak the parameters with OpenCV trackbars, using the code below:
import cv2
import numpy as np

def process(img, c_t1, c_t2):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(img_gray, 103, 255, cv2.THRESH_BINARY)
    img_blur = cv2.GaussianBlur(thresh, (23, 23), 0)
    img_canny = cv2.Canny(img_blur, c_t1, c_t2)
    img_dilate = cv2.dilate(img_canny, None, iterations=2)
    return cv2.erode(img_dilate, None, iterations=2)

def show(imgs, win="Image", scale=1):
    imgs = [cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) if len(img.shape) == 2 else img for img in imgs]
    img_concat = np.concatenate(imgs, 1)
    h, w = img_concat.shape[:2]
    cv2.imshow(win, cv2.resize(img_concat, (int(w * scale), int(h * scale))))

d = {"Canny Threshold 1": (65, 500),
     "Canny Threshold 2": (0, 500)}
imgs = [cv2.imread("image1.jpg"), cv2.imread("image2.jpg")]
cv2.namedWindow("Track Bars")
for i in d:
    cv2.createTrackbar(i, "Track Bars", *d[i], id)  # the built-in id() serves as a no-op callback
while True:
    c_t1, c_t2 = (cv2.getTrackbarPos(i, "Track Bars") for i in d)
    for i, img in enumerate(imgs):
        img_copy = img.copy()
        processed = process(img, c_t1, c_t2)
        contours = cv2.findContours(processed, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]
        cv2.drawContours(img_copy, contours, -1, (0, 255, 0), 1)
        show([img_copy, processed], str(i))
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cv2.destroyAllWindows()
Output:
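If many tiny noise contours survive, one simple extension (my sketch, not part of the original answer; the area cutoff is an assumption to tune) is to filter by cv2.contourArea before drawing, reusing the process() function from the first snippet:
min_area = 100  # assumed cutoff in pixels; tune for your image scale
contours = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]
contours = [c for c in contours if cv2.contourArea(c) > min_area]
cv2.drawContours(img, contours, -1, (0, 255, 0), 1)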
I am trying to use Pytesseract to read the digits from the following image:
Low Resolution Image
Unfortunately, the program does not return any result, even after applying grayscale conversion, thresholding, noise removal, or Canny edge detection. When using a config to whitelist only digits and "$", the program stops detecting even in the high-resolution image (here).
The code is as follows:
import cv2
import numpy as np
import pytesseract as pt

class NumberAnalyser:
    # boilerplate code to pre-process image
    # get grayscale image
    def get_grayscale(self, image):
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # noise removal
    def remove_noise(self, image):
        return cv2.medianBlur(image, 5)

    # thresholding
    def thresholding(self, image):
        gray = self.get_grayscale(image)
        (T, threshInv) = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
        # visualize only the masked regions in the image
        masked = cv2.bitwise_not(gray, gray, mask=threshInv)
        ret, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
        ret, thresh2 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
        ret, thresh3 = cv2.threshold(gray, 127, 255, cv2.THRESH_TRUNC)
        ret, thresh4 = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
        ret, thresh5 = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO_INV)
        return thresh4

    # dilation
    def dilate(self, image):
        kernel = np.ones((5, 5), np.uint8)
        return cv2.dilate(image, kernel, iterations=1)

    # erosion
    def erode(self, image):
        kernel = np.ones((5, 5), np.uint8)
        return cv2.erode(image, kernel, iterations=1)

    # opening - erosion followed by dilation
    def opening(self, image):
        kernel = np.ones((5, 5), np.uint8)
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

    # canny edge detection
    def canny(self, image):
        return cv2.Canny(image, 100, 200)

    # skew correction
    def deskew(self, image):
        coords = np.column_stack(np.where(image > 0))
        angle = cv2.minAreaRect(coords)[-1]
        if angle < -45:
            angle = -(90 + angle)
        else:
            angle = -angle
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
        return rotated

    # template matching
    def match_template(self, image, template):
        return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

    def numbers(self, img_path):
        reader = cv2.imread(img_path)
        # reader = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_RGB2BGR)
        gray = self.get_grayscale(reader)
        thresh = self.thresholding(reader)
        opening = self.opening(reader)
        canny = self.canny(reader)
        noiseless = self.remove_noise(reader)
        # cv2.imshow('canny', canny)
        # cv2.waitKey(0)
        # cv2.imshow('gray', gray)
        # cv2.waitKey(0)
        cv2.imshow('threshold', thresh)
        cv2.waitKey(0)
        # cv2.imshow('opening', opening)
        # cv2.waitKey(0)
        # cv2.imshow('noise removal', noiseless)
        # cv2.waitKey(0)
        # cv2.imshow('og', reader)
        # cv2.waitKey(0)
        print('yes')
        print(pt.image_to_string(thresh, config='--psm 11, -c tessedit_char_whitelist=$,0123456789'))
Adding or removing the --psm 11 configuration does not change anything.
Any help would be super appreciated!
You apply multiple simple thresholds consecutively, but you should also test other types of thresholding, such as adaptive thresholding and inRange.
For example, if you use inRange thresholding on the given images:
The result for the high resolution image will be:
The output for the 0.38 version:
20000
4.000
100
The result for the low resolution image will be:
The output for the 0.38 version:
44.900
16.000
34
Unfortunately, only the middle number is recognized correctly here. If you tune the range values, the resulting image may give a better result.
For more read: Improving the quality of the output
Tesseract documentation
Code:
import cv2
import pytesseract
from numpy import array
img = cv2.imread("eO1XG.png") # Load the images: high-res: l9Zbt.png, low-res: eO1XG.png
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
msk = cv2.inRange(img, array([94, 0, 196]), array([179, 84, 255])) # for low resolution
# msk = cv2.inRange(img, array([0, 0, 0]), array([179, 26, 255])) # for high resolution
krn = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dlt = cv2.dilate(msk, krn, iterations=1)
thr = 255 - cv2.bitwise_and(dlt, msk)
txt = pytesseract.image_to_string(thr, config='--psm 6 digits')
print(txt)
cv2.imshow("", thr)
cv2.waitKey(0)
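For completeness, the adaptive-thresholding alternative mentioned at the top would look roughly like this (a sketch; the block size of 31 and the constant 10 are assumptions you would need to tune per image):
import cv2
import pytesseract

img = cv2.imread("eO1XG.png")
gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# compute a local threshold over each 31x31 neighbourhood instead of one global value
thr = cv2.adaptiveThreshold(gry, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 10)
print(pytesseract.image_to_string(thr, config="--psm 6 digits"))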
I have a problem with the contour, i.e. with the mask I get from it. I am bothered by all the peaks that remain from the graph (I marked them with a red circle). Is there an easy way to get rid of them?
import cv2
import numpy as np

h, w = img.shape[:2]
mask = np.zeros((h, w), np.uint8)
# Transform to gray colorspace and threshold the image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Search for contours and select the biggest one and draw it on mask
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
cntsSorted = sorted(contours, key=lambda x: cv2.contourArea(x), reverse=True)
for n in range(0, 2):
    cnt = cntsSorted[n]
    cv2.drawContours(mask, [cnt], 0, 255, -1)
# Perform a bitwise operation
color = cv2.bitwise_and(img, img, mask=mask)
x, y, w, h = cv2.boundingRect(cnt)
crop = color[y:(y + h)-3, x+3:x + w]
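One simple thing that might help (a sketch on my part; the kernel size is an assumption to tune against the width of the peaks) is a morphological opening of the mask before the bitwise_and, which removes protrusions thinner than the kernel:
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))  # assumed size
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
color = cv2.bitwise_and(img, img, mask=mask)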
Is there a simple way to do digit segmentation on files like the one in the screenshot below?
I want to use OpenCV to do it as it's the library I'm using to do the rest of processing but other suggestions are welcomed.
Paper form:
Text box:
A simple OpenCV contours method will not work here because the digits sit inside some sort of box template, so you need to detect the boxes first; check out this blog.
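A rough sketch of that idea (mine, not taken from the blog; the kernel lengths, file name, and size limits are assumptions) is to extract the long horizontal and vertical strokes of the form with morphology, treat each enclosed cell as a box, and crop the digit from inside it:
import cv2

img = cv2.imread('form.png')  # assumed file name
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, bw = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

# keep only long horizontal / vertical strokes, i.e. the box borders
horiz = cv2.morphologyEx(bw, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1)))
vert = cv2.morphologyEx(bw, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40)))
grid = cv2.bitwise_or(horiz, vert)

# every enclosed cell of the inverted grid is one white component; crop its interior
cells, _ = cv2.findContours(cv2.bitwise_not(grid), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for i, c in enumerate(sorted(cells, key=lambda c: cv2.boundingRect(c)[0])):
    x, y, w, h = cv2.boundingRect(c)
    if 20 < w < 200 and 20 < h < 200:  # assumed size limits for a single digit box
        cv2.imwrite('cell{}.png'.format(i), img[y + 3:y + h - 3, x + 3:x + w - 3])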
Take this piece of code and adapt it to your problem. It's not difficult for your situation:
import cv2
import numpy as np
# import image
image = cv2.imread('C:\\Users\\PC\\Desktop\\roi.png')
# grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', gray)
# binary
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('threshold', thresh)
# dilation
kernel = np.ones((10, 1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('dilated', img_dilation)
# find contours
# the cv2.findContours() function changed from OpenCV 3 to OpenCV 4: it now returns only two values instead of three
cv2MajorVersion = cv2.__version__.split(".")[0]
# check for contours on thresh
if int(cv2MajorVersion) >= 4:
    ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
else:
    im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# sort contours
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)
    # Getting ROI
    roi = image[y:y + h, x:x + w]
    # show ROI
    # cv2.imshow('segment no:'+str(i),roi)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    if w > 15 and h > 15:
        cv2.imwrite('C:\\Users\\PC\\Desktop\\output\\{}.png'.format(i), roi)
cv2.imshow('marked areas', image)
cv2.waitKey(0)
Source: https://lucians.dev/extract-roi-from-image-with-python-and-opencv
I have the above number plate image. My goal is to segment each character individually and pass it into my neural network. I have tried finding contours and using bounding rectangles to segment these characters with the following code:
import cv2

img = cv2.imread('download.jpeg')
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_img = cv2.GaussianBlur(gray_img, (5,5), 0)
ret, im_th = cv2.threshold(gray_img, 90, 255, cv2.THRESH_BINARY_INV)
im_th = cv2.adaptiveThreshold(gray_img, 255,
cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY,75, 10)
im_th = cv2.bitwise_not(im_th)
ctrs, hier = cv2.findContours(im_th.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(img, ctrs, -1, (0,255,0), 3)
rects = [cv2.boundingRect(ctr) for ctr in ctrs]
print(len(rects))
for rect in rects:
    cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0), 3)
    length = int(rect[3] * 1.6)
    pt1 = int(rect[1] + rect[3] // 2 - length // 2)
    pt2 = int(rect[0] + rect[2] // 2 - length // 2)
    roi = img[pt1:pt1 + length, pt2:pt2 + length]
The above code creates regions that include bounding rectangles other than the characters. Although I could manually filter out these regions, it would vary from image to image. How would I go about this if I need to only extract the regions with characters?
import cv2

# read image
img = cv2.imread('input_image.png')
#grayscale
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
cv2.waitKey(0)
#binarize
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV)
cv2.waitKey(0)
#find contours
im2,ctrs, hier = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
#sort contours
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)
    # Getting ROI
    roi = img[y:y + h, x:x + w]
    # show ROI
    # cv2.imwrite('roi_imgs.png', roi)
    cv2.imshow('character' + str(i), roi)
    cv2.rectangle(img, (x, y), (x + w, y + h), (90, 0, 255), 2)
    cv2.waitKey(0)
cv2.imshow('marked areas',img)
cv2.waitKey(0)
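To keep only the character regions (the original question's concern), a common heuristic is to filter the bounding boxes by size and aspect ratio relative to the plate image. A sketch, reusing img and sorted_ctrs from above; the ratios are assumptions to tune:
img_h, img_w = img.shape[:2]
char_rois = []
for ctr in sorted_ctrs:
    x, y, w, h = cv2.boundingRect(ctr)
    aspect = w / float(h)
    # plate characters are roughly upright and span a large fraction of the plate height
    if 0.2 < aspect < 1.0 and 0.4 * img_h < h < 0.9 * img_h:
        char_rois.append(img[y:y + h, x:x + w])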