Not getting results in OCR - Pytesseract - opencv

Just used the following code a OCR application. The OCR to be read is on a metal milled surface with a unique font. The below code work well for embossed surfaces but not engraved surfaces. I have tried tweaking the blur and dilate iterations, still no results.
How i can add new fonts into tesseract , if the unique font is an issue ?
Any tips on how i can get better results ?
import cv2
import numpy as np
import imutils
import pytesseract
# read image from disk
image = cv2.imread('test.jpg')
# make it gray
img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# blur it to remove noise
img = cv2.GaussianBlur(img, (7,7), 0)
# perform edge detection, then perform a dilation + erosion to
# close gaps in between object edges
edged = cv2.Canny(img, 40, 90)
dilate = cv2.dilate(edged, None, iterations=1)
# perform erosion if necessay, it completely depends on the image
#erode = cv2.erode(dilate, None, iterations=1)
# create an empty masks
mask = np.ones(img.shape[:2], dtype="uint8") * 255
# find contours
cnts = cv2.findContours(dilate.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[1] if imutils.is_cv2() else cnts[0]
orig = img.copy()
for c in cnts:
# if the contour is not sufficiently large, ignore it
if cv2.contourArea(c) < 300:
cv2.drawContours(mask, [c], -1, 0, -1)
x,y,w,h = cv2.boundingRect(c)
# filter more contours if nessesary
if(w>h):
cv2.drawContours(mask, [c], -1, 0, -1)
newimage = cv2.bitwise_and(dilate.copy(), dilate.copy(), mask=mask)
img2 = cv2.dilate(newimage, None, iterations=5)
ret2,th1 = cv2.threshold(img2 ,0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
pytesseract.pytesseract.tesseract_cmd = r'root\folder'
# Tesseract OCR on the image
temp = pytesseract.image_to_string(th1)
# Write results on the image
cv2.putText(image, temp, (100,100), cv2.FONT_HERSHEY_SIMPLEX, 1.8, (0,255,255), 3)
# show the outputs
cv2.imshow('Original image', cv2.resize(image,(640,480)))
cv2.imshow('Dilated', cv2.resize(dilate,(640,480)))
cv2.imshow('New Image', cv2.resize(newimage,(640,480)))
cv2.imshow('Inverted Threshold', cv2.resize(th1,(640,480)))
cv2.waitKey(0)
cv2.destroyAllWindows()

Related

Identify visible stones in the image - Any Approach in OpenCV & Deeplearning

I have samples images of stones present in the images. I need to identify the visible stones only. The approach which I tried is threshold based filtering and detecting cv2.contours. Also, I am looking into ENet Architecture for semantic segmentation based deep learning approach. The samples images are below.
Example image1:
Example image2:
The code which I tried for contour based detection is as below
image = cv2.imread(os.path.join(img_path, img_name2))
# threshold based customization
lower_bound = np.array([0, 0, 0])
upper_bound = np.array([250,55,100])
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
#masking the image using inRange() function
imagemask = cv2.inRange(hsv, lower_bound, upper_bound)
plt.figure(figsize=(20,10))
plt.imshow(imagemask, cmap="gray")
# erode and diluation to smoothen the edeges
final_mask = cv2.erode(imagemask, np.ones((3, 3), dtype=np.uint8))
final_mask = cv2.dilate(imagemask, np.ones((5, 5), dtype=np.uint8))
# find contours based on the mask
contours = cv2.findContours(final_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# draw contours
img_conts = cv2.drawContours(image.copy(), contours[0], -1, (0,255,0), 3)
plt.figure(figsize=(20,10))
plt.imshow(img_conts, cmap="gray")
The sample contours ouput. I know that the thresholds can be tuned for better results here.
But, what I am looking here for the any better approach or solution can work in this heavy environment for detection small particles like stones. Any ideas to solve in better way?
Here is how you can use the Canny edge detector to detect the rocks in your images:
import cv2
import numpy as np
def process(img):
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(img_gray, 103, 255, cv2.THRESH_BINARY)
img_blur = cv2.GaussianBlur(thresh, (23, 23), 0)
img_canny = cv2.Canny(img_blur, 65, 0)
img_dilate = cv2.dilate(img_canny, None, iterations=2)
return cv2.erode(img_dilate, None, iterations=2)
imgs = [cv2.imread("image1.jpg"), cv2.imread("image2.jpg")]
for i, img in enumerate(imgs):
contours = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]
cv2.drawContours(img, contours, -1, (0, 255, 0), 1)
cv2.imshow(str(i), img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Output for sample images 1 and 2:
You can also tweak the parameters using OpenCV trackbars using the code below:
import cv2
import numpy as np
from random import randint, sample
def process(img, c_t1, c_t2):
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(img_gray, 103, 255, cv2.THRESH_BINARY)
img_blur = cv2.GaussianBlur(thresh, (23, 23), 0)
img_canny = cv2.Canny(img_blur, c_t1, c_t2)
img_dilate = cv2.dilate(img_canny, None, iterations=2)
return cv2.erode(img_dilate, None, iterations=2)
def show(imgs, win="Image", scale=1):
imgs = [cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) if len(img.shape) == 2 else img for img in imgs]
img_concat = np.concatenate(imgs, 1)
h, w = img_concat.shape[:2]
cv2.imshow(win, cv2.resize(img_concat, (int(w * scale), int(h * scale))))
d = {"Canny Threshold 1": (65, 500),
"Canny Threshold 2": (0, 500)}
imgs = [cv2.imread("image1.jpg"), cv2.imread("image2.jpg")]
cv2.namedWindow("Track Bars")
for i in d:
cv2.createTrackbar(i, "Track Bars", *d[i], id)
while True:
c_t1, c_t2 = (cv2.getTrackbarPos(i, "Track Bars") for i in d)
for i, img in enumerate(imgs):
img_copy = img.copy()
processed = process(img, c_t1, c_t2)
contours = cv2.findContours(processed, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[0]
cv2.drawContours(img_copy, contours, -1, (0, 255, 0), 1)
show([img_copy, processed], str(i))
if cv2.waitKey(1) & 0xFF == ord("q"):
break
cv2.destroyAllWindows()
Output:
(Click image to expand)

Pytesseract Not Recognising Text

I am trying to use Pytesseract to read the digits from the following image:
Low Resolution Image
Unfortunately, the program is not returning with any solution, even after using greyscale, thresholding, noise detection or canny edge detection. When using a config to whitelist only digits and $/, the program stops detecting even the high resolution image. (here)
The code is as follows:
class NumberAnalyser:
# boilerplate code to pre-process image
# get grayscale image
def get_grayscale(self, image):
return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# noise removal
def remove_noise(self, image):
return cv2.medianBlur(image, 5)
# thresholding
def thresholding(self, image):
gray = self.get_grayscale(image)
(T, threshInv) = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
# visualize only the masked regions in the image
masked = cv2.bitwise_not(gray, gray, mask=threshInv)
ret, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
ret, thresh2 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
ret, thresh3 = cv2.threshold(gray, 127, 255, cv2.THRESH_TRUNC)
ret, thresh4 = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
ret, thresh5 = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO_INV)
return thresh4
# dilation
def dilate(self, image):
kernel = np.ones((5, 5), np.uint8)
return cv2.dilate(image, kernel, iterations=1)
# erosion
def erode(self, image):
kernel = np.ones((5, 5), np.uint8)
return cv2.erode(image, kernel, iterations=1)
# opening - erosion followed by dilation
def opening(self, image):
kernel = np.ones((5, 5), np.uint8)
return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)
# canny edge detection
def canny(self, image):
return cv2.Canny(image, 100, 200)
# skew correction
def deskew(self, image):
coords = np.column_stack(np.where(image > 0))
angle = cv2.minAreaRect(coords)[-1]
if angle < -45:
angle = -(90 + angle)
else:
angle = -angle
(h, w) = image.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, angle, 1.0)
rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
return rotated
# template matching
def match_template(self, image, template):
return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)
def numbers(self, img_path):
reader = cv2.imread(img_path)
# reader = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_RGB2BGR)'
gray = self.get_grayscale(reader)
thresh = self.thresholding(reader)
opening = self.opening(reader)
canny = self.canny(reader)
noiseless = self.remove_noise(reader)
# cv2.imshow('canny', canny)
# cv2.waitKey(0)
# cv2.imshow('gray', gray)
# cv2.waitKey(0)
cv2.imshow('threshold', thresh)
cv2.waitKey(0)
# cv2.imshow('opening', opening)
# cv2.waitKey(0)
# cv2.imshow('noise removal', noiseless)
# cv2.waitKey(0)
# cv2.imshow('og', reader)
# cv2.waitKey(0)
print('yes')
print(pt.image_to_string(thresh, config='--psm 11, -c tessedit_char_whitelist=$,0123456789'))
The --psm 11 configuration addition/deletion does not change anything.
Any help would be super appreciated!
You apply multiple simple thresholding consecutively, but you should also test it with other types of thresholding such as adaptive and inRange.
For example, if you use inRange thresholding for the given example:
The result for the high resolution image will be:
The output for the 0.38 version:
20000
4.000
100
The result for the low resolution image will be:
The output for the 0.38 version:
44.900
16.000
34
Unfortunately, only the middle number is recognized correctly. If you set the range values, the resulting image may give a better result.
For more read: Improving the quality of the output
Tesseract documentation
Code:
import cv2
import pytesseract
from numpy import array
img = cv2.imread("eO1XG.png") # Load the images: high-res: l9Zbt.png, low-res: eO1XG.png
img = cv2.cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
msk = cv2.inRange(img, array([94, 0, 196]), array([179, 84, 255])) # for low resolution
# msk = cv2.inRange(img, array([0, 0, 0]), array([179, 26, 255])) # for high resolution
krn = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dlt = cv2.dilate(msk, krn, iterations=1)
thr = 255 - cv2.bitwise_and(dlt, msk)
txt = pytesseract.image_to_string(thr, config='--psm 6 digits')
print(txt)
cv2.imshow("", thr)
cv2.waitKey(0)

OpenCV Python Contour Approximation

I want to detect a rectangle shape in the digital meter, to detect the shape contour approximation, but not able to find the exact contour of rectangle .I don't know where is the mistake .please have a look and suggest
digitalMeter.jpg
required-Output-digitalMeter-contour
import imutils
import cv2
image = cv2.imread('C:\\digitalMeter.jpg')
image = imutils.resize(image, height=500)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(blurred, 50, 200, 255)
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None
for c in (cnts):
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
if len(approx) == 4:
print(displayCnt)[enter image description here][2]
displayCnt = approx
break
cv2.drawContours(image, [displayCnt], -1, (0, 230, 255), 6)
cv2.imshow('cnts', image)
cv2.waitKey(0)
Here is one way to do that in Python/OpenCV.
Read the input
Convert to gray
Threshold
Apply morphology to clean up the threshold image
Invert so that the meter is white on a black background
Find the contours and extract the largest (actually only) contour
Draw the contour on the input image
Save the results
Input:
import cv2
import numpy as np
# read image
img = cv2.imread('digital_meter.jpg')
hh, ww = img.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
# threshold
thresh = cv2.threshold(gray,30,255,cv2.THRESH_BINARY)[1]
# apply close and open morphology
kernel = np.ones((3,3), np.uint8)
mask = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
kernel = np.ones((11,11), np.uint8)
mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
# invert
mask = 255 - mask
# get largest contour
contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
big_contour = max(contours, key=cv2.contourArea)
# draw green contour on input
contour_img = img.copy()
cv2.drawContours(contour_img,[big_contour],0,(0,255,0),2)
# save cropped image
cv2.imwrite('digital_meter_thresh.png',thresh)
cv2.imwrite('digital_meter_mask.png',mask)
cv2.imwrite('digital_meter_contour.png',contour_img)
# show the images
cv2.imshow("THRESH", thresh)
cv2.imshow("MASK", mask)
cv2.imshow("CONTOUR", contour_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold Image:
Morphology cleaned and inverted image:
Resulting contour on input:

How to do digit segmentation for hand-written account numbers & sort codes in OPENCV?

Is there a simple way that I can do digit segmentation from files like in the screenshot below?
I want to use OpenCV to do it as it's the library I'm using to do the rest of processing but other suggestions are welcomed.
Paper form:
Text box:
Simple OpenCV Contours method will not work here because digits are present in some sort of box template hence you need to detect boxes first check out this blog
Take this piece of code and adapt for your problem. It's not difficult with your situation:
import cv2
import numpy as np
# import image
image = cv2.imread('C:\\Users\\PC\\Desktop\\roi.png')
# grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', gray)
# binary
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('threshold', thresh)
# dilation
kernel = np.ones((10, 1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('dilated', img_dilation)
# find contours
# cv2.findCountours() function changed from OpenCV3 to OpenCV4: now it have only two parameters instead of 3
cv2MajorVersion = cv2.__version__.split(".")[0]
# check for contours on thresh
if int(cv2MajorVersion) >= 4:
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
else:
im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# sort contours
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
for i, ctr in enumerate(sorted_ctrs):
# Get bounding box
x, y, w, h = cv2.boundingRect(ctr)
# Getting ROI
roi = image[y:y + h, x:x + w]
# show ROI
# cv2.imshow('segment no:'+str(i),roi)
cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
if w > 15 and h > 15:
cv2.imwrite('C:\\Users\\PC\\Desktop\\output\\{}.png'.format(i), roi)
cv2.imshow('marked areas', image)
cv2.waitKey(0)
Source: https://lucians.dev/extract-roi-from-image-with-python-and-opencv

opencv area of contour

I am trying to find the contours of an animal from a picture. Let's assume it is a chicken. From the picture I could find its contours but they aren't closed. Also, I am getting a lot of noise from the background which is white ( same as the chicken).
I am using a simple code found on stackoverflow.
import numpy as np
import cv2
img = cv2.imread('lateral.jpg')
imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# blurred = cv2.GaussianBlur(imgray, (5, 5), 0)
# edged = cv2.Canny(blurred, 10, 11) # 10 and 40 to be more perceptive
# contours_canny= cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
edges = cv2.Canny(imgray, 10,30)
cv2.imshow('edges', edges)
k = cv2.waitKey()
Is there a way to find just the contour of this chicken?
Thanks in advance.
Finding contour is quite easy. The problem is that your image has low contrast between the chicken and the background. So, your idea of using canny edges was not bad, it just needed some post processing.
I guess this is what you are looking for:
import cv2
import numpy as np
image = cv2.imread("./chicken.jpg", cv2.IMREAD_COLOR)
image = cv2.resize(image, (0,0), fx=0.5, fy=0.5)
imgray = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)[...,0]
edges = cv2.Canny(imgray, 10,30)
blurred = cv2.GaussianBlur(edges, (9, 9), 0)
clahe = cv2.createCLAHE(clipLimit=5.0, tileGridSize=(32,32))
contrast = clahe.apply(blurred)
ret, thresh = cv2.threshold(contrast, 20, 255, cv2.THRESH_BINARY|cv2.THRESH_OTSU)
_, contours, _ = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
maxArea = 0
best = None
for contour in contours:
area = cv2.contourArea(contour)
print (area)
if area > maxArea :
maxArea = area
best = contour
cv2.drawContours(image, [best], 0, (0, 0, 255), -1)
while True:
cv2.imshow("result", image)
k = cv2.waitKey(30) & 0xff
if k == 27:
break

Resources