Pytesseract Not Recognising Text - opencv

I am trying to use Pytesseract to read the digits from the following image:
Low Resolution Image
Unfortunately, the program does not return any text, even after applying greyscale conversion, thresholding, noise removal or Canny edge detection. When I use a config that whitelists only digits and $/, the program stops detecting text even in the high-resolution image. (here)
The code is as follows:
class NumberAnalyser:
    """Image pre-processing helpers and a Tesseract-based digit reader.

    The methods use the module-level names ``cv2``, ``np`` and ``pt``, which
    the surrounding file must import (``pt`` is presumably ``pytesseract``
    -- confirm against the file's imports).
    """

    def get_grayscale(self, image):
        """Return *image* converted from BGR to single-channel grayscale."""
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def remove_noise(self, image):
        """Return *image* smoothed with a median blur of aperture size 5."""
        return cv2.medianBlur(image, 5)

    def thresholding(self, image):
        """Return a to-zero thresholded (cut-off 127) grayscale of *image*.

        Before thresholding, the pixels selected by an inverse Otsu mask are
        inverted, so the result is not a plain to-zero threshold of the
        grayscale input.
        """
        gray = self.get_grayscale(image)
        _, otsu_inv_mask = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
        )
        # ``gray`` is passed as both source and destination, so the masked
        # pixels are inverted in place. The threshold below therefore sees
        # the modified image; this step must stay to keep the output stable.
        cv2.bitwise_not(gray, gray, mask=otsu_inv_mask)
        _, to_zero = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
        return to_zero

    def dilate(self, image):
        """Return *image* dilated once with a 5x5 kernel of ones."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.dilate(image, kernel, iterations=1)

    def erode(self, image):
        """Return *image* eroded once with a 5x5 kernel of ones."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.erode(image, kernel, iterations=1)

    def opening(self, image):
        """Return the morphological opening of *image* with a 5x5 kernel."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

    def canny(self, image):
        """Return the Canny edge map of *image* (thresholds 100 and 200)."""
        return cv2.Canny(image, 100, 200)

    def deskew(self, image):
        """Return *image* rotated by the angle of its non-zero pixels' bounding box.

        The angle comes from the minimum-area rectangle around all pixels
        greater than zero; the rotation is about the image centre and keeps
        the original width and height.
        """
        coords = np.column_stack(np.where(image > 0))
        angle = cv2.minAreaRect(coords)[-1]
        # NOTE(review): the ``< -45`` branch assumes minAreaRect reports
        # angles in [-90, 0); confirm for the installed OpenCV version.
        if angle < -45:
            angle = -(90 + angle)
        else:
            angle = -angle
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        return cv2.warpAffine(
            image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
        )

    def match_template(self, image, template):
        """Return the normalised correlation-coefficient match map."""
        return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

    def numbers(self, img_path):
        """Threshold the image at *img_path*, display it and print the OCR text.

        Blocks on ``cv2.waitKey(0)`` until a key is pressed in the preview
        window, then prints the recognised text. Returns ``None``.
        """
        reader = cv2.imread(img_path)
        thresh = self.thresholding(reader)
        cv2.imshow('threshold', thresh)
        cv2.waitKey(0)
        print('yes')
        # Options are space-separated; the original ``--psm 11,`` carried a
        # stray comma that made the page-segmentation argument malformed.
        print(pt.image_to_string(
            thresh, config='--psm 11 -c tessedit_char_whitelist=$,0123456789'
        ))
Adding or removing the --psm 11 option does not change anything.
Any help would be super appreciated!

You apply several simple thresholds one after another, but you should also try other types of thresholding, such as adaptive and inRange.
For example, if you use inRange thresholding for the given example:
The result for the high resolution image will be:
The output for the 0.38 version:
20000
4.000
100
The result for the low resolution image will be:
The output for the 0.38 version:
44.900
16.000
34
Unfortunately, only the middle number is recognized correctly. If you set the range values, the resulting image may give a better result.
For more read: Improving the quality of the output
Tesseract documentation
Code:
import cv2
import pytesseract
from numpy import array
# Load the image: high-res is l9Zbt.png, low-res is eO1XG.png.
img = cv2.imread("eO1XG.png")
if img is None:
    # cv2.imread returns None instead of raising when the file is unreadable.
    raise FileNotFoundError("eO1XG.png")
# Call cvtColor on the top-level module: the ``cv2.cv2`` submodule alias is a
# legacy packaging detail and is missing from current opencv-python builds.
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# Keep only the pixels inside the HSV range that isolates the digits.
msk = cv2.inRange(img, array([94, 0, 196]), array([179, 84, 255]))  # for low resolution
# msk = cv2.inRange(img, array([0, 0, 0]), array([179, 26, 255])) # for high resolution
krn = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dlt = cv2.dilate(msk, krn, iterations=1)
# Invert so the selected pixels become dark on a white background for OCR.
thr = 255 - cv2.bitwise_and(dlt, msk)
txt = pytesseract.image_to_string(thr, config='--psm 6 digits')
print(txt)
cv2.imshow("", thr)
cv2.waitKey(0)

Related

Not getting results in OCR - Pytesseract

I used the following code for an OCR application. The text to be read is on a milled metal surface and uses a unique font. The code below works well for embossed surfaces but not for engraved surfaces. I have tried tweaking the blur and the dilate iterations, but still get no results.
How can I add new fonts to Tesseract, if the unique font is the issue?
Any tips on how I can get better results?
import cv2
import numpy as np
import imutils
import pytesseract
# read image from disk
image = cv2.imread('test.jpg')
# convert to grayscale and blur to suppress noise before edge detection
img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
img = cv2.GaussianBlur(img, (7, 7), 0)
# perform edge detection, then dilate to close gaps between object edges
edged = cv2.Canny(img, 40, 90)
dilate = cv2.dilate(edged, None, iterations=1)
# perform erosion if necessary, it completely depends on the image
# erode = cv2.erode(dilate, None, iterations=1)
# all-white mask; contours to be discarded are painted black below
mask = np.ones(img.shape[:2], dtype="uint8") * 255
# findContours returns (contours, hierarchy) or (image, contours, hierarchy)
# depending on the OpenCV version; pick the contour list by tuple length
# instead of the previous version test, which selected the wrong element.
found = cv2.findContours(dilate.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = found[0] if len(found) == 2 else found[1]
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    # drop contours that are too small or wider than they are tall
    if cv2.contourArea(c) < 300 or w > h:
        cv2.drawContours(mask, [c], -1, 0, -1)
newimage = cv2.bitwise_and(dilate, dilate, mask=mask)
img2 = cv2.dilate(newimage, None, iterations=5)
ret2, th1 = cv2.threshold(img2, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
# NOTE(review): this looks like a placeholder; it presumably has to be the
# full path of the tesseract executable -- confirm before running.
pytesseract.pytesseract.tesseract_cmd = r'root\folder'
# Tesseract OCR on the image
temp = pytesseract.image_to_string(th1)
# Write results on the image
cv2.putText(image, temp, (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.8, (0, 255, 255), 3)
# show the outputs
cv2.imshow('Original image', cv2.resize(image, (640, 480)))
cv2.imshow('Dilated', cv2.resize(dilate, (640, 480)))
cv2.imshow('New Image', cv2.resize(newimage, (640, 480)))
cv2.imshow('Inverted Threshold', cv2.resize(th1, (640, 480)))
cv2.waitKey(0)
cv2.destroyAllWindows()

Identify visible stones in the image - Any Approach in OpenCV & Deeplearning

I have sample images that contain stones, and I need to identify only the visible stones. The approach I tried is threshold-based filtering followed by contour detection with cv2. I am also looking into the ENet architecture for a deep-learning approach based on semantic segmentation. The sample images are below.
Example image1:
Example image2:
The code which I tried for contour based detection is as below
image = cv2.imread(os.path.join(img_path, img_name2))
# HSV bounds used to select the stones. OpenCV stores 8-bit hue as 0-179,
# so an upper hue of 250 places no effective limit on hue.
lower_bound = np.array([0, 0, 0])
upper_bound = np.array([250, 55, 100])
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# mask the image using inRange()
imagemask = cv2.inRange(hsv, lower_bound, upper_bound)
plt.figure(figsize=(20, 10))
plt.imshow(imagemask, cmap="gray")
# erode, then dilate the eroded mask to smooth the edges (the dilation
# previously restarted from ``imagemask`` and discarded the erosion)
final_mask = cv2.erode(imagemask, np.ones((3, 3), dtype=np.uint8))
final_mask = cv2.dilate(final_mask, np.ones((5, 5), dtype=np.uint8))
# find contours based on the mask
contours = cv2.findContours(final_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# the contour list is element 0 of a 2-tuple result and element 1 of a
# 3-tuple result, depending on the OpenCV version
contour_list = contours[0] if len(contours) == 2 else contours[1]
# draw contours
img_conts = cv2.drawContours(image.copy(), contour_list, -1, (0, 255, 0), 3)
plt.figure(figsize=(20, 10))
# matplotlib expects RGB channel order, while OpenCV images are BGR
plt.imshow(cv2.cvtColor(img_conts, cv2.COLOR_BGR2RGB))
The sample contour output is shown above. I know that the thresholds can be tuned for better results here.
However, what I am looking for is a better approach or solution that works in this cluttered environment for detecting small particles such as stones. Any ideas on how to solve this in a better way?
Here is how you can use the Canny edge detector to detect the rocks in your images:
import cv2
import numpy as np
def process(img):
    """Return a dilated-then-eroded Canny edge map of the BGR image *img*.

    The image is converted to grayscale, binarised at 103, blurred with a
    23x23 Gaussian kernel and passed through Canny before the two
    morphological passes.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(gray, 103, 255, cv2.THRESH_BINARY)[1]
    blurred = cv2.GaussianBlur(binary, (23, 23), 0)
    edges = cv2.Canny(blurred, 65, 0)
    # dilate and erode by the same amount to join nearby edge fragments
    thickened = cv2.dilate(edges, None, iterations=2)
    return cv2.erode(thickened, None, iterations=2)
# Draw the detected rock outlines on each sample image and show the results.
imgs = [cv2.imread("image1.jpg"), cv2.imread("image2.jpg")]
for i, img in enumerate(imgs):
    found = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    # the contour list is element 0 of a 2-tuple result and element 1 of a
    # 3-tuple result, depending on the OpenCV version
    contours = found[0] if len(found) == 2 else found[1]
    cv2.drawContours(img, contours, -1, (0, 255, 0), 1)
    cv2.imshow(str(i), img)
# keep all windows open until a key is pressed
cv2.waitKey(0)
cv2.destroyAllWindows()
Output for sample images 1 and 2:
You can also tweak the parameters using OpenCV trackbars using the code below:
import cv2
import numpy as np
from random import randint, sample
def process(img, c_t1, c_t2):
    """Return a dilated-then-eroded Canny edge map of the BGR image *img*.

    *c_t1* and *c_t2* are forwarded to ``cv2.Canny`` as its two thresholds.
    The image is first converted to grayscale, binarised at 103 and blurred
    with a 23x23 Gaussian kernel.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    binary = cv2.threshold(gray, 103, 255, cv2.THRESH_BINARY)[1]
    blurred = cv2.GaussianBlur(binary, (23, 23), 0)
    edges = cv2.Canny(blurred, c_t1, c_t2)
    # dilate and erode by the same amount to join nearby edge fragments
    thickened = cv2.dilate(edges, None, iterations=2)
    return cv2.erode(thickened, None, iterations=2)
def show(imgs, win="Image", scale=1):
    """Display *imgs* side by side in the window *win*, resized by *scale*.

    Two-dimensional (single-channel) images are converted to BGR first so
    that every image can be concatenated along the horizontal axis.
    """
    as_bgr = []
    for img in imgs:
        if len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        as_bgr.append(img)
    strip = np.concatenate(as_bgr, 1)
    height, width = strip.shape[:2]
    target_size = (int(width * scale), int(height * scale))
    cv2.imshow(win, cv2.resize(strip, target_size))
# Trackbar name -> (initial position, maximum position).
d = {"Canny Threshold 1": (65, 500),
     "Canny Threshold 2": (0, 500)}
imgs = [cv2.imread("image1.jpg"), cv2.imread("image2.jpg")]
cv2.namedWindow("Track Bars")
for name, (start, maximum) in d.items():
    # Positions are polled in the loop below, so the callback does nothing;
    # an explicit no-op replaces the builtin ``id`` used as a stand-in.
    cv2.createTrackbar(name, "Track Bars", start, maximum, lambda _pos: None)
while True:
    c_t1, c_t2 = (cv2.getTrackbarPos(name, "Track Bars") for name in d)
    for i, img in enumerate(imgs):
        img_copy = img.copy()
        processed = process(img, c_t1, c_t2)
        found = cv2.findContours(processed, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
        # the contour list is element 0 of a 2-tuple result and element 1 of
        # a 3-tuple result, depending on the OpenCV version
        contours = found[0] if len(found) == 2 else found[1]
        cv2.drawContours(img_copy, contours, -1, (0, 255, 0), 1)
        show([img_copy, processed], str(i))
    # press "q" to leave the preview loop
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cv2.destroyAllWindows()
Output:
(Click image to expand)

How can i remove background noise from a handwritten text image?

I tried these approaches but did not see any real change. I am trying to build a handwritten-text OCR using the Google Cloud Vision API. Please suggest which preprocessing steps I can use.
1.
# Denoise the colour image, then dilate and erode it once each with the same
# 5x5 kernel of ones.
image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 15)
kernel = np.ones((5, 5), np.uint8)
image = cv2.erode(
    cv2.dilate(image, kernel, iterations=1),
    kernel,
    iterations=1,
)
Another way is to use an HSV color filter. Because you are using a blue pen, we can select just the color we want. Sample code:
import cv2
import numpy as np
# Keep only the pen strokes by selecting their hue range in HSV space.
image = cv2.imread('9rS31.jpg')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
# the selected hue range (100-130) targets the blue ink
lower_blue = np.array([100, 43, 20])
upper_blue = np.array([130, 255, 255])
mask = cv2.inRange(hsv, lower_blue, upper_blue)
# blank out everything outside the mask, then binarise what is left
res = cv2.bitwise_and(image, image, mask=mask)
gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
binary = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)[1]
cv2.imwrite("img.jpg", binary)
Generated image:
The noise includes the horizontal ruled lines of your notebook page. One way to remove them is to use
cv2.getStructuringElement
You can find more information on the internet. Sample code:
import cv2
# Load image
image = cv2.imread('9rS31.jpg')
# Suppress horizontal lines: close with a wide, one-pixel-high kernel, invert
# the result and add it back onto the input with saturation.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 1))
temp2 = 255 - cv2.morphologyEx(image, cv2.MORPH_CLOSE, horizontal_kernel)
result = cv2.add(temp2, image)
# Convert to grayscale, blur, and apply an inverted Otsu threshold
gray = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
cv2.imwrite('img.jpg', thresh)
cv2.imshow('img', thresh)
cv2.waitKey()
Generated image:

OpenCV Python Contour Approximation

I want to detect the rectangular display of a digital meter. I am using contour approximation to detect the shape, but I am not able to find the exact contour of the rectangle. I don't know where the mistake is. Please have a look and suggest a fix.
digitalMeter.jpg
required-Output-digitalMeter-contour
import imutils
import cv2
image = cv2.imread('C:\\digitalMeter.jpg')
image = imutils.resize(image, height=500)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# NOTE(review): the fourth positional argument of cv2.Canny is documented as
# the optional output array, not a threshold -- confirm the 255 is intended.
edged = cv2.Canny(blurred, 50, 200, 255)
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
# examine the largest contours first
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
displayCnt = None
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    # the first contour that simplifies to four vertices is taken as the display
    if len(approx) == 4:
        displayCnt = approx
        print(displayCnt)
        break
if displayCnt is None:
    # drawContours cannot handle a missing contour, so report and skip drawing
    print('no four-sided contour found')
else:
    cv2.drawContours(image, [displayCnt], -1, (0, 230, 255), 6)
cv2.imshow('cnts', image)
cv2.waitKey(0)
Here is one way to do that in Python/OpenCV.
Read the input
Convert to gray
Threshold
Apply morphology to clean up the threshold image
Invert so that the meter is white on a black background
Find the contours and extract the largest (actually only) contour
Draw the contour on the input image
Save the results
Input:
import cv2
import numpy as np
# load the input image and record its height and width
img = cv2.imread('digital_meter.jpg')
hh, ww = img.shape[:2]
# grayscale, then a fixed binary threshold at 30
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 30, 255, cv2.THRESH_BINARY)
# clean the threshold image: small close followed by a larger open
mask = thresh
for operation, size in ((cv2.MORPH_CLOSE, 3), (cv2.MORPH_OPEN, 11)):
    kernel = np.ones((size, size), np.uint8)
    mask = cv2.morphologyEx(mask, operation, kernel)
# invert the cleaned mask
mask = 255 - mask
# pick the contour list by tuple length, then keep the largest contour
contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
big_contour = max(contours, key=cv2.contourArea)
# draw the contour in green on a copy of the input
contour_img = img.copy()
cv2.drawContours(contour_img, [big_contour], 0, (0, 255, 0), 2)
# write the intermediate and final images to disk, then display them
outputs = (
    ('digital_meter_thresh.png', "THRESH", thresh),
    ('digital_meter_mask.png', "MASK", mask),
    ('digital_meter_contour.png', "CONTOUR", contour_img),
)
for filename, _, picture in outputs:
    cv2.imwrite(filename, picture)
for _, window, picture in outputs:
    cv2.imshow(window, picture)
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold Image:
Morphology cleaned and inverted image:
Resulting contour on input:

How to do digit segmentation for hand-written account numbers & sort codes in OPENCV?

Is there a simple way that I can do digit segmentation from files like in the screenshot below?
I want to use OpenCV to do it as it's the library I'm using to do the rest of processing but other suggestions are welcomed.
Paper form:
Text box:
A simple OpenCV contours method will not work here, because the digits sit inside a box template; you need to detect the boxes first (check out this blog).
Take this piece of code and adapt it to your problem. It should not be difficult in your situation:
import cv2
import numpy as np
# import image
image = cv2.imread('C:\\Users\\PC\\Desktop\\roi.png')
# untouched copy to crop from, so rectangles drawn on ``image`` never end up
# inside the saved segments
pristine = image.copy()
# grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', gray)
# binary (inverted, so dark ink becomes white foreground)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('threshold', thresh)
# dilation with a tall, one-pixel-wide kernel to join strokes vertically
kernel = np.ones((10, 1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('dilated', img_dilation)
# find contours; the result is a 2-tuple or a 3-tuple depending on the
# OpenCV version, and contours/hierarchy are always the last two elements
found = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
ctrs, hier = found[-2], found[-1]
# sort contours left to right by the x coordinate of their bounding box
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)
    # Getting ROI from the untouched copy
    roi = pristine[y:y + h, x:x + w]
    # show ROI
    # cv2.imshow('segment no:'+str(i),roi)
    cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # only segments larger than 15x15 pixels are written to disk
    if w > 15 and h > 15:
        cv2.imwrite('C:\\Users\\PC\\Desktop\\output\\{}.png'.format(i), roi)
cv2.imshow('marked areas', image)
cv2.waitKey(0)
Source: https://lucians.dev/extract-roi-from-image-with-python-and-opencv

Resources