Cropping images in opencv - opencv

I have an image that has some text in it. I want to send the image to OCR but the image has some white noise in it so the OCR results aren't that great. I've tried to erode/dilate the image but couldn't get the perfect threshold to work. Since all the text in the images will be perfectly horizontal I tried the Hough Transform.
Here is what the image looks like when I run the sample hough transform program bundled with OpenCV.
Question
How can I black out everything except where the red lines are?
OR How can I crop out separate images for each of the areas highlighted by the red lines?
I would only like to concentrate on lines that are horizontal, I can discard the diagonal lines.
Either option will work for me when sending to OCR. However, I'd like to try both to see which fetches best results.

howto/s with output
How can I black out everything except where the red lines are?
dotess2()
['Footel text goes he: e\n', 'Some mole hele\n', 'Some Text Here\n']
OR How can I crop out a separate images for each of the areas highlighted by the red lines?
dotess1()
['Foolel text goes he: e\n', 'Some mole hele\n', 'Some Text Here\n', 'Directions\n']
code
# -*- coding: utf-8 -*-
import cv2
import numpy as np
import math
import subprocess
import os
import operator
# Some clean up / initialisation.

# Characters that count against an OCR line when results are ranked
# (see the `i in junk` test in resfilter).
junk = '\\/,-‘’“ ”?.\';!{§_~!##$%^&*()_+-|:}»£[]¢€¥°><'

# Scratch directory for the cropped regions; every run starts with it empty.
tmpdir = './tmp'
if not os.path.exists(tmpdir):
    os.makedirs(tmpdir)
for path, subdirs, files in os.walk(tmpdir):
    for name in files:
        os.remove(os.path.join(path, name))
# When the preprocessing is not perfect there will be junk in the OCR result;
# this is a crude way of pushing the junk-heavy lines to the back of the list.
def resfilter(res, junk_chars=None):
    """Return the distinct lines of ``res``, most text-like first.

    Every distinct line gets a score built character by character:
    -1 for a character contained in ``junk_chars``, +0.5 for a digit and
    +1 for anything else (whitespace and a trailing newline included).
    Nothing is dropped; the lines are only ordered by descending score.

    Args:
        res: iterable of OCR output lines (strings).
        junk_chars: characters to penalise.  Defaults to the module-level
            ``junk`` string.

    Returns:
        A list with each distinct line once, highest score first.
    """
    if junk_chars is None:
        junk_chars = junk

    def score(line):
        total = 0.
        for ch in line:
            if ch in junk_chars:
                total -= 1
            elif ch.isdigit():
                total += .5
            else:
                total += 1
        return total

    scores = dict((line, score(line)) for line in set(res))
    # .items() works on Python 2 and 3 alike; .iteritems() exists on 2 only.
    ranked = sorted(scores.items(), key=operator.itemgetter(1), reverse=True)
    return [line for line, _ in ranked]
def _run_tesseract(image_path, psm):
    """Run tesseract on ``image_path`` and return its non-blank output lines.

    ``psm`` is the page segmentation mode, passed as a string.  Tesseract is
    told to write to the output base ``tessres``; the resulting
    ``tessres.txt`` in the current directory is then read back.
    """
    # NOTE: newer Tesseract releases spell this option '--psm'.
    args = ['tesseract.exe', image_path, 'tessres', '-psm', psm, '-l', 'eng']
    # Throw the console output away.  Handing the child a PIPE that nobody
    # reads can block it once the pipe buffer is full.
    with open(os.devnull, 'w') as devnull:
        subprocess.call(args, stdin=subprocess.PIPE, stdout=devnull, stderr=devnull)
    with open('tessres.txt') as f:
        return [line for line in f if line.strip() != '']


def dotess1():
    """OCR every cropped region found under ``tmpdir`` and print the ranked lines.

    Each file is converted to greyscale, saved as ``tmp.jpg`` and passed to
    tesseract with psm 7; the collected lines go through ``resfilter``.
    """
    res = []
    for path, subdirs, files in os.walk(tmpdir):
        for name in files:
            img = cv2.imread(os.path.join(path, name))
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Optional (disabled): if the text is too small/contains noise,
            # resize while maintaining the aspect ratio.
            # if gray.shape[1] < 100:
            #     gray = cv2.resize(gray, (int(100 / gray.shape[0] * gray.shape[1]), 100))
            cv2.imwrite('tmp.jpg', gray)
            res.extend(_run_tesseract('tmp.jpg', '7'))
    print(resfilter(res))


def dotess2():
    """OCR the masked image ``clean.jpg`` with psm 3 and print the ranked lines."""
    print(resfilter(_run_tesseract('clean.jpg', '3')))
# ---------------------------------------------------------------------------
# start of code
# ---------------------------------------------------------------------------
img = cv2.imread('c:/data/ocr3.png')
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise IOError("could not read 'c:/data/ocr3.png'")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# apertureSize must be passed by keyword: the 4th positional slot is the
# optional output array `edges`.
canny = cv2.Canny(gray, 50, 200, apertureSize=3)
cv2.imshow('canny', canny)

# Remove the actual horizontal lines so that hough won't detect them:
# opening with an 11x11 kernel whose middle row is set keeps only long
# horizontal runs, which are then subtracted from the edge image.
linek = np.zeros((11, 11), dtype=np.uint8)
linek[5, ...] = 1
horizontal = cv2.morphologyEx(canny, cv2.MORPH_OPEN, linek, iterations=1)
canny -= horizontal
cv2.imshow('canny no horizontal lines', canny)

# Draw a fat line over every horizontal segment so that it can be boxed up.
# minLineLength/maxLineGap are keywords because the 5th positional slot is
# the optional output array `lines`.
lines = cv2.HoughLinesP(canny, 1, math.pi / 2, 50, minLineLength=50, maxLineGap=20)
linemask = np.zeros(gray.shape, gray.dtype)
if lines is not None:  # None when no segment was detected
    # reshape copes with both result layouts, (1, N, 4) and (N, 1, 4)
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        if y1 == y2:  # check horizontal
            cv2.line(linemask, (int(x1), int(y1)), (int(x2), int(y2)), 255, 30)
cv2.imshow('linemask', linemask)

# * two methods of doing ocr, line mode and page mode
# * boxmask is used so that a clean image can be saved for page mode
# * for every detected box, the roi is cropped and saved so that tess3 can be
#   run in line mode
boxmask = np.zeros(gray.shape, gray.dtype)
# [-2] picks the contour list whether findContours returns two or three values.
contours = cv2.findContours(linemask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
for idx, cnt in enumerate(contours, 1):
    x, y, w, h = cv2.boundingRect(cnt)
    roi = img[y:y + h, x:x + w].copy()
    cv2.imwrite('%s/%s.jpg' % (tmpdir, idx), roi)
    cv2.rectangle(boxmask, (x, y), (x + w, y + h), 255, -1)

# Black out everything outside the boxes.
clean = img & cv2.cvtColor(boxmask, cv2.COLOR_GRAY2BGR)
cv2.imshow('clean', clean)
cv2.imwrite('clean.jpg', clean)
cv2.imshow('img', img)
dotess1()
dotess2()
cv2.waitKey(0)

Related

unable to get text from the image

I'm learning AI/ML and trying to get text from this sample form.
import re  # needed for the re.sub() call below

import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r'C:\Users\Pranav\AppData\Local\Programs\Tesseract-OCR\tesseract.exe'

image = cv2.imread('image2.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)

# Hard-coded region of interest (x, y, width, height) of the field to read.
x,y,w,h = 393, 531, 837, 80
firstROI = blur[y:y+h,x:x+w]

firstname = pytesseract.image_to_string(firstROI, lang='eng', config='--psm 6')
print(firstname)
# Drop every character that is not a word character.
firstname = re.sub(r'[^\w]', '', firstname)

cv2.imshow('image', firstROI)
cv2.waitKey()
cv2.destroyAllWindows()
Using the above code, I am able to get the normal printed text on the white background, but I am unable to get the text from the grey background boxes. For example, the real value of the first name box is "Andrew", but I am getting only "oe".
firstROI shows like this:
As per Freddy's comments, I go through this link and updated the following code but still no output.
from tesserocr import PyTessBaseAPI, PSM, OEM

images = ['andrew1.png', 'andrew2.png', 'test1.png']

# Using the API object as a context manager releases it when the block exits.
with PyTessBaseAPI(psm=PSM.AUTO_OSD, lang='eng', path=r'C:\Users\Pranav\tessdata-master') as api:
    for img in images:
        api.SetImageFile(img)
        print(api.GetUTF8Text())
        print(api.AllWordConfidences())
these are the sample images
It can read the text output from the third image only(Demographics). Please help me how to read the text from gray background images(Andrew).
This link provides me the answer. Its removing the noise in the background image.

i wanted to detect objects in a hsv image. but i keep getting an error,,Expected Ptr<cv::UMat> for argument '%s'

I was trying to create a trackbar window and get the HSV values of the image by adjusting the trackbars. I created a mask and then adjusted the trackbars to detect an object in the HSV image.
enter code here
# No-op trackbar callback; the slider positions are polled inside the loop below.
def nothing(x):
pass
# Control window with six sliders: lower (L*) and upper (U*) bounds for H, S and V.
cv.namedWindow("Tracking")
cv.createTrackbar("LH","Tracking",0,255,nothing)
cv.createTrackbar("LS","Tracking",0,255,nothing)
cv.createTrackbar("LV","Tracking",0,255,nothing)
cv.createTrackbar("UH","Tracking",255,255,nothing)
cv.createTrackbar("US","Tracking",255,255,nothing)
cv.createTrackbar("UV","Tracking",255,255,nothing)
while True:
# NOTE(review): the image is re-read and re-converted on every iteration.
frame = cv.imread("C:/Users/acer/Desktop/insects/New folder/ins.jpg")
hsv = cv.cvtColor(frame,cv.COLOR_BGR2HSV)
# Current slider positions.
l_h = cv.getTrackbarPos("LH","Tracking")
l_s = cv.getTrackbarPos("LS","Tracking")
l_v = cv.getTrackbarPos("LV","Tracking")
u_h = cv.getTrackbarPos("UH","Tracking")
u_s = cv.getTrackbarPos("US","Tracking")
u_v = cv.getTrackbarPos("UV","Tracking")
l_b = np.array([l_h,l_s,l_v])
u_b = np.array([u_h,u_s,u_v])
# NOTE(review): this only builds a tuple; no cv.inRange() call is made here,
# so `mask` is not an image when it reaches bitwise_and()/imshow() below.
mask = (hsv,l_b,u_b)
res = cv.bitwise_and(frame,frame,mask=mask)
cv.imshow("frame",frame)
cv.imshow("mask",mask)
cv.imshow("res",res)
key = cv.waitKey(1)
# Leave the loop on key code 27 (presumably Esc - confirm).
if key == 27:
break
cv.destroyAllWindows()
There are a few issues with your code:
1) You have no import statements. You need at least:
import cv2 as cv
import numpy as np
2) Your indentation is incorrect. Your function nothing() should not be indented.
3) You omitted to call inRange(), you need:
mask = cv.inRange(hsv,l_b,u_b)
4) You have scaled the Hue into the range 0..255 when it actually has the range 0..180 when used with uint8 images so that 360 degrees comes out as 180 degrees which is less than the 255 upper limit of uint8.
By the way, it is fairly poor practice to do "loop invariant" stuff inside a loop - I mean the part where you hit the disk every millisecond and re-read the image, re-decode the JPEG and convert it to HSV. All that can be done outside the loop, then inside it, just do a quick memory copy of the HSV image.

error: (-215:Assertion failed) totalSampleCount > 0 in function 'GMM::endLearning'

I'm trying to use OpenCV to remove the background of my pictures.
When I run it on a single file, it works.
The code as below:
def bgremove(name, count):
    """Cut the foreground out of picture ``name`` with GrabCut and save it.

    The foreground is kept; background pixels whose three channels are all
    non-zero are painted white.  The result is written to ``A<count>.JPG``.

    Args:
        name: path of the input picture.
        count: value inserted into the output file name.

    Raises:
        IOError: if the picture cannot be read.
        ValueError: if the picture is too small for the hard-coded rectangle.
    """
    import cv2
    import numpy as np

    # Load the image; cv2.imread returns None instead of raising on failure.
    imgo = cv2.imread(name)
    if imgo is None:
        raise IOError("could not read image %r" % (name,))
    height, width = imgo.shape[:2]

    # Hard-coded rect (x, y, w, h): the object must lie within this rect.
    rect = (10, 10, width - 30, height - 30)
    if rect[2] <= 0 or rect[3] <= 0:
        # Fail with a readable message rather than deep inside grabCut.
        raise ValueError("image %r (%dx%d) is too small for the GrabCut "
                         "rectangle" % (name, width, height))

    # Mask holder and the model buffers grabCut works on.
    mask = np.zeros(imgo.shape[:2], np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    cv2.grabCut(imgo, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)

    # Mask values 0 and 2 become 0, everything else 1; the 0/1 mask then
    # selects the foreground pixels.
    mask = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img1 = imgo * mask[:, :, np.newaxis]

    # Get the background and turn its pixels that are non-zero in every
    # channel white.
    background = imgo - img1
    background[np.where((background > [0, 0, 0]).all(axis=2))] = [255, 255, 255]

    # Add the background and the image.
    final = background + img1
    # To be done - smoothing the edges.
    cv2.imwrite("A%s.JPG" % count, final)
However, when I use the function in a for loop. it pops-up:
error: (-215:Assertion failed) totalSampleCount > 0 in function
'GMM::endLearning'
when Im generating a group of pictures
I encountered this problem and the issue was that the rectangle rect was too small. I don't know the dimensions of your image but try a bigger rectangle and it may solve this.

Why isn't Python OpenCV HoughP Transform able to identify all the spaced lines?

When we have spaced lines on 1px. HoughP transform of python opencv doesn't mark all the points.
I used:
cv2.HoughLinesP(img,1,np.pi/180,400)
Theoretically it should be working fine be it dashed or non dashed. In this case it doesn't mark all the lines if they are on the same height.
HoughP Transfrom Sample Output
The Green Lines indicate the white lines that were identified.
I changed the parameters to this:
cv2.HoughLinesP(img,1,np.pi/180,10,10,10)
And got this output; as you can see, the detection is still missing some parts. It's unclear why, on the same straight line, a shorter segment is marked but a longer one is not.
*** After the method suggested!
After method suggested by Robert
Input Image: Input Image
Here is the code:
import numpy as np
import cv2
import time

img = cv2.imread("in.PNG")
img2 = np.abs(img)  # separate array to draw the detected segments on
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh1 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
# NOTE(review): the transform runs on the grey image, not on thresh1 -
# confirm that this is intended.
lines = cv2.HoughLinesP(img, rho=1, theta=1 * np.pi / 180, threshold=10,
                        minLineLength=10, maxLineGap=10)
print(lines)
# HoughLinesP returns None when it finds no segment at all.
if lines is not None:
    # One (x1, y1, x2, y2) row per detected segment.
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        cv2.line(img2, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 1)
# cv2.imshow("Window", thresh1)
cv2.imwrite("out.PNG", img2)

How to import and use scipy.spatial.distance functions correctly?

# Splits a thresholded image into 24 parts and counts the zero-valued pixels of each.
# NOTE(review): `np` and `Image` are used below but their imports are not shown.
from scipy.spatial.distance import seuclidean #imports abridged
import scipy
img = np.asarray(Image.open("testtwo.tif").convert('L'))
img = 1 * (img < 127)
area = (img == 0).sum() # computing white pixel area
print area
areasplit = np.split(img, 24) # splitting image array
print areasplit
for i in areasplit:
result = (i == 0).sum()
print result #computing white pixel area for every single array
minimal = result.min()
maximal = result.max()
# NOTE(review): the name imported at the top is `seuclidean`; `seuclidian`
# (spelled with "ian") is not defined anywhere in this snippet.
dist = seuclidian(minimal, maximal)
print dist
I want to compute distances between array elements produced by splitting an image. Python can't recognize the names of the distance functions (I have tried several of them and various approaches to importing the modules). How do I import and call these functions correctly? Thank you.
You haven't stated what the error is, but you are using numpy as well and I can't see an import for that
Try
import numpy as np
import scipy
Then try
# Import the submodule explicitly; a bare `import scipy` may not expose it.
import scipy.spatial.distance
# The functions are spelled "euclidean" / "seuclidean" (ending in "-ean").
dist = scipy.spatial.distance.euclidean(minimal, maximal)
dists = scipy.spatial.distance.seuclidean(minimal, maximal, variances)
Note - the standardised euclidean distance takes a third parameter.

Resources